llama-stack 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff compares publicly available package versions as released to one of the supported registries. The information is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- llama_stack/{distributions/meta-reference-gpu → core/connectors}/__init__.py +3 -1
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +53 -51
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/__init__.py +0 -25
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +1 -158
- llama_stack/providers/utils/inference/openai_mixin.py +42 -2
- llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +7 -7
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/RECORD +115 -148
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/doc_template.md +0 -119
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/dog.jpg +0 -0
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/pasta.jpeg +0 -0
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/top_level.txt +0 -0

llama_stack/providers/inline/agents/meta_reference/agents.py

@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from collections.abc import AsyncIterator

 from llama_stack.core.datatypes import AccessRule
 from llama_stack.core.storage.kvstore import InmemoryKVStoreImpl, kvstore_impl
@@ -11,21 +12,21 @@ from llama_stack.log import get_logger
 from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack_api import (
     Agents,
+    Connectors,
     Conversations,
+    CreateResponseRequest,
+    DeleteResponseRequest,
     Files,
     Inference,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
+    ListResponseInputItemsRequest,
+    ListResponsesRequest,
     OpenAIDeleteResponseObject,
-    OpenAIResponseInput,
-    OpenAIResponseInputTool,
-    OpenAIResponseInputToolChoice,
     OpenAIResponseObject,
-
-    OpenAIResponseText,
-    Order,
+    OpenAIResponseObjectStream,
     Prompts,
-
+    RetrieveResponseRequest,
     Safety,
     ToolGroups,
     ToolRuntime,
@@ -50,6 +51,7 @@ class MetaReferenceAgentsImpl(Agents):
         conversations_api: Conversations,
         prompts_api: Prompts,
         files_api: Files,
+        connectors_api: Connectors,
         policy: list[AccessRule],
     ):
         self.config = config
@@ -64,6 +66,7 @@ class MetaReferenceAgentsImpl(Agents):
         self.in_memory_store = InmemoryKVStoreImpl()
         self.openai_responses_impl: OpenAIResponsesImpl | None = None
         self.policy = policy
+        self.connectors_api = connectors_api

     async def initialize(self) -> None:
         self.persistence_store = await kvstore_impl(self.config.persistence.agent_state)
@@ -80,6 +83,7 @@ class MetaReferenceAgentsImpl(Agents):
             prompts_api=self.prompts_api,
             files_api=self.files_api,
             vector_stores_config=self.config.vector_stores_config,
+            connectors_api=self.connectors_api,
         )

     async def shutdown(self) -> None:
@@ -88,79 +92,72 @@ class MetaReferenceAgentsImpl(Agents):
     # OpenAI responses
     async def get_openai_response(
         self,
-
+        request: RetrieveResponseRequest,
     ) -> OpenAIResponseObject:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
-        return await self.openai_responses_impl.get_openai_response(response_id)
+        return await self.openai_responses_impl.get_openai_response(request.response_id)

     async def create_openai_response(
         self,
-
-
-
-
-
-
-
-        store: bool | None = True,
-        stream: bool | None = False,
-        temperature: float | None = None,
-        text: OpenAIResponseText | None = None,
-        tool_choice: OpenAIResponseInputToolChoice | None = None,
-        tools: list[OpenAIResponseInputTool] | None = None,
-        include: list[str] | None = None,
-        max_infer_iters: int | None = 10,
-        guardrails: list[ResponseGuardrail] | None = None,
-        max_tool_calls: int | None = None,
-        metadata: dict[str, str] | None = None,
-    ) -> OpenAIResponseObject:
+        request: CreateResponseRequest,
+    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
+        """Create an OpenAI response.
+
+        Returns either a single response object (non-streaming) or an async iterator
+        yielding response stream events (streaming).
+        """
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
         result = await self.openai_responses_impl.create_openai_response(
-            input,
-            model,
-            prompt,
-            instructions,
-            previous_response_id,
-            conversation,
-            store,
-            stream,
-            temperature,
-            text,
-            tool_choice,
-            tools,
-            include,
-            max_infer_iters,
-            guardrails,
-            parallel_tool_calls,
-            max_tool_calls,
-
+            request.input,
+            request.model,
+            request.prompt,
+            request.instructions,
+            request.previous_response_id,
+            request.conversation,
+            request.store,
+            request.stream,
+            request.temperature,
+            request.text,
+            request.tool_choice,
+            request.tools,
+            request.include,
+            request.max_infer_iters,
+            request.guardrails,
+            request.parallel_tool_calls,
+            request.max_tool_calls,
+            request.max_output_tokens,
+            request.reasoning,
+            request.safety_identifier,
+            request.metadata,
         )
-        return result
+        return result

     async def list_openai_responses(
         self,
-
-        limit: int | None = 50,
-        model: str | None = None,
-        order: Order | None = Order.desc,
+        request: ListResponsesRequest,
     ) -> ListOpenAIResponseObject:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
-        return await self.openai_responses_impl.list_openai_responses(
+        return await self.openai_responses_impl.list_openai_responses(
+            request.after, request.limit, request.model, request.order
+        )

     async def list_openai_response_input_items(
         self,
-
-        after: str | None = None,
-        before: str | None = None,
-        include: list[str] | None = None,
-        limit: int | None = 20,
-        order: Order | None = Order.desc,
+        request: ListResponseInputItemsRequest,
     ) -> ListOpenAIResponseInputItem:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
         return await self.openai_responses_impl.list_openai_response_input_items(
-            response_id,
+            request.response_id,
+            request.after,
+            request.before,
+            request.include,
+            request.limit,
+            request.order,
         )

-    async def delete_openai_response(
+    async def delete_openai_response(
+        self,
+        request: DeleteResponseRequest,
+    ) -> OpenAIDeleteResponseObject:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
-        return await self.openai_responses_impl.delete_openai_response(response_id)
+        return await self.openai_responses_impl.delete_openai_response(request.response_id)
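
The agents entry points above now accept a single typed request object (`CreateResponseRequest`, `RetrieveResponseRequest`, `ListResponsesRequest`, `ListResponseInputItemsRequest`, `DeleteResponseRequest`) instead of a flat list of keyword arguments. A hedged caller-side sketch, assuming the request models are importable from `llama_stack_api` as the imports above show and accept their fields as keyword arguments; the model id and `agents_impl` handle are placeholders:

```python
from llama_stack_api import CreateResponseRequest, RetrieveResponseRequest


async def demo(agents_impl) -> None:
    # Every former keyword argument of create_openai_response is now a field
    # on CreateResponseRequest (input, model, stream, max_output_tokens, ...).
    created = await agents_impl.create_openai_response(
        CreateResponseRequest(
            model="llama3.2:3b",     # placeholder model id, not taken from the diff
            input="Say hello",
            stream=False,
            max_output_tokens=256,   # new field in 0.5.0 per the hunks above
        )
    )
    # Retrieval likewise wraps response_id in a request object.
    fetched = await agents_impl.get_openai_response(
        RetrieveResponseRequest(response_id=created.id)
    )
    print(fetched.status)
```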

llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py

@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import asyncio
 import re
 import time
 import uuid
@@ -19,11 +18,14 @@ from llama_stack.providers.utils.responses.responses_store import (
 )
 from llama_stack.providers.utils.tools.mcp import MCPSessionManager
 from llama_stack_api import (
+    AddItemsRequest,
+    Connectors,
     ConversationItem,
     Conversations,
     Files,
     Inference,
     InvalidConversationIdError,
+    ListItemsRequest,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIChatCompletionContentPartParam,
@@ -39,6 +41,7 @@ from llama_stack_api import (
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
     OpenAIResponsePrompt,
+    OpenAIResponseReasoning,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
     OpenAISystemMessageParam,
@@ -83,6 +86,7 @@ class OpenAIResponsesImpl:
         conversations_api: Conversations,
         prompts_api: Prompts,
         files_api: Files,
+        connectors_api: Connectors,
         vector_stores_config=None,
     ):
         self.inference_api = inference_api
@@ -100,6 +104,7 @@ class OpenAIResponsesImpl:
         )
         self.prompts_api = prompts_api
         self.files_api = files_api
+        self.connectors_api = connectors_api

     async def _prepend_previous_response(
         self,
@@ -150,7 +155,9 @@ class OpenAIResponsesImpl:

             tool_context.recover_tools_from_previous_response(previous_response)
         elif conversation is not None:
-            conversation_items = await self.conversations_api.list_items(
+            conversation_items = await self.conversations_api.list_items(
+                ListItemsRequest(conversation_id=conversation, order="asc")
+            )

             # Use stored messages as source of truth (like previous_response.messages)
             stored_messages = await self.responses_store.get_conversation_messages(conversation)
@@ -462,6 +469,9 @@ class OpenAIResponsesImpl:
         guardrails: list[str | ResponseGuardrailSpec] | None = None,
         parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
+        reasoning: OpenAIResponseReasoning | None = None,
+        max_output_tokens: int | None = None,
+        safety_identifier: str | None = None,
         metadata: dict[str, str] | None = None,
     ):
         stream = bool(stream)
@@ -499,9 +509,6 @@ class OpenAIResponsesImpl:
             if not conversation.startswith("conv_"):
                 raise InvalidConversationIdError(conversation)

-        if max_tool_calls is not None and max_tool_calls < 1:
-            raise ValueError(f"Invalid {max_tool_calls=}; should be >= 1")
-
         stream_gen = self._create_streaming_response(
             input=input,
             conversation=conversation,
@@ -518,6 +525,9 @@ class OpenAIResponsesImpl:
             guardrail_ids=guardrail_ids,
             parallel_tool_calls=parallel_tool_calls,
             max_tool_calls=max_tool_calls,
+            reasoning=reasoning,
+            max_output_tokens=max_output_tokens,
+            safety_identifier=safety_identifier,
             metadata=metadata,
             include=include,
         )
@@ -573,6 +583,9 @@ class OpenAIResponsesImpl:
         guardrail_ids: list[str] | None = None,
         parallel_tool_calls: bool | None = True,
         max_tool_calls: int | None = None,
+        reasoning: OpenAIResponseReasoning | None = None,
+        max_output_tokens: int | None = None,
+        safety_identifier: str | None = None,
         metadata: dict[str, str] | None = None,
         include: list[ResponseItemInclude] | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
@@ -612,46 +625,45 @@ class OpenAIResponsesImpl:

         # Create a per-request MCP session manager for session reuse (fix for #4452)
         # This avoids redundant tools/list calls when making multiple MCP tool invocations
-
-
-
-
-
-
-
-
-            mcp_session_manager=mcp_session_manager,
-        )
+        async with MCPSessionManager() as mcp_session_manager:
+            request_tool_executor = ToolExecutor(
+                tool_groups_api=self.tool_groups_api,
+                tool_runtime_api=self.tool_runtime_api,
+                vector_io_api=self.vector_io_api,
+                vector_stores_config=self.tool_executor.vector_stores_config,
+                mcp_session_manager=mcp_session_manager,
+            )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            orchestrator = StreamingResponseOrchestrator(
+                inference_api=self.inference_api,
+                ctx=ctx,
+                response_id=response_id,
+                created_at=created_at,
+                prompt=prompt,
+                text=text,
+                max_infer_iters=max_infer_iters,
+                parallel_tool_calls=parallel_tool_calls,
+                tool_executor=request_tool_executor,
+                safety_api=self.safety_api,
+                connectors_api=self.connectors_api,
+                guardrail_ids=guardrail_ids,
+                instructions=instructions,
+                max_tool_calls=max_tool_calls,
+                reasoning=reasoning,
+                max_output_tokens=max_output_tokens,
+                safety_identifier=safety_identifier,
+                metadata=metadata,
+                include=include,
+                store=store,
+            )

-
-
-        failed_response = None
+            final_response = None
+            failed_response = None

-
-        output_items: list[ConversationItem] = []
+            output_items: list[ConversationItem] = []

-
-        input_items_for_storage = self._prepare_input_items_for_storage(all_input)
+            input_items_for_storage = self._prepare_input_items_for_storage(all_input)

-        try:
             async for stream_chunk in orchestrator.create_response():
                 match stream_chunk.type:
                     case "response.completed" | "response.incomplete":
@@ -689,16 +701,6 @@ class OpenAIResponsesImpl:
                         await self.responses_store.store_conversation_messages(conversation, messages_to_store)

                 yield stream_chunk
-        finally:
-            # Clean up MCP sessions at the end of the request (fix for #4452)
-            # Use shield() to prevent cancellation from interrupting cleanup and leaking resources
-            # Wrap in try/except as cleanup errors should not mask the original response
-            try:
-                await asyncio.shield(mcp_session_manager.close_all())
-            except BaseException as e:
-                # Debug level - cleanup errors are expected in streaming scenarios where
-                # anyio cancel scopes may be in a different task context
-                logger.debug(f"Error during MCP session cleanup: {e}")

     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         return await self.responses_store.delete_response_object(response_id)
@@ -721,4 +723,4 @@ class OpenAIResponsesImpl:

         adapter = TypeAdapter(list[ConversationItem])
         validated_items = adapter.validate_python(conversation_items)
-        await self.conversations_api.add_items(conversation_id, validated_items)
+        await self.conversations_api.add_items(conversation_id, AddItemsRequest(items=validated_items))
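
The hunks above also change how MCP sessions are cleaned up: the old `try/finally` with `asyncio.shield(mcp_session_manager.close_all())` is replaced by entering `MCPSessionManager()` as an async context manager, so teardown is bound to scope exit instead of being spread across the streaming generator. A minimal self-contained sketch of that pattern (a stand-in class, not llama-stack's `MCPSessionManager`; whether the real manager shields its own cleanup is not shown in this diff):

```python
import asyncio


class SessionManagerSketch:
    """Stand-in async context manager: sessions opened inside the block are closed on exit."""

    def __init__(self) -> None:
        self._sessions: list[str] = []

    async def open(self, url: str) -> str:
        self._sessions.append(url)      # pretend this established an MCP session
        return url

    async def close_all(self) -> None:
        self._sessions.clear()

    async def __aenter__(self) -> "SessionManagerSketch":
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        # Shield cleanup so cancellation of the surrounding request cannot skip it,
        # mirroring the intent of the removed finally block.
        await asyncio.shield(self.close_all())


async def main() -> None:
    async with SessionManagerSketch() as sessions:
        await sessions.open("http://mcp.example/sse")
        # ... reuse the same session across several tool invocations ...
    # sessions are closed here even if the block above raises


asyncio.run(main())
```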

llama_stack/providers/inline/agents/meta_reference/responses/streaming.py

@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import time
 import uuid
 from collections.abc import AsyncIterator
 from typing import Any
@@ -16,6 +17,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import interleaved_con
 from llama_stack_api import (
     AllowedToolsFilter,
     ApprovalFilter,
+    Connectors,
     Inference,
     MCPListToolsTool,
     ModelNotFoundError,
@@ -30,6 +32,7 @@ from llama_stack_api import (
     OpenAIChatCompletionToolChoiceFunctionTool,
     OpenAIChoice,
     OpenAIChoiceLogprobs,
+    OpenAIFinishReason,
     OpenAIMessageParam,
     OpenAIResponseContentPartOutputText,
     OpenAIResponseContentPartReasoningText,
@@ -77,6 +80,7 @@ from llama_stack_api import (
     OpenAIResponseOutputMessageMCPListTools,
     OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponsePrompt,
+    OpenAIResponseReasoning,
     OpenAIResponseText,
     OpenAIResponseUsage,
     OpenAIResponseUsageInputTokensDetails,
@@ -133,11 +137,16 @@ class StreamingResponseOrchestrator:
         instructions: str | None,
         safety_api: Safety | None,
         guardrail_ids: list[str] | None = None,
+        connectors_api: Connectors | None = None,
         prompt: OpenAIResponsePrompt | None = None,
         parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
+        reasoning: OpenAIResponseReasoning | None = None,
+        max_output_tokens: int | None = None,
+        safety_identifier: str | None = None,
         metadata: dict[str, str] | None = None,
         include: list[ResponseItemInclude] | None = None,
+        store: bool | None = True,
     ):
         self.inference_api = inference_api
         self.ctx = ctx
@@ -147,6 +156,7 @@ class StreamingResponseOrchestrator:
         self.max_infer_iters = max_infer_iters
         self.tool_executor = tool_executor
         self.safety_api = safety_api
+        self.connectors_api = connectors_api
         self.guardrail_ids = guardrail_ids or []
         self.prompt = prompt
         # System message that is inserted into the model's context
@@ -155,8 +165,14 @@ class StreamingResponseOrchestrator:
         self.parallel_tool_calls = parallel_tool_calls
         # Max number of total calls to built-in tools that can be processed in a response
         self.max_tool_calls = max_tool_calls
+        self.reasoning = reasoning
+        # An upper bound for the number of tokens that can be generated for a response
+        self.max_output_tokens = max_output_tokens
+        self.safety_identifier = safety_identifier
         self.metadata = metadata
+        self.store = store
         self.include = include
+        self.store = bool(store) if store is not None else True
         self.sequence_number = 0
         # Store MCP tool mapping that gets built during tool processing
         self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
@@ -179,6 +195,8 @@ class StreamingResponseOrchestrator:
         self.violation_detected = False
         # Track total calls made to built-in tools
         self.accumulated_builtin_tool_calls = 0
+        # Track total output tokens generated across inference calls
+        self.accumulated_builtin_output_tokens = 0

     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""
@@ -191,7 +209,10 @@ class StreamingResponseOrchestrator:
             model=self.ctx.model,
             status="completed",
             output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")],
+            max_output_tokens=self.max_output_tokens,
+            safety_identifier=self.safety_identifier,
             metadata=self.metadata,
+            store=self.store,
         )

         return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response)
@@ -212,8 +233,10 @@ class StreamingResponseOrchestrator:
         *,
         error: OpenAIResponseError | None = None,
     ) -> OpenAIResponseObject:
+        completed_at = int(time.time()) if status == "completed" else None
         return OpenAIResponseObject(
             created_at=self.created_at,
+            completed_at=completed_at,
             id=self.response_id,
             model=self.ctx.model,
             object="response",
@@ -228,7 +251,11 @@ class StreamingResponseOrchestrator:
             prompt=self.prompt,
             parallel_tool_calls=self.parallel_tool_calls,
             max_tool_calls=self.max_tool_calls,
+            reasoning=self.reasoning,
+            max_output_tokens=self.max_output_tokens,
+            safety_identifier=self.safety_identifier,
             metadata=self.metadata,
+            store=self.store,
         )

     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
@@ -292,6 +319,22 @@ class StreamingResponseOrchestrator:

         try:
             while True:
+                if (
+                    self.max_output_tokens is not None
+                    and self.accumulated_builtin_output_tokens >= self.max_output_tokens
+                ):
+                    logger.info(
+                        "Skipping inference call since max_output_tokens reached: "
+                        f"{self.accumulated_builtin_output_tokens}/{self.max_output_tokens}"
+                    )
+                    final_status = "incomplete"
+                    break
+
+                remaining_output_tokens = (
+                    self.max_output_tokens - self.accumulated_builtin_output_tokens
+                    if self.max_output_tokens is not None
+                    else None
+                )
                 # Text is the default response format for chat completion so don't need to pass it
                 # (some providers don't support non-empty response_format when tools are present)
                 response_format = (
@@ -311,6 +354,11 @@ class StreamingResponseOrchestrator:
                     True if self.include and ResponseItemInclude.message_output_text_logprobs in self.include else None
                 )

+                # In OpenAI, parallel_tool_calls is only allowed when 'tools' are specified.
+                effective_parallel_tool_calls = (
+                    self.parallel_tool_calls if effective_tools is not None and len(effective_tools) > 0 else None
+                )
+
                 params = OpenAIChatCompletionRequestWithExtraBody(
                     model=self.ctx.model,
                     messages=messages,
@@ -324,6 +372,10 @@ class StreamingResponseOrchestrator:
                         "include_usage": True,
                     },
                     logprobs=logprobs,
+                    parallel_tool_calls=effective_parallel_tool_calls,
+                    reasoning_effort=self.reasoning.effort if self.reasoning else None,
+                    safety_identifier=self.safety_identifier,
+                    max_completion_tokens=remaining_output_tokens,
                 )
                 completion_result = await self.inference_api.openai_chat_completion(params)

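
The orchestrator hunks above add an output-token budget: each loop iteration first checks whether the accumulated output tokens have reached `max_output_tokens` (marking the response `incomplete` if so) and otherwise forwards the remaining budget to the chat completion as `max_completion_tokens`. A self-contained sketch of that budgeting loop with a fake inference call (none of these helper names come from llama-stack):

```python
import asyncio
import random


async def fake_inference(max_completion_tokens: int | None) -> int:
    """Stand-in inference call; returns how many output tokens it 'generated'."""
    generated = random.randint(40, 120)
    return generated if max_completion_tokens is None else min(generated, max_completion_tokens)


async def respond(max_output_tokens: int | None = 200) -> str:
    used = 0  # mirrors the accumulated output-token counter in the orchestrator
    while True:
        if max_output_tokens is not None and used >= max_output_tokens:
            return "incomplete"          # budget exhausted before the loop finished naturally
        remaining = max_output_tokens - used if max_output_tokens is not None else None
        used += await fake_inference(max_completion_tokens=remaining)
        if random.random() < 0.5:        # stand-in for "model produced no further tool calls"
            return "completed"


print(asyncio.run(respond()))
```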

llama_stack/providers/inline/agents/meta_reference/responses/streaming.py (continued)

@@ -480,23 +532,24 @@ class StreamingResponseOrchestrator:
         if not chunk.usage:
             return

+        self.accumulated_builtin_output_tokens += chunk.usage.completion_tokens
+
         if self.accumulated_usage is None:
             # Convert from chat completion format to response format
             self.accumulated_usage = OpenAIResponseUsage(
                 input_tokens=chunk.usage.prompt_tokens,
                 output_tokens=chunk.usage.completion_tokens,
                 total_tokens=chunk.usage.total_tokens,
-                input_tokens_details=(
-
-                    if chunk.usage.prompt_tokens_details
-                    else
+                input_tokens_details=OpenAIResponseUsageInputTokensDetails(
+                    cached_tokens=chunk.usage.prompt_tokens_details.cached_tokens
+                    if chunk.usage.prompt_tokens_details and chunk.usage.prompt_tokens_details.cached_tokens is not None
+                    else 0
                 ),
-                output_tokens_details=(
-
-                    reasoning_tokens=chunk.usage.completion_tokens_details.reasoning_tokens
-                )
+                output_tokens_details=OpenAIResponseUsageOutputTokensDetails(
+                    reasoning_tokens=chunk.usage.completion_tokens_details.reasoning_tokens
                     if chunk.usage.completion_tokens_details
-
+                    and chunk.usage.completion_tokens_details.reasoning_tokens is not None
+                    else 0
                 ),
             )
         else:
@@ -506,17 +559,16 @@ class StreamingResponseOrchestrator:
                 output_tokens=self.accumulated_usage.output_tokens + chunk.usage.completion_tokens,
                 total_tokens=self.accumulated_usage.total_tokens + chunk.usage.total_tokens,
                 # Use latest non-null details
-                input_tokens_details=(
-
-                    if chunk.usage.prompt_tokens_details
-                    else self.accumulated_usage.input_tokens_details
+                input_tokens_details=OpenAIResponseUsageInputTokensDetails(
+                    cached_tokens=chunk.usage.prompt_tokens_details.cached_tokens
+                    if chunk.usage.prompt_tokens_details and chunk.usage.prompt_tokens_details.cached_tokens is not None
+                    else self.accumulated_usage.input_tokens_details.cached_tokens
                 ),
-                output_tokens_details=(
-
-                    reasoning_tokens=chunk.usage.completion_tokens_details.reasoning_tokens
-                )
+                output_tokens_details=OpenAIResponseUsageOutputTokensDetails(
+                    reasoning_tokens=chunk.usage.completion_tokens_details.reasoning_tokens
                     if chunk.usage.completion_tokens_details
-
+                    and chunk.usage.completion_tokens_details.reasoning_tokens is not None
+                    else self.accumulated_usage.output_tokens_details.reasoning_tokens
                 ),
             )

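
The usage-accumulation hunks above now always construct the token-detail objects, defaulting `cached_tokens`/`reasoning_tokens` to 0 on the first chunk and falling back to the previously accumulated values when a later chunk omits them. A small stand-in sketch of that merge rule using plain dataclasses (not the llama-stack usage models):

```python
from dataclasses import dataclass


@dataclass
class Details:          # stand-in for the *TokensDetails models
    cached_tokens: int = 0


@dataclass
class Usage:            # stand-in for OpenAIResponseUsage
    input_tokens: int
    details: Details | None = None


def merge(total: Usage | None, chunk: Usage) -> Usage:
    if total is None:
        # First chunk: always materialize details, defaulting missing fields to 0.
        cached = chunk.details.cached_tokens if chunk.details else 0
        return Usage(input_tokens=chunk.input_tokens, details=Details(cached_tokens=cached))
    # Later chunks: add counts, carry the previous detail value when the chunk omits it.
    prev = total.details or Details()
    cached = chunk.details.cached_tokens if chunk.details else prev.cached_tokens
    return Usage(input_tokens=total.input_tokens + chunk.input_tokens, details=Details(cached_tokens=cached))


total = merge(None, Usage(input_tokens=10, details=None))
total = merge(total, Usage(input_tokens=7, details=Details(cached_tokens=3)))
print(total)  # Usage(input_tokens=17, details=Details(cached_tokens=3))
```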

llama_stack/providers/inline/agents/meta_reference/responses/streaming.py (continued)

@@ -652,7 +704,7 @@ class StreamingResponseOrchestrator:
         chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
         chunk_created = 0
         chunk_model = ""
-        chunk_finish_reason = ""
+        chunk_finish_reason: OpenAIFinishReason = "stop"
         chat_response_logprobs = []

         # Create a placeholder message item for delta events
@@ -744,9 +796,9 @@ class StreamingResponseOrchestrator:
                         chunk_finish_reason = chunk_choice.finish_reason

                     # Handle reasoning content if present (non-standard field for o1/o3 models)
-                    if hasattr(chunk_choice.delta, "
+                    if hasattr(chunk_choice.delta, "reasoning") and chunk_choice.delta.reasoning:
                         async for event in self._handle_reasoning_content_chunk(
-                            reasoning_content=chunk_choice.delta.
+                            reasoning_content=chunk_choice.delta.reasoning,
                             reasoning_part_emitted=reasoning_part_emitted,
                             reasoning_content_index=reasoning_content_index,
                             message_item_id=message_item_id,
@@ -758,7 +810,7 @@ class StreamingResponseOrchestrator:
                             else:
                                 yield event
                         reasoning_part_emitted = True
-                        reasoning_text_accumulated.append(chunk_choice.delta.
+                        reasoning_text_accumulated.append(chunk_choice.delta.reasoning)

                     # Handle refusal content if present
                     if chunk_choice.delta.refusal:
@@ -1175,6 +1227,9 @@ class StreamingResponseOrchestrator:
         """Process an MCP tool configuration and emit appropriate streaming events."""
         from llama_stack.providers.utils.tools.mcp import list_mcp_tools

+        # Resolve connector_id to server_url if provided
+        mcp_tool = await resolve_mcp_connector_id(mcp_tool, self.connectors_api)
+
         # Emit mcp_list_tools.in_progress
         self.sequence_number += 1
         yield OpenAIResponseObjectStreamResponseMcpListToolsInProgress(
@@ -1489,3 +1544,25 @@ async def _process_tool_choice(
         tools=tool_choice,
         mode="required",
     )
+
+
+async def resolve_mcp_connector_id(
+    mcp_tool: OpenAIResponseInputToolMCP,
+    connectors_api: Connectors,
+) -> OpenAIResponseInputToolMCP:
+    """Resolve connector_id to server_url for an MCP tool.
+
+    If the mcp_tool has a connector_id but no server_url, this function
+    looks up the connector and populates the server_url from it.
+
+    Args:
+        mcp_tool: The MCP tool configuration to resolve
+        connectors_api: The connectors API for looking up connectors
+
+    Returns:
+        The mcp_tool with server_url populated (may be same instance if already set)
+    """
+    if mcp_tool.connector_id and not mcp_tool.server_url:
+        connector = await connectors_api.get_connector(mcp_tool.connector_id)
+        return mcp_tool.model_copy(update={"server_url": connector.url})
+    return mcp_tool