llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/doc_template.md +209 -0
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/doc_template.md +170 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/oci/doc_template.md +140 -0
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/models/llama/resources/dog.jpg +0 -0
- llama_stack/models/llama/resources/pasta.jpeg +0 -0
- llama_stack/models/llama/resources/small_dog.jpg +0 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
- llama_stack/providers/remote/eval/nvidia/README.md +134 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/README.md +266 -0
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/README.md +78 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/__init__.py +0 -25
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +1 -158
- llama_stack/providers/utils/inference/openai_mixin.py +42 -2
- llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +40 -6
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
- llama_stack-0.5.0.dist-info/top_level.txt +1 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- llama_stack-0.4.3.dist-info/top_level.txt +0 -2
- llama_stack_api/__init__.py +0 -945
- llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/admin/api.py +0 -72
- llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/admin/models.py +0 -113
- llama_stack_api/agents.py +0 -173
- llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/batches/api.py +0 -53
- llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/batches/models.py +0 -78
- llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/common/errors.py +0 -95
- llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/common/responses.py +0 -77
- llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/connectors.py +0 -146
- llama_stack_api/conversations.py +0 -270
- llama_stack_api/datasetio.py +0 -55
- llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/datatypes.py +0 -373
- llama_stack_api/eval.py +0 -137
- llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/files/api.py +0 -51
- llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/files/models.py +0 -107
- llama_stack_api/inference.py +0 -1169
- llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/__init__.py +0 -945
- llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/llama_stack_api/admin/api.py +0 -72
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/llama_stack_api/admin/models.py +0 -113
- llama_stack_api/llama_stack_api/agents.py +0 -173
- llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/llama_stack_api/batches/api.py +0 -53
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/llama_stack_api/batches/models.py +0 -78
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/llama_stack_api/common/errors.py +0 -95
- llama_stack_api/llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/llama_stack_api/common/responses.py +0 -77
- llama_stack_api/llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/llama_stack_api/connectors.py +0 -146
- llama_stack_api/llama_stack_api/conversations.py +0 -270
- llama_stack_api/llama_stack_api/datasetio.py +0 -55
- llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/llama_stack_api/datatypes.py +0 -373
- llama_stack_api/llama_stack_api/eval.py +0 -137
- llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/llama_stack_api/files/api.py +0 -51
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/llama_stack_api/files/models.py +0 -107
- llama_stack_api/llama_stack_api/inference.py +0 -1169
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
- llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/models.py +0 -171
- llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/llama_stack_api/post_training.py +0 -370
- llama_stack_api/llama_stack_api/prompts.py +0 -203
- llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/llama_stack_api/providers/api.py +0 -16
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/llama_stack_api/providers/models.py +0 -24
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/llama_stack_api/resource.py +0 -37
- llama_stack_api/llama_stack_api/router_utils.py +0 -160
- llama_stack_api/llama_stack_api/safety.py +0 -132
- llama_stack_api/llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/llama_stack_api/scoring.py +0 -93
- llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/llama_stack_api/shields.py +0 -93
- llama_stack_api/llama_stack_api/tools.py +0 -226
- llama_stack_api/llama_stack_api/vector_io.py +0 -941
- llama_stack_api/llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/llama_stack_api/version.py +0 -9
- llama_stack_api/models.py +0 -171
- llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/post_training.py +0 -370
- llama_stack_api/prompts.py +0 -203
- llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/providers/api.py +0 -16
- llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/providers/models.py +0 -24
- llama_stack_api/py.typed +0 -0
- llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/resource.py +0 -37
- llama_stack_api/router_utils.py +0 -160
- llama_stack_api/safety.py +0 -132
- llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/scoring.py +0 -93
- llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/shields.py +0 -93
- llama_stack_api/tools.py +0 -226
- llama_stack_api/vector_io.py +0 -941
- llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/version.py +0 -9
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
llama_stack/providers/inline/agents/meta_reference/agents.py

@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from collections.abc import AsyncIterator

 from llama_stack.core.datatypes import AccessRule
 from llama_stack.core.storage.kvstore import InmemoryKVStoreImpl, kvstore_impl
@@ -11,21 +12,21 @@ from llama_stack.log import get_logger
 from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack_api import (
     Agents,
+    Connectors,
     Conversations,
+    CreateResponseRequest,
+    DeleteResponseRequest,
     Files,
     Inference,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
+    ListResponseInputItemsRequest,
+    ListResponsesRequest,
     OpenAIDeleteResponseObject,
-    OpenAIResponseInput,
-    OpenAIResponseInputTool,
-    OpenAIResponseInputToolChoice,
     OpenAIResponseObject,
-
-    OpenAIResponseText,
-    Order,
+    OpenAIResponseObjectStream,
     Prompts,
-
+    RetrieveResponseRequest,
     Safety,
     ToolGroups,
     ToolRuntime,
@@ -50,6 +51,7 @@ class MetaReferenceAgentsImpl(Agents):
         conversations_api: Conversations,
         prompts_api: Prompts,
         files_api: Files,
+        connectors_api: Connectors,
         policy: list[AccessRule],
     ):
         self.config = config
@@ -64,6 +66,7 @@ class MetaReferenceAgentsImpl(Agents):
         self.in_memory_store = InmemoryKVStoreImpl()
         self.openai_responses_impl: OpenAIResponsesImpl | None = None
         self.policy = policy
+        self.connectors_api = connectors_api

     async def initialize(self) -> None:
         self.persistence_store = await kvstore_impl(self.config.persistence.agent_state)
@@ -80,6 +83,7 @@ class MetaReferenceAgentsImpl(Agents):
             prompts_api=self.prompts_api,
             files_api=self.files_api,
             vector_stores_config=self.config.vector_stores_config,
+            connectors_api=self.connectors_api,
         )

     async def shutdown(self) -> None:
@@ -88,79 +92,72 @@ class MetaReferenceAgentsImpl(Agents):
     # OpenAI responses
     async def get_openai_response(
         self,
-
+        request: RetrieveResponseRequest,
     ) -> OpenAIResponseObject:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
-        return await self.openai_responses_impl.get_openai_response(response_id)
+        return await self.openai_responses_impl.get_openai_response(request.response_id)

     async def create_openai_response(
         self,
-
-
-
-
-
-
-
-        store: bool | None = True,
-        stream: bool | None = False,
-        temperature: float | None = None,
-        text: OpenAIResponseText | None = None,
-        tool_choice: OpenAIResponseInputToolChoice | None = None,
-        tools: list[OpenAIResponseInputTool] | None = None,
-        include: list[str] | None = None,
-        max_infer_iters: int | None = 10,
-        guardrails: list[ResponseGuardrail] | None = None,
-        max_tool_calls: int | None = None,
-        metadata: dict[str, str] | None = None,
-    ) -> OpenAIResponseObject:
+        request: CreateResponseRequest,
+    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
+        """Create an OpenAI response.
+
+        Returns either a single response object (non-streaming) or an async iterator
+        yielding response stream events (streaming).
+        """
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
         result = await self.openai_responses_impl.create_openai_response(
-            input,
-            model,
-            prompt,
-            instructions,
-            previous_response_id,
-            conversation,
-            store,
-            stream,
-            temperature,
-            text,
-            tool_choice,
-            tools,
-            include,
-            max_infer_iters,
-            guardrails,
-            parallel_tool_calls,
-            max_tool_calls,
-
+            request.input,
+            request.model,
+            request.prompt,
+            request.instructions,
+            request.previous_response_id,
+            request.conversation,
+            request.store,
+            request.stream,
+            request.temperature,
+            request.text,
+            request.tool_choice,
+            request.tools,
+            request.include,
+            request.max_infer_iters,
+            request.guardrails,
+            request.parallel_tool_calls,
+            request.max_tool_calls,
+            request.max_output_tokens,
+            request.reasoning,
+            request.safety_identifier,
+            request.metadata,
         )
-        return result
+        return result

     async def list_openai_responses(
         self,
-
-        limit: int | None = 50,
-        model: str | None = None,
-        order: Order | None = Order.desc,
+        request: ListResponsesRequest,
     ) -> ListOpenAIResponseObject:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
-        return await self.openai_responses_impl.list_openai_responses(
+        return await self.openai_responses_impl.list_openai_responses(
+            request.after, request.limit, request.model, request.order
+        )

     async def list_openai_response_input_items(
         self,
-
-        after: str | None = None,
-        before: str | None = None,
-        include: list[str] | None = None,
-        limit: int | None = 20,
-        order: Order | None = Order.desc,
+        request: ListResponseInputItemsRequest,
     ) -> ListOpenAIResponseInputItem:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
         return await self.openai_responses_impl.list_openai_response_input_items(
-            response_id,
+            request.response_id,
+            request.after,
+            request.before,
+            request.include,
+            request.limit,
+            request.order,
         )

-    async def delete_openai_response(
+    async def delete_openai_response(
+        self,
+        request: DeleteResponseRequest,
+    ) -> OpenAIDeleteResponseObject:
         assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
-        return await self.openai_responses_impl.delete_openai_response(response_id)
+        return await self.openai_responses_impl.delete_openai_response(request.response_id)
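In 0.5.0 the agents API moves from flat keyword arguments to one request object per endpoint (CreateResponseRequest, RetrieveResponseRequest, ListResponsesRequest, ListResponseInputItemsRequest, DeleteResponseRequest) and threads a new Connectors dependency through the implementation. A minimal caller sketch under the new signatures follows; it assumes the request models are Pydantic-style classes whose constructors accept the same field names the implementation reads back (input, model, stream, response_id, ...), and the model identifier is a placeholder, neither of which is confirmed by this diff alone.

```python
# Hypothetical caller sketch, not part of the package: assumes the request models
# accept these field names and that the returned response object exposes `id`/`status`.
from llama_stack_api import CreateResponseRequest, RetrieveResponseRequest


async def demo(agents) -> None:
    # 0.4.3 style was create_openai_response(input=..., model=..., stream=False, ...);
    # 0.5.0 passes a single request object per endpoint instead.
    created = await agents.create_openai_response(
        CreateResponseRequest(
            input="Summarize the changes between 0.4.3 and 0.5.0",
            model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder identifier
            stream=False,
        )
    )
    fetched = await agents.get_openai_response(RetrieveResponseRequest(response_id=created.id))
    print(fetched.status)
```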
llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py

@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import asyncio
 import re
 import time
 import uuid
@@ -19,11 +18,14 @@ from llama_stack.providers.utils.responses.responses_store import (
 )
 from llama_stack.providers.utils.tools.mcp import MCPSessionManager
 from llama_stack_api import (
+    AddItemsRequest,
+    Connectors,
     ConversationItem,
     Conversations,
     Files,
     Inference,
     InvalidConversationIdError,
+    ListItemsRequest,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIChatCompletionContentPartParam,
@@ -39,6 +41,7 @@ from llama_stack_api import (
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
     OpenAIResponsePrompt,
+    OpenAIResponseReasoning,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
     OpenAISystemMessageParam,
@@ -83,6 +86,7 @@ class OpenAIResponsesImpl:
         conversations_api: Conversations,
         prompts_api: Prompts,
         files_api: Files,
+        connectors_api: Connectors,
         vector_stores_config=None,
     ):
         self.inference_api = inference_api
@@ -100,6 +104,7 @@ class OpenAIResponsesImpl:
         )
         self.prompts_api = prompts_api
         self.files_api = files_api
+        self.connectors_api = connectors_api

     async def _prepend_previous_response(
         self,
@@ -150,7 +155,9 @@ class OpenAIResponsesImpl:

             tool_context.recover_tools_from_previous_response(previous_response)
         elif conversation is not None:
-            conversation_items = await self.conversations_api.list_items(
+            conversation_items = await self.conversations_api.list_items(
+                ListItemsRequest(conversation_id=conversation, order="asc")
+            )

             # Use stored messages as source of truth (like previous_response.messages)
             stored_messages = await self.responses_store.get_conversation_messages(conversation)
@@ -324,6 +331,125 @@ class OpenAIResponsesImpl:
             messages=messages,
         )

+    def _prepare_input_items_for_storage(
+        self,
+        input: str | list[OpenAIResponseInput],
+    ) -> list[OpenAIResponseInput]:
+        """Prepare input items for storage, adding IDs where needed.
+
+        This method is called once at the start of streaming to prepare input items
+        that will be reused across multiple persistence calls during streaming.
+        """
+        new_input_id = f"msg_{uuid.uuid4()}"
+        input_items_data: list[OpenAIResponseInput] = []
+
+        if isinstance(input, str):
+            input_content = OpenAIResponseInputMessageContentText(text=input)
+            input_content_item = OpenAIResponseMessage(
+                role="user",
+                content=[input_content],
+                id=new_input_id,
+            )
+            input_items_data = [input_content_item]
+        else:
+            for input_item in input:
+                if isinstance(input_item, OpenAIResponseMessage):
+                    input_item_dict = input_item.model_dump()
+                    if "id" not in input_item_dict:
+                        input_item_dict["id"] = new_input_id
+                    input_items_data.append(OpenAIResponseMessage(**input_item_dict))
+                else:
+                    input_items_data.append(input_item)
+
+        return input_items_data
+
+    async def _persist_streaming_state(
+        self,
+        stream_chunk: OpenAIResponseObjectStream,
+        orchestrator,
+        input_items: list[OpenAIResponseInput],
+        output_items: list,
+    ) -> None:
+        """Persist response state at significant streaming events.
+
+        This enables clients to poll GET /v1/responses/{response_id} during streaming
+        to see in-progress turn state instead of empty results.
+
+        Persistence occurs at:
+        - response.in_progress: Initial INSERT with empty output
+        - response.output_item.done: UPDATE with accumulated output items
+        - response.completed/response.incomplete: Final UPDATE with complete state
+        - response.failed: UPDATE with error state
+
+        :param stream_chunk: The current streaming event.
+        :param orchestrator: The streaming orchestrator (for snapshotting response).
+        :param input_items: Pre-prepared input items for storage.
+        :param output_items: Accumulated output items so far.
+        """
+        try:
+            match stream_chunk.type:
+                case "response.in_progress":
+                    # Initial persistence when response starts
+                    in_progress_response = stream_chunk.response
+                    await self.responses_store.upsert_response_object(
+                        response_object=in_progress_response,
+                        input=input_items,
+                        messages=[],
+                    )
+
+                case "response.output_item.done":
+                    # Incremental update when an output item completes (tool call, message)
+                    current_snapshot = orchestrator._snapshot_response(
+                        status="in_progress",
+                        outputs=output_items,
+                    )
+                    # Get current messages (filter out system messages)
+                    messages_to_store = list(
+                        filter(
+                            lambda x: not isinstance(x, OpenAISystemMessageParam),
+                            orchestrator.final_messages or orchestrator.ctx.messages,
+                        )
+                    )
+                    await self.responses_store.upsert_response_object(
+                        response_object=current_snapshot,
+                        input=input_items,
+                        messages=messages_to_store,
+                    )
+
+                case "response.completed" | "response.incomplete":
+                    # Final persistence when response finishes
+                    final_response = stream_chunk.response
+                    messages_to_store = list(
+                        filter(
+                            lambda x: not isinstance(x, OpenAISystemMessageParam),
+                            orchestrator.final_messages,
+                        )
+                    )
+                    await self.responses_store.upsert_response_object(
+                        response_object=final_response,
+                        input=input_items,
+                        messages=messages_to_store,
+                    )
+
+                case "response.failed":
+                    # Persist failed state so GET shows error
+                    failed_response = stream_chunk.response
+                    # Preserve any accumulated non-system messages for failed responses
+                    messages_to_store = list(
+                        filter(
+                            lambda x: not isinstance(x, OpenAISystemMessageParam),
+                            orchestrator.final_messages or orchestrator.ctx.messages,
+                        )
+                    )
+                    await self.responses_store.upsert_response_object(
+                        response_object=failed_response,
+                        input=input_items,
+                        messages=messages_to_store,
+                    )
+        except Exception as e:
+            # Best-effort persistence: log error but don't fail the stream
+            logger.warning(f"Failed to persist streaming state for {stream_chunk.type}: {e}")
+
     async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
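The new _persist_streaming_state hook upserts the stored response at response.in_progress, response.output_item.done, the terminal response.completed/response.incomplete events, and response.failed, so a client can poll GET /v1/responses/{response_id} while the stream is still running. A hedged client-side sketch of that polling loop follows; it assumes a stack listening on http://localhost:8321 and a JSON body carrying "status" and "output" fields, neither of which is guaranteed by this diff alone.

```python
# Illustrative polling client, not part of the package: run alongside a task that
# consumes the streamed response, and stop once the response leaves "in_progress".
import asyncio

import httpx


async def poll_response(response_id: str, base_url: str = "http://localhost:8321") -> None:
    async with httpx.AsyncClient(base_url=base_url) as client:
        while True:
            r = await client.get(f"/v1/responses/{response_id}")
            r.raise_for_status()
            body = r.json()
            print(body["status"], len(body.get("output", [])), "output items")
            if body["status"] != "in_progress":
                return
            await asyncio.sleep(1.0)
```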
@@ -343,6 +469,9 @@ class OpenAIResponsesImpl:
         guardrails: list[str | ResponseGuardrailSpec] | None = None,
         parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
+        reasoning: OpenAIResponseReasoning | None = None,
+        max_output_tokens: int | None = None,
+        safety_identifier: str | None = None,
         metadata: dict[str, str] | None = None,
     ):
         stream = bool(stream)
@@ -380,9 +509,6 @@
             if not conversation.startswith("conv_"):
                 raise InvalidConversationIdError(conversation)

-        if max_tool_calls is not None and max_tool_calls < 1:
-            raise ValueError(f"Invalid {max_tool_calls=}; should be >= 1")
-
         stream_gen = self._create_streaming_response(
             input=input,
             conversation=conversation,
@@ -399,6 +525,9 @@
             guardrail_ids=guardrail_ids,
             parallel_tool_calls=parallel_tool_calls,
             max_tool_calls=max_tool_calls,
+            reasoning=reasoning,
+            max_output_tokens=max_output_tokens,
+            safety_identifier=safety_identifier,
             metadata=metadata,
             include=include,
         )
@@ -454,6 +583,9 @@
         guardrail_ids: list[str] | None = None,
         parallel_tool_calls: bool | None = True,
         max_tool_calls: int | None = None,
+        reasoning: OpenAIResponseReasoning | None = None,
+        max_output_tokens: int | None = None,
+        safety_identifier: str | None = None,
         metadata: dict[str, str] | None = None,
         include: list[ResponseItemInclude] | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
@@ -493,42 +625,45 @@

         # Create a per-request MCP session manager for session reuse (fix for #4452)
         # This avoids redundant tools/list calls when making multiple MCP tool invocations
-
-
-
-
-
-
-
-
-            mcp_session_manager=mcp_session_manager,
-        )
+        async with MCPSessionManager() as mcp_session_manager:
+            request_tool_executor = ToolExecutor(
+                tool_groups_api=self.tool_groups_api,
+                tool_runtime_api=self.tool_runtime_api,
+                vector_io_api=self.vector_io_api,
+                vector_stores_config=self.tool_executor.vector_stores_config,
+                mcp_session_manager=mcp_session_manager,
+            )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            orchestrator = StreamingResponseOrchestrator(
+                inference_api=self.inference_api,
+                ctx=ctx,
+                response_id=response_id,
+                created_at=created_at,
+                prompt=prompt,
+                text=text,
+                max_infer_iters=max_infer_iters,
+                parallel_tool_calls=parallel_tool_calls,
+                tool_executor=request_tool_executor,
+                safety_api=self.safety_api,
+                connectors_api=self.connectors_api,
+                guardrail_ids=guardrail_ids,
+                instructions=instructions,
+                max_tool_calls=max_tool_calls,
+                reasoning=reasoning,
+                max_output_tokens=max_output_tokens,
+                safety_identifier=safety_identifier,
+                metadata=metadata,
+                include=include,
+                store=store,
+            )

-
-
-
+            final_response = None
+            failed_response = None
+
+            output_items: list[ConversationItem] = []
+
+            input_items_for_storage = self._prepare_input_items_for_storage(all_input)

-        # Type as ConversationItem to avoid list invariance issues
-        output_items: list[ConversationItem] = []
-        try:
             async for stream_chunk in orchestrator.create_response():
                 match stream_chunk.type:
                     case "response.completed" | "response.incomplete":
@@ -541,6 +676,16 @@
                     case _:
                         pass # Other event types

+                # Incremental persistence: persist on significant state changes
+                # This enables clients to poll GET /v1/responses/{response_id} during streaming
+                if store:
+                    await self._persist_streaming_state(
+                        stream_chunk=stream_chunk,
+                        orchestrator=orchestrator,
+                        input_items=input_items_for_storage,
+                        output_items=output_items,
+                    )
+
                 # Store and sync before yielding terminal events
                 # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
                 if (
@@ -548,32 +693,14 @@
                     and final_response
                     and failed_response is None
                 ):
-                    messages_to_store = list(
-                        filter(lambda x: not isinstance(x, OpenAISystemMessageParam), orchestrator.final_messages)
-                    )
-                    if store:
-                        # TODO: we really should work off of output_items instead of "final_messages"
-                        await self._store_response(
-                            response=final_response,
-                            input=all_input,
-                            messages=messages_to_store,
-                        )
-
                     if conversation:
+                        messages_to_store = list(
+                            filter(lambda x: not isinstance(x, OpenAISystemMessageParam), orchestrator.final_messages)
+                        )
                         await self._sync_response_to_conversation(conversation, input, output_items)
                         await self.responses_store.store_conversation_messages(conversation, messages_to_store)

                 yield stream_chunk
-        finally:
-            # Clean up MCP sessions at the end of the request (fix for #4452)
-            # Use shield() to prevent cancellation from interrupting cleanup and leaking resources
-            # Wrap in try/except as cleanup errors should not mask the original response
-            try:
-                await asyncio.shield(mcp_session_manager.close_all())
-            except BaseException as e:
-                # Debug level - cleanup errors are expected in streaming scenarios where
-                # anyio cancel scopes may be in a different task context
-                logger.debug(f"Error during MCP session cleanup: {e}")

     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         return await self.responses_store.delete_response_object(response_id)
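The hunk above drops the hand-rolled finally/asyncio.shield cleanup because the per-request MCP sessions are now owned by the async with MCPSessionManager() block introduced in the @@ -493,42 +625,45 @@ hunk. The sketch below is a generic illustration of that pattern, not the actual MCPSessionManager implementation: __aexit__ runs on normal exit, on exceptions, and when the surrounding async generator is closed early, which is what the removed shield-based cleanup was trying to guarantee.

```python
# Generic async-context-manager sketch (hypothetical names, not the package's API):
# the manager owns sessions opened during a request and closes them on exit.
from types import TracebackType


class SessionManager:
    def __init__(self) -> None:
        self._sessions: list[object] = []

    async def open(self, target: str) -> object:
        session = {"target": target}  # stand-in for a real MCP session
        self._sessions.append(session)
        return session

    async def close_all(self) -> None:
        self._sessions.clear()

    async def __aenter__(self) -> "SessionManager":
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        # Runs even if the body raises or the consumer stops iterating early.
        await self.close_all()


async def handle_request() -> None:
    async with SessionManager() as sessions:
        await sessions.open("mcp://tools")
        # ... invoke tools; sessions are reused within the request ...
    # close_all() has already run here
```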
@@ -596,4 +723,4 @@

         adapter = TypeAdapter(list[ConversationItem])
         validated_items = adapter.validate_python(conversation_items)
-        await self.conversations_api.add_items(conversation_id, validated_items)
+        await self.conversations_api.add_items(conversation_id, AddItemsRequest(items=validated_items))