llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the content changes between publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the two versions as they appear in their public registry.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
The largest per-file change, llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py (+207 -57), is rendered in full below; several removed lines are truncated in the registry's rendering and are left as shown:

```diff
@@ -4,50 +4,58 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import re
 import time
 import uuid
 from collections.abc import AsyncIterator
 
 from pydantic import BaseModel, TypeAdapter
 
-from llama_stack.
-from llama_stack.
-
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.responses.responses_store import (
+    ResponsesStore,
+    _OpenAIResponseObjectWithInputAndMessages,
+)
+from llama_stack_api import (
+    ConversationItem,
+    Conversations,
+    Files,
+    Inference,
+    InvalidConversationIdError,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
+    OpenAIChatCompletionContentPartParam,
     OpenAIDeleteResponseObject,
+    OpenAIMessageParam,
     OpenAIResponseInput,
+    OpenAIResponseInputMessageContentFile,
+    OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
+    OpenAIResponseInputToolChoice,
     OpenAIResponseMessage,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
+    OpenAIResponsePrompt,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
-)
-from llama_stack.apis.common.errors import (
-    InvalidConversationIdError,
-)
-from llama_stack.apis.conversations import Conversations
-from llama_stack.apis.conversations.conversations import ConversationItem
-from llama_stack.apis.inference import (
-    Inference,
-    OpenAIMessageParam,
     OpenAISystemMessageParam,
-
-
-
-
-
-
-
-
+    OpenAIUserMessageParam,
+    Order,
+    Prompts,
+    ResponseGuardrailSpec,
+    ResponseItemInclude,
+    Safety,
+    ToolGroups,
+    ToolRuntime,
+    VectorIO,
 )
 
 from .streaming import StreamingResponseOrchestrator
 from .tool_executor import ToolExecutor
 from .types import ChatCompletionContext, ToolContext
 from .utils import (
+    convert_response_content_to_chat_content,
     convert_response_input_to_chat_messages,
     convert_response_text_to_chat_response_format,
     extract_guardrail_ids,
@@ -69,8 +77,11 @@ class OpenAIResponsesImpl:
         tool_runtime_api: ToolRuntime,
         responses_store: ResponsesStore,
         vector_io_api: VectorIO,  # VectorIO
-        safety_api: Safety,
+        safety_api: Safety | None,
         conversations_api: Conversations,
+        prompts_api: Prompts,
+        files_api: Files,
+        vector_stores_config=None,
     ):
         self.inference_api = inference_api
         self.tool_groups_api = tool_groups_api
@@ -83,14 +94,18 @@ class OpenAIResponsesImpl:
             tool_groups_api=tool_groups_api,
             tool_runtime_api=tool_runtime_api,
             vector_io_api=vector_io_api,
+            vector_stores_config=vector_stores_config,
         )
+        self.prompts_api = prompts_api
+        self.files_api = files_api
 
     async def _prepend_previous_response(
         self,
         input: str | list[OpenAIResponseInput],
         previous_response: _OpenAIResponseObjectWithInputAndMessages,
     ):
-
+        # Convert Sequence to list for mutation
+        new_input_items = list(previous_response.input)
         new_input_items.extend(previous_response.output)
 
         if isinstance(input, str):
@@ -106,7 +121,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None,
         previous_response_id: str | None,
         conversation: str | None,
-    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam]]:
+    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam], ToolContext]:
         """Process input with optional previous response context.
 
         Returns:
@@ -123,15 +138,17 @@
                 # Use stored messages directly and convert only new input
                 message_adapter = TypeAdapter(list[OpenAIMessageParam])
                 messages = message_adapter.validate_python(previous_response.messages)
-                new_messages = await convert_response_input_to_chat_messages(
+                new_messages = await convert_response_input_to_chat_messages(
+                    input, previous_messages=messages, files_api=self.files_api
+                )
                 messages.extend(new_messages)
             else:
                 # Backward compatibility: reconstruct from inputs
-                messages = await convert_response_input_to_chat_messages(all_input)
+                messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
 
             tool_context.recover_tools_from_previous_response(previous_response)
         elif conversation is not None:
-            conversation_items = await self.conversations_api.
+            conversation_items = await self.conversations_api.list_items(conversation, order="asc")
 
             # Use stored messages as source of truth (like previous_response.messages)
             stored_messages = await self.responses_store.get_conversation_messages(conversation)
@@ -139,7 +156,7 @@
             all_input = input
             if not conversation_items.data:
                 # First turn - just convert the new input
-                messages = await convert_response_input_to_chat_messages(input)
+                messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
             else:
                 if not stored_messages:
                     all_input = conversation_items.data
@@ -155,14 +172,82 @@
                     all_input = input
 
                 messages = stored_messages or []
-                new_messages = await convert_response_input_to_chat_messages(
+                new_messages = await convert_response_input_to_chat_messages(
+                    all_input, previous_messages=messages, files_api=self.files_api
+                )
                 messages.extend(new_messages)
         else:
             all_input = input
-            messages = await convert_response_input_to_chat_messages(all_input)
+            messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
 
         return all_input, messages, tool_context
 
+    async def _prepend_prompt(
+        self,
+        messages: list[OpenAIMessageParam],
+        openai_response_prompt: OpenAIResponsePrompt | None,
+    ) -> None:
+        """Prepend prompt template to messages, resolving text/image/file variables.
+
+        :param messages: List of OpenAIMessageParam objects
+        :param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables
+        :returns: string of utf-8 characters
+        """
+        if not openai_response_prompt or not openai_response_prompt.id:
+            return
+
+        prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None
+        cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version)
+
+        if not cur_prompt or not cur_prompt.prompt:
+            return
+
+        cur_prompt_text = cur_prompt.prompt
+        cur_prompt_variables = cur_prompt.variables
+
+        if not openai_response_prompt.variables:
+            messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
+            return
+
+        # Validate that all provided variables exist in the prompt
+        for name in openai_response_prompt.variables.keys():
+            if name not in cur_prompt_variables:
+                raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")
+
+        # Separate text and media variables
+        text_substitutions = {}
+        media_content_parts: list[OpenAIChatCompletionContentPartParam] = []
+
+        for name, value in openai_response_prompt.variables.items():
+            # Text variable found
+            if isinstance(value, OpenAIResponseInputMessageContentText):
+                text_substitutions[name] = value.text
+
+            # Media variable found
+            elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
+                converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
+                if isinstance(converted_parts, list):
+                    media_content_parts.extend(converted_parts)
+
+                # Eg: {{product_photo}} becomes "[Image: product_photo]"
+                # This gives the model textual context about what media exists in the prompt
+                var_type = value.type.replace("input_", "").replace("_", " ").title()
+                text_substitutions[name] = f"[{var_type}: {name}]"
+
+        def replace_variable(match: re.Match[str]) -> str:
+            var_name = match.group(1).strip()
+            return str(text_substitutions.get(var_name, match.group(0)))
+
+        pattern = r"\{\{\s*(\w+)\s*\}\}"
+        processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)
+
+        # Insert system message with resolved text
+        messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text))
+
+        # If we have media, create a new user message because allows to ingest images and files
+        if media_content_parts:
+            messages.append(OpenAIUserMessageParam(content=media_content_parts))
+
     async def get_openai_response(
         self,
         response_id: str,
@@ -184,7 +269,7 @@
         response_id: str,
         after: str | None = None,
         before: str | None = None,
-        include: list[
+        include: list[ResponseItemInclude] | None = None,
         limit: int | None = 20,
         order: Order | None = Order.desc,
     ) -> ListOpenAIResponseInputItem:
@@ -207,6 +292,9 @@
         messages: list[OpenAIMessageParam],
     ) -> None:
         new_input_id = f"msg_{uuid.uuid4()}"
+        # Type input_items_data as the full OpenAIResponseInput union to avoid list invariance issues
+        input_items_data: list[OpenAIResponseInput] = []
+
         if isinstance(input, str):
             # synthesize a message from the input string
             input_content = OpenAIResponseInputMessageContentText(text=input)
@@ -218,7 +306,6 @@
             input_items_data = [input_content_item]
         else:
             # we already have a list of messages
-            input_items_data = []
             for input_item in input:
                 if isinstance(input_item, OpenAIResponseMessage):
                     # These may or may not already have an id, so dump to dict, check for id, and add if missing
@@ -239,6 +326,7 @@
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
@@ -246,16 +334,41 @@
         stream: bool | None = False,
         temperature: float | None = None,
         text: OpenAIResponseText | None = None,
+        tool_choice: OpenAIResponseInputToolChoice | None = None,
         tools: list[OpenAIResponseInputTool] | None = None,
-        include: list[
+        include: list[ResponseItemInclude] | None = None,
         max_infer_iters: int | None = 10,
-        guardrails: list[ResponseGuardrailSpec] | None = None,
+        guardrails: list[str | ResponseGuardrailSpec] | None = None,
+        parallel_tool_calls: bool | None = None,
+        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
     ):
         stream = bool(stream)
         text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
 
+        # Validate MCP tools: ensure Authorization header is not passed via headers dict
+        if tools:
+            from llama_stack_api.openai_responses import OpenAIResponseInputToolMCP
+
+            for tool in tools:
+                if isinstance(tool, OpenAIResponseInputToolMCP) and tool.headers:
+                    for key in tool.headers.keys():
+                        if key.lower() == "authorization":
+                            raise ValueError(
+                                "Authorization header cannot be passed via 'headers'. "
+                                "Please use the 'authorization' parameter instead."
+                            )
+
         guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else []
 
+        # Validate that Safety API is available if guardrails are requested
+        if guardrail_ids and self.safety_api is None:
+            raise ValueError(
+                "Cannot process guardrails: Safety API is not configured.\n\n"
+                "To use guardrails, ensure the Safety API is configured in your stack, or remove "
+                "the 'guardrails' parameter from your request."
+            )
+
         if conversation is not None:
             if previous_response_id is not None:
                 raise ValueError(
@@ -265,18 +378,27 @@
             if not conversation.startswith("conv_"):
                 raise InvalidConversationIdError(conversation)
 
+        if max_tool_calls is not None and max_tool_calls < 1:
+            raise ValueError(f"Invalid {max_tool_calls=}; should be >= 1")
+
         stream_gen = self._create_streaming_response(
             input=input,
             conversation=conversation,
             model=model,
+            prompt=prompt,
             instructions=instructions,
             previous_response_id=previous_response_id,
             store=store,
             temperature=temperature,
             text=text,
             tools=tools,
+            tool_choice=tool_choice,
             max_infer_iters=max_infer_iters,
             guardrail_ids=guardrail_ids,
+            parallel_tool_calls=parallel_tool_calls,
+            max_tool_calls=max_tool_calls,
+            metadata=metadata,
+            include=include,
         )
 
         if stream:
@@ -287,16 +409,19 @@
         failed_response = None
 
         async for stream_chunk in stream_gen:
-
-
-
-
-
-
-
-
-
-
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    if final_response is not None:
+                        raise ValueError(
+                            "The response stream produced multiple terminal responses! "
+                            f"Earlier response from {final_event_type}"
+                        )
+                    final_response = stream_chunk.response
+                    final_event_type = stream_chunk.type
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case _:
+                    pass  # Other event types don't have .response
 
         if failed_response is not None:
             error_message = (
@@ -317,13 +442,24 @@
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
+        prompt: OpenAIResponsePrompt | None = None,
         store: bool | None = True,
         temperature: float | None = None,
         text: OpenAIResponseText | None = None,
         tools: list[OpenAIResponseInputTool] | None = None,
+        tool_choice: OpenAIResponseInputToolChoice | None = None,
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
+        parallel_tool_calls: bool | None = True,
+        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
+        include: list[ResponseItemInclude] | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
+        # These should never be None when called from create_openai_response (which sets defaults)
+        # but we assert here to help mypy understand the types
+        assert text is not None, "text must not be None"
+        assert max_infer_iters is not None, "max_infer_iters must not be None"
+
         # Input preprocessing
         all_input, messages, tool_context = await self._process_input_with_previous_response(
             input, tools, previous_response_id, conversation
@@ -332,6 +468,9 @@
         if instructions:
            messages.insert(0, OpenAISystemMessageParam(content=instructions))
 
+        # Prepend reusable prompt (if provided)
+        await self._prepend_prompt(messages, prompt)
+
         # Structured outputs
         response_format = await convert_response_text_to_chat_response_format(text)
 
@@ -339,6 +478,7 @@
             model=model,
             messages=messages,
             response_tools=tools,
+            tool_choice=tool_choice,
             temperature=temperature,
             response_format=response_format,
             tool_context=tool_context,
@@ -354,32 +494,39 @@
             ctx=ctx,
             response_id=response_id,
             created_at=created_at,
+            prompt=prompt,
             text=text,
             max_infer_iters=max_infer_iters,
+            parallel_tool_calls=parallel_tool_calls,
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
             instructions=instructions,
+            max_tool_calls=max_tool_calls,
+            metadata=metadata,
+            include=include,
         )
 
         # Stream the response
         final_response = None
         failed_response = None
 
-
+        # Type as ConversationItem to avoid list invariance issues
+        output_items: list[ConversationItem] = []
         async for stream_chunk in orchestrator.create_response():
-
-
-
-
-
-
-
-
-
-
-
-            #
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    final_response = stream_chunk.response
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case "response.output_item.done":
+                    item = stream_chunk.item
+                    output_items.append(item)
+                case _:
+                    pass  # Other event types
+
+            # Store and sync before yielding terminal events
+            # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
             if (
                 stream_chunk.type in {"response.completed", "response.incomplete"}
                 and final_response
@@ -400,6 +547,8 @@
                 await self._sync_response_to_conversation(conversation, input, output_items)
                 await self.responses_store.store_conversation_messages(conversation, messages_to_store)
 
+            yield stream_chunk
+
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         return await self.responses_store.delete_response_object(response_id)
 
@@ -407,7 +556,8 @@
         self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
     ) -> None:
         """Sync content and response messages to the conversation."""
-
+        # Type as ConversationItem union to avoid list invariance issues
+        conversation_items: list[ConversationItem] = []
 
         if isinstance(input, str):
             conversation_items.append(
```