llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
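The dominant pattern in this listing is a package split: the `llama_stack.apis.*` modules become a new top-level `llama_stack_api` package (hence the `top_level.txt +2 -0` entry), and the kvstore/sqlstore utilities move from `llama_stack.providers.utils` to `llama_stack.core.storage`. As orientation for the hunks below, a minimal before/after import sketch, using only symbols that appear in the removed and added lines of those hunks (exact re-exports may vary by module):

```python
# 0.3.5: API types and storage helpers lived inside the llama_stack package.
# from llama_stack.apis.inference import OpenAIMessageParam
# from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
# from llama_stack.providers.utils.kvstore import kvstore_impl

# 0.4.1: API types come from the new llama_stack_api top-level package;
# storage helpers now live under llama_stack.core.storage.
from llama_stack.core.storage.kvstore import kvstore_impl
from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
```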
llama_stack/providers/inline/agents/meta_reference/responses/utils.py (+201 -33):

```diff
@@ -5,15 +5,36 @@
 # the root directory of this source tree.
 
 import asyncio
+import base64
+import mimetypes
 import re
 import uuid
+from collections.abc import Sequence
 
-from llama_stack.apis.agents.openai_responses import (
-
+from llama_stack_api import (
+    Files,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIChatCompletionToolCall,
+    OpenAIChatCompletionToolCallFunction,
+    OpenAIChoice,
+    OpenAIDeveloperMessageParam,
+    OpenAIFile,
+    OpenAIFileFile,
+    OpenAIImageURL,
+    OpenAIJSONSchema,
+    OpenAIMessageParam,
     OpenAIResponseAnnotationFileCitation,
+    OpenAIResponseFormatJSONObject,
+    OpenAIResponseFormatJSONSchema,
+    OpenAIResponseFormatParam,
+    OpenAIResponseFormatText,
     OpenAIResponseInput,
     OpenAIResponseInputFunctionToolCallOutput,
     OpenAIResponseInputMessageContent,
+    OpenAIResponseInputMessageContentFile,
     OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
@@ -22,32 +43,58 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseMessage,
     OpenAIResponseOutputMessageContent,
     OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageFileSearchToolCall,
     OpenAIResponseOutputMessageFunctionToolCall,
     OpenAIResponseOutputMessageMCPCall,
     OpenAIResponseOutputMessageMCPListTools,
+    OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
-)
-from llama_stack.apis.inference import (
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartParam,
-    OpenAIChatCompletionContentPartTextParam,
-    OpenAIChatCompletionToolCall,
-    OpenAIChatCompletionToolCallFunction,
-    OpenAIChoice,
-    OpenAIDeveloperMessageParam,
-    OpenAIImageURL,
-    OpenAIJSONSchema,
-    OpenAIMessageParam,
-    OpenAIResponseFormatJSONObject,
-    OpenAIResponseFormatJSONSchema,
-    OpenAIResponseFormatParam,
-    OpenAIResponseFormatText,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,
     OpenAIUserMessageParam,
+    ResponseGuardrailSpec,
+    Safety,
 )
-
+
+
+async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes:
+    """
+    Extract raw bytes from file using the Files API.
+
+    :param file_id: The file identifier (e.g., "file-abc123")
+    :param files_api: Files API instance
+    :returns: Raw file content as bytes
+    :raises: ValueError if file cannot be retrieved
+    """
+    try:
+        response = await files_api.openai_retrieve_file_content(file_id)
+        return bytes(response.body)
+    except Exception as e:
+        raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(e)}") from e
+
+
+def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str:
+    """
+    Converts raw binary bytes into a safe ASCII text representation for URLs
+
+    :param raw_bytes: the actual bytes that represents file content
+    :returns: string of utf-8 characters
+    """
+    return base64.b64encode(raw_bytes).decode("utf-8")
+
+
+def construct_data_url(ascii_text: str, mime_type: str | None) -> str:
+    """
+    Construct data url with decoded data inside
+
+    :param ascii_text: ASCII content
+    :param mime_type: MIME type of file
+    :returns: data url string (eg. data:image/png,base64,%3Ch1%3EHello%2C%20World%21%3C%2Fh1%3E)
+    """
+    if not mime_type:
+        mime_type = "application/octet-stream"
+
+    return f"data:{mime_type};base64,{ascii_text}"
 
 
 async def convert_chat_choice_to_response_message(
@@ -68,36 +115,107 @@ async def convert_chat_choice_to_response_message(
     )
 
     annotations, clean_text = _extract_citations_from_text(output_content, citation_files or {})
+    logprobs = choice.logprobs.content if choice.logprobs and choice.logprobs.content else None
 
     return OpenAIResponseMessage(
         id=message_id or f"msg_{uuid.uuid4()}",
-        content=[
+        content=[
+            OpenAIResponseOutputMessageContentOutputText(
+                text=clean_text,
+                annotations=list(annotations),
+                logprobs=logprobs,
+            )
+        ],
         status="completed",
         role="assistant",
     )
 
 
 async def convert_response_content_to_chat_content(
-    content:
+    content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
+    files_api: Files | None,
 ) -> str | list[OpenAIChatCompletionContentPartParam]:
     """
     Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
 
     The content schemas of each API look similar, but are not exactly the same.
+
+    :param content: The content to convert
+    :param files_api: Files API for resolving file_id to raw file content (required if content contains files/images)
     """
     if isinstance(content, str):
         return content
 
-
+    # Type with union to avoid list invariance issues
+    converted_parts: list[OpenAIChatCompletionContentPartParam] = []
     for content_part in content:
         if isinstance(content_part, OpenAIResponseInputMessageContentText):
            converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
         elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
             converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
         elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
+            detail = content_part.detail
+            image_mime_type = None
             if content_part.image_url:
-                image_url = OpenAIImageURL(url=content_part.image_url, detail=
+                image_url = OpenAIImageURL(url=content_part.image_url, detail=detail)
                 converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
+            elif content_part.file_id:
+                if files_api is None:
+                    raise ValueError("file_ids are not supported by this implementation of the Stack")
+                image_file_response = await files_api.openai_retrieve_file(content_part.file_id)
+                if image_file_response.filename:
+                    image_mime_type, _ = mimetypes.guess_type(image_file_response.filename)
+                raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api)
+                ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes)
+                image_data_url = construct_data_url(ascii_text, image_mime_type)
+                image_url = OpenAIImageURL(url=image_data_url, detail=detail)
+                converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
+            else:
+                raise ValueError(
+                    f"Image content must have either 'image_url' or 'file_id'. "
+                    f"Got image_url={content_part.image_url}, file_id={content_part.file_id}"
+                )
+        elif isinstance(content_part, OpenAIResponseInputMessageContentFile):
+            resolved_file_data = None
+            file_data = content_part.file_data
+            file_id = content_part.file_id
+            file_url = content_part.file_url
+            filename = content_part.filename
+            file_mime_type = None
+            if not any([file_data, file_id, file_url]):
+                raise ValueError(
+                    f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. "
+                    f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}"
+                )
+            if file_id:
+                if files_api is None:
+                    raise ValueError("file_ids are not supported by this implementation of the Stack")
+
+                file_response = await files_api.openai_retrieve_file(file_id)
+                if not filename:
+                    filename = file_response.filename
+                file_mime_type, _ = mimetypes.guess_type(file_response.filename)
+                raw_file_bytes = await extract_bytes_from_file(file_id, files_api)
+                ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes)
+                resolved_file_data = construct_data_url(ascii_text, file_mime_type)
+            elif file_data:
+                if file_data.startswith("data:"):
+                    resolved_file_data = file_data
+                else:
+                    # Raw base64 data, wrap in data URL format
+                    if filename:
+                        file_mime_type, _ = mimetypes.guess_type(filename)
+                    resolved_file_data = construct_data_url(file_data, file_mime_type)
+            elif file_url:
+                resolved_file_data = file_url
+            converted_parts.append(
+                OpenAIFile(
+                    file=OpenAIFileFile(
+                        file_data=resolved_file_data,
+                        filename=filename,
+                    )
+                )
+            )
         elif isinstance(content_part, str):
             converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
         else:
@@ -110,12 +228,14 @@ async def convert_response_content_to_chat_content
 async def convert_response_input_to_chat_messages(
     input: str | list[OpenAIResponseInput],
     previous_messages: list[OpenAIMessageParam] | None = None,
+    files_api: Files | None = None,
 ) -> list[OpenAIMessageParam]:
     """
     Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
 
     :param input: The input to convert
     :param previous_messages: Optional previous messages to check for function_call references
+    :param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content)
     """
     messages: list[OpenAIMessageParam] = []
     if isinstance(input, list):
@@ -158,22 +278,31 @@ async def convert_response_input_to_chat_messages(
                 ),
             )
             messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
+            # Output can be None, use empty string as fallback
+            output_content = input_item.output if input_item.output is not None else ""
             messages.append(
                 OpenAIToolMessageParam(
-                    content=
+                    content=output_content,
                     tool_call_id=input_item.id,
                 )
             )
         elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
             # the tool list will be handled separately
             pass
+        elif isinstance(
+            input_item,
+            OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall,
+        ):
+            # these tool calls are tracked internally but not converted to chat messages
+            pass
         elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance(
             input_item, OpenAIResponseMCPApprovalResponse
         ):
             # these are handled by the responses impl itself and not pass through to chat completions
             pass
-
-            content
+        elif isinstance(input_item, OpenAIResponseMessage):
+            # Narrow type to OpenAIResponseMessage which has content and role attributes
+            content = await convert_response_content_to_chat_content(input_item.content, files_api)
             message_type = await get_message_type_by_role(input_item.role)
             if message_type is None:
                 raise ValueError(
@@ -191,7 +320,8 @@ async def convert_response_input_to_chat_messages(
                 last_user_content = getattr(last_user_msg, "content", None)
                 if last_user_content == content:
                     continue  # Skip duplicate user message
-
+            # Dynamic message type call - different message types have different content expectations
+            messages.append(message_type(content=content))  # type: ignore[call-arg,arg-type]
     if len(tool_call_results):
         # Check if unpaired function_call_outputs reference function_calls from previous messages
         if previous_messages:
@@ -237,8 +367,11 @@ async def convert_response_text_to_chat_response_format(
     if text.format["type"] == "json_object":
         return OpenAIResponseFormatJSONObject()
     if text.format["type"] == "json_schema":
+        # Assert name exists for json_schema format
+        assert text.format.get("name"), "json_schema format requires a name"
+        schema_name: str = text.format["name"]  # type: ignore[assignment]
         return OpenAIResponseFormatJSONSchema(
-            json_schema=OpenAIJSONSchema(name=
+            json_schema=OpenAIJSONSchema(name=schema_name, schema=text.format["schema"])
         )
     raise ValueError(f"Unsupported text format: {text.format}")
 
@@ -251,7 +384,7 @@ async def get_message_type_by_role(role: str) -> type[OpenAIMessageParam] | None
         "assistant": OpenAIAssistantMessageParam,
         "developer": OpenAIDeveloperMessageParam,
     }
-    return role_to_type.get(role)
+    return role_to_type.get(role)  # type: ignore[return-value]  # Pydantic models use ModelMetaclass
 
 
 def _extract_citations_from_text(
@@ -313,14 +446,19 @@ def is_function_tool_call(
     return False
 
 
-async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[str]) -> str | None:
+async def run_guardrails(safety_api: Safety | None, messages: str, guardrail_ids: list[str]) -> str | None:
     """Run guardrails against messages and return violation message if blocked."""
     if not messages:
         return None
 
+    # If safety API is not available, skip guardrails
+    if safety_api is None:
+        return None
+
     # Look up shields to get their provider_resource_id (actual model ID)
     model_ids = []
-
+    # TODO: list_shields not in Safety interface but available at runtime via API routing
+    shields_list = await safety_api.routing_table.list_shields()  # type: ignore[attr-defined]
 
     for guardrail_id in guardrail_ids:
         matching_shields = [shield for shield in shields_list.data if shield.identifier == guardrail_id]
@@ -337,7 +475,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
     for result in response.results:
         if result.flagged:
             message = result.user_message or "Content blocked by safety guardrails"
-            flagged_categories =
+            flagged_categories = (
+                [cat for cat, flagged in result.categories.items() if flagged] if result.categories else []
+            )
             violation_type = result.metadata.get("violation_type", []) if result.metadata else []
 
             if flagged_categories:
@@ -347,6 +487,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
 
             return message
 
+    # No violations found
+    return None
+
 
 def extract_guardrail_ids(guardrails: list | None) -> list[str]:
     """Extract guardrail IDs from guardrails parameter, handling both string IDs and ResponseGuardrailSpec objects."""
@@ -363,3 +506,28 @@ def extract_guardrail_ids(guardrails: list | None) -> list[str]:
         raise ValueError(f"Unknown guardrail format: {guardrail}, expected str or ResponseGuardrailSpec")
 
     return guardrail_ids
+
+
+def convert_mcp_tool_choice(
+    chat_tool_names: list[str],
+    server_label: str | None = None,
+    server_label_to_tools: dict[str, list[str]] | None = None,
+    tool_name: str | None = None,
+) -> dict[str, str] | list[dict[str, str]]:
+    """Convert a responses tool choice of type mcp to a chat completions compatible function tool choice."""
+
+    if tool_name:
+        if tool_name not in chat_tool_names:
+            return None
+        return {"type": "function", "function": {"name": tool_name}}
+
+    elif server_label and server_label_to_tools:
+        # no tool name specified, so we need to enforce an allowed_tools with the function tools derived only from the given server label
+        # Use reverse mapping for lookup by server_label
+        # This already accounts for allowed_tools restrictions applied during _process_mcp_tool
+        tool_names = server_label_to_tools.get(server_label, [])
+        if not tool_names:
+            return None
+        matching_tools = [{"type": "function", "function": {"name": tool_name}} for tool_name in tool_names]
+        return matching_tools
+    return []
```
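The three new helpers at the top of this file form a small pipeline: fetch bytes via the Files API, base64-encode them, and wrap the result in a data URL that chat-completions image/file parts can carry inline. A minimal sketch of the two pure helpers in isolation (the PNG signature bytes are just illustrative input, not something the package ships):

```python
import base64


def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str:
    # Base64 output is pure ASCII, so it is safe to embed in a URL.
    return base64.b64encode(raw_bytes).decode("utf-8")


def construct_data_url(ascii_text: str, mime_type: str | None) -> str:
    # Fall back to the generic binary MIME type when none could be guessed.
    if not mime_type:
        mime_type = "application/octet-stream"
    return f"data:{mime_type};base64,{ascii_text}"


png_signature = b"\x89PNG\r\n\x1a\n"  # illustrative 8-byte PNG file signature
url = construct_data_url(generate_base64_ascii_text_from_bytes(png_signature), "image/png")
print(url)  # data:image/png;base64,iVBORw0KGgo=
```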
llama_stack/providers/inline/agents/meta_reference/safety.py (+8 -13):

```diff
@@ -6,10 +6,8 @@
 
 import asyncio
 
-from llama_stack.apis.inference import Message
-from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
 from llama_stack.log import get_logger
-from
+from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
 
 log = get_logger(name=__name__, category="agents::meta_reference")
 
@@ -31,16 +29,13 @@ class ShieldRunnerMixin:
         self.input_shields = input_shields
         self.output_shields = output_shields
 
-    async def run_multiple_shields(self, messages: list[
-
-
-
-
-
-
-        )
-
-        responses = await asyncio.gather(*[run_shield_with_span(identifier) for identifier in identifiers])
+    async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
+        responses = await asyncio.gather(
+            *[
+                self.safety_api.run_shield(shield_id=identifier, messages=messages, params={})
+                for identifier in identifiers
+            ]
+        )
         for identifier, response in zip(identifiers, responses, strict=False):
             if not response.violation:
                 continue
```
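The rewritten `run_multiple_shields` drops the old per-shield span wrapper and fans the checks out directly through `asyncio.gather`. A hedged sketch of the same pattern outside the mixin, assuming `OpenAIUserMessageParam` accepts a plain string `content` and that `safety_api` implements the `run_shield` signature shown in the hunk:

```python
import asyncio

from llama_stack_api import OpenAIUserMessageParam


async def flagged_shields(safety_api, shield_ids: list[str], text: str) -> list[str]:
    # One run_shield call per shield identifier, executed concurrently.
    messages = [OpenAIUserMessageParam(content=text)]
    responses = await asyncio.gather(
        *[safety_api.run_shield(shield_id=s, messages=messages, params={}) for s in shield_ids]
    )
    # Return the identifiers whose shields reported a violation.
    return [s for s, r in zip(shield_ids, responses, strict=False) if r.violation]
```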
llama_stack/providers/inline/batches/reference/__init__.py (+2 -4):

```diff
@@ -6,11 +6,9 @@
 
 from typing import Any
 
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.models import Models
 from llama_stack.core.datatypes import AccessRule, Api
-from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack.core.storage.kvstore import kvstore_impl
+from llama_stack_api import Files, Inference, Models
 
 from .batches import ReferenceBatchesImpl
 from .config import ReferenceBatchesImplConfig
```