llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
--- a/llama_stack/providers/utils/responses/responses_store.py
+++ b/llama_stack/providers/utils/responses/responses_store.py
@@ -4,25 +4,22 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.
-
-
-from llama_stack.
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
+from llama_stack.log import get_logger
+from llama_stack_api import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
+    OpenAIMessageParam,
     OpenAIResponseInput,
     OpenAIResponseObject,
     OpenAIResponseObjectWithInput,
+    Order,
 )
-from
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
-from llama_stack.log import get_logger
-
-from ..sqlstore.api import ColumnDefinition, ColumnType
-from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import sqlstore_impl
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
 
 logger = get_logger(name=__name__, category="openai_responses")
 
@@ -252,19 +249,12 @@ class ResponsesStore:
         # Serialize messages to dict format for JSON storage
         messages_data = [msg.model_dump() for msg in messages]
 
-
-
-
-
-
-
-        except Exception:
-            # If insert fails due to ID conflict, update existing record
-            await self.sql_store.update(
-                table="conversation_messages",
-                data={"messages": messages_data},
-                where={"conversation_id": conversation_id},
-            )
+        await self.sql_store.upsert(
+            table="conversation_messages",
+            data={"conversation_id": conversation_id, "messages": messages_data},
+            conflict_columns=["conversation_id"],
+            update_columns=["messages"],
+        )
 
         logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}")
 
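The hunk above replaces an insert-then-catch-and-update fallback with a single atomic `upsert`. A minimal sketch of the insert-or-update contract that call shape implies, using a hypothetical in-memory stand-in rather than the real SQLAlchemy-backed store:

```python
# Hedged illustration only: the argument names come from the diff above, but
# this toy store is not the real AuthorizedSqlStore implementation.
from typing import Any


class InMemoryStore:
    """Toy store illustrating upsert keyed on conflict_columns."""

    def __init__(self) -> None:
        self.tables: dict[str, list[dict[str, Any]]] = {}

    async def upsert(
        self,
        table: str,
        data: dict[str, Any],
        conflict_columns: list[str],
        update_columns: list[str],
    ) -> None:
        rows = self.tables.setdefault(table, [])
        key = tuple(data[c] for c in conflict_columns)
        for row in rows:
            if tuple(row[c] for c in conflict_columns) == key:
                # Conflict: rewrite only update_columns, like ON CONFLICT DO UPDATE
                for c in update_columns:
                    row[c] = data[c]
                return
        rows.append(dict(data))  # no conflict: plain insert
```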
--- a/llama_stack/providers/utils/scoring/aggregation_utils.py
+++ b/llama_stack/providers/utils/scoring/aggregation_utils.py
@@ -6,8 +6,7 @@
 import statistics
 from typing import Any
 
-from
-from llama_stack.apis.scoring_functions import AggregationFunctionType
+from llama_stack_api import AggregationFunctionType, ScoringResultRow
 
 
 def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
--- a/llama_stack/providers/utils/scoring/base_scoring_fn.py
+++ b/llama_stack/providers/utils/scoring/base_scoring_fn.py
@@ -6,9 +6,8 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFn
 from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
+from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
 
 
 class BaseScoringFn(ABC):
--- a/llama_stack/providers/utils/tools/mcp.py
+++ b/llama_stack/providers/utils/tools/mcp.py
@@ -15,18 +15,55 @@ from mcp import types as mcp_types
 from mcp.client.sse import sse_client
 from mcp.client.streamable_http import streamablehttp_client
 
-from llama_stack.
-from llama_stack.
+from llama_stack.core.datatypes import AuthenticationRequiredError
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.ttl_dict import TTLDict
+from llama_stack_api import (
+    ImageContentItem,
+    InterleavedContentItem,
     ListToolDefsResponse,
+    TextContentItem,
     ToolDef,
     ToolInvocationResult,
+    _URLOrData,
 )
-from llama_stack.core.datatypes import AuthenticationRequiredError
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.tools.ttl_dict import TTLDict
 
 logger = get_logger(__name__, category="tools")
 
+
+def prepare_mcp_headers(base_headers: dict[str, str] | None, authorization: str | None) -> dict[str, str]:
+    """
+    Prepare headers for MCP requests with authorization support.
+
+    Args:
+        base_headers: Base headers dictionary (can be None)
+        authorization: OAuth access token (without "Bearer " prefix)
+
+    Returns:
+        Headers dictionary with Authorization header if token provided
+
+    Raises:
+        ValueError: If Authorization header is specified in the headers dict (security risk)
+    """
+    headers = dict(base_headers or {})
+
+    # Security check: reject any Authorization header in the headers dict
+    # Users must use the authorization parameter instead to avoid security risks
+    existing_keys_lower = {k.lower() for k in headers.keys()}
+    if "authorization" in existing_keys_lower:
+        raise ValueError(
+            "For security reasons, Authorization header cannot be passed via 'headers'. "
+            "Please use the 'authorization' parameter instead."
+        )
+
+    # Add Authorization header if token provided
+    if authorization:
+        # OAuth access token - add "Bearer " prefix
+        headers["Authorization"] = f"Bearer {authorization}"
+
+    return headers
+
+
 protocol_cache = TTLDict(ttl_seconds=3600)
 
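For reference, the guarantees of the new `prepare_mcp_headers` helper shown verbatim above; the header and token values in this snippet are placeholders:

```python
# Illustrative use of prepare_mcp_headers as defined in the hunk above.
headers = prepare_mcp_headers({"X-Trace-Id": "abc123"}, authorization="tok_secret")
assert headers == {"X-Trace-Id": "abc123", "Authorization": "Bearer tok_secret"}

# Smuggling Authorization through the headers dict is rejected (case-insensitive):
try:
    prepare_mcp_headers({"authorization": "Bearer tok_secret"}, authorization=None)
except ValueError:
    pass  # callers are directed to the dedicated 'authorization' parameter
```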
@@ -49,7 +86,10 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
     try:
         client = streamablehttp_client
         if strategy == MCPProtol.SSE:
-
+            # sse_client and streamablehttp_client have different signatures, but both
+            # are called the same way here, so we cast to Any to avoid type errors
+            client = cast(Any, sse_client)
+
         async with client(endpoint, headers=headers) as client_streams:
             async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
                 await session.initialize()
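The `cast(Any, ...)` line above is the usual way to reassign a variable to a callable whose type differs from the first assignment. A generic, self-contained illustration with made-up names (not the MCP client types themselves):

```python
from typing import Any, cast


def open_two_streams(url: str) -> tuple[str, str]:
    return (url, "read")


def open_three_streams(url: str) -> tuple[str, str, str]:
    return (url, "read", "write")


client = open_two_streams
# client = open_three_streams   # a type checker rejects this: return types differ
client = cast(Any, open_three_streams)  # type erased; the call site stays the same
streams = client("https://example.com/mcp")
```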
@@ -107,9 +147,29 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
             raise
 
 
-async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse:
+async def list_mcp_tools(
+    endpoint: str,
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
+) -> ListToolDefsResponse:
+    """List tools available from an MCP server.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        List of tool definitions from the MCP server
+
+    Raises:
+        ValueError: If Authorization is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
     tools = []
-    async with client_wrapper(endpoint,
+    async with client_wrapper(endpoint, final_headers) as session:
         tools_result = await session.list_tools()
         for tool in tools_result.tools:
             tools.append(
@@ -127,9 +187,31 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs
 
 
 async def invoke_mcp_tool(
-    endpoint: str,
+    endpoint: str,
+    tool_name: str,
+    kwargs: dict[str, Any],
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
 ) -> ToolInvocationResult:
-
+    """Invoke an MCP tool with the given arguments.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        tool_name: Name of the tool to invoke
+        kwargs: Tool invocation arguments
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        Tool invocation result with content and error information
+
+    Raises:
+        ValueError: If Authorization header is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
+    async with client_wrapper(endpoint, final_headers) as session:
         result = await session.call_tool(tool_name, kwargs)
 
         content: list[InterleavedContentItem] = []
@@ -137,7 +219,7 @@ async def invoke_mcp_tool(
             if isinstance(item, mcp_types.TextContent):
                 content.append(TextContentItem(text=item.text))
             elif isinstance(item, mcp_types.ImageContent):
-                content.append(ImageContentItem(image=item.data))
+                content.append(ImageContentItem(image=_URLOrData(data=item.data)))
             elif isinstance(item, mcp_types.EmbeddedResource):
                 logger.warning(f"EmbeddedResource is not supported: {item}")
             else:
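Putting the two updated entry points together, a hypothetical call site; the endpoint, token, and tool arguments are placeholders, and the `tools.data[0].name` access assumes the `ListToolDefsResponse`/`ToolDef` field names from the imports above:

```python
# Hedged example: values and the target server are made up.
import asyncio


async def main() -> None:
    tools = await list_mcp_tools(
        endpoint="http://localhost:8000/mcp",
        headers={"X-Client": "example"},  # must NOT contain Authorization
        authorization="tok_secret",       # bare token; "Bearer " is added internally
    )
    result = await invoke_mcp_tool(
        endpoint="http://localhost:8000/mcp",
        tool_name=tools.data[0].name,
        kwargs={"query": "hello"},
        authorization="tok_secret",
    )
    print(result.content)


asyncio.run(main())
```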
--- a/llama_stack/providers/utils/vector_io/__init__.py
+++ b/llama_stack/providers/utils/vector_io/__init__.py
@@ -3,3 +3,19 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+from .vector_utils import (
+    WeightedInMemoryAggregator,
+    generate_chunk_id,
+    load_embedded_chunk_with_backward_compat,
+    proper_case,
+    sanitize_collection_name,
+)
+
+__all__ = [
+    "WeightedInMemoryAggregator",
+    "generate_chunk_id",
+    "load_embedded_chunk_with_backward_compat",
+    "proper_case",
+    "sanitize_collection_name",
+]
--- a/llama_stack/providers/utils/vector_io/vector_utils.py
+++ b/llama_stack/providers/utils/vector_io/vector_utils.py
@@ -7,6 +7,9 @@
 import hashlib
 import re
 import uuid
+from typing import Any
+
+from llama_stack_api import EmbeddedChunk
 
 
 def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
@@ -154,3 +157,36 @@ class WeightedInMemoryAggregator:
         # Default to RRF for None, RRF, or any unknown types
         impact_factor = reranker_params.get("impact_factor", 60.0)
         return WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor)
+
+
+def load_embedded_chunk_with_backward_compat(
+    chunk_data: dict[str, Any],
+) -> EmbeddedChunk:
+    """
+    Load EmbeddedChunk data with backward compatibility for legacy field locations.
+
+    Handles migration from old format where embedding_model and embedding_dimension
+    were stored in chunk_metadata to current top-level format.
+
+    Args:
+        chunk_data: Dictionary containing chunk data to load
+
+    Returns:
+        EmbeddedChunk object with migrated data
+    """
+    # Migrate old data: extract embedding_model/embedding_dimension from chunk_metadata if missing
+    if "embedding_model" not in chunk_data:
+        chunk_metadata = chunk_data.get("chunk_metadata", {})
+        chunk_data["embedding_model"] = chunk_metadata.get("chunk_embedding_model", "unknown")
+
+    if "embedding_dimension" not in chunk_data:
+        chunk_metadata = chunk_data.get("chunk_metadata", {})
+        chunk_data["embedding_dimension"] = chunk_metadata.get(
+            "chunk_embedding_dimension", len(chunk_data.get("embedding", []))
+        )
+
+    # Ensure embedding field exists (required by EmbeddedChunk)
+    if "embedding" not in chunk_data:
+        chunk_data["embedding"] = []
+
+    return EmbeddedChunk(**chunk_data)
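A sketch of the migration behavior, assuming a legacy chunk payload of this shape (`EmbeddedChunk` may require additional fields not shown here):

```python
# Illustrative payload; field values are made up.
legacy_chunk = {
    "content": "hello world",
    "embedding": [0.1, 0.2, 0.3],
    "chunk_metadata": {
        "chunk_embedding_model": "all-MiniLM-L6-v2",
        "chunk_embedding_dimension": 3,
    },
}
chunk = load_embedded_chunk_with_backward_compat(legacy_chunk)
# The legacy metadata is lifted to the new top-level fields:
# chunk.embedding_model == "all-MiniLM-L6-v2"
# chunk.embedding_dimension == 3
```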
--- /dev/null
+++ b/llama_stack/telemetry/constants.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+This file contains constants used for naming data captured for telemetry.
+
+This is used to ensure that the data captured for telemetry is consistent and can be used to
+identify and correlate data. If custom telemetry data is added to llama stack, please add
+constants for it here.
+"""
+
+llama_stack_prefix = "llama_stack"
+
+# Safety Attributes
+RUN_SHIELD_OPERATION_NAME = "run_shield"
+
+SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request"
+SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id"
+SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages"
+
+SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response"
+SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata"
+SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level"
+SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message"
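Expanded, the f-string constants above resolve to the following attribute names (derived directly from the definitions):

```python
assert SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE == "llama_stack.safety.request.shield_id"
assert SAFETY_REQUEST_MESSAGES_ATTRIBUTE == "llama_stack.safety.request.messages"
assert SAFETY_RESPONSE_METADATA_ATTRIBUTE == "llama_stack.safety.response.metadata"
assert SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE == "llama_stack.safety.response.violation.level"
assert SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE == "llama_stack.safety.response.violation.user_message"
```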
--- /dev/null
+++ b/llama_stack/telemetry/helpers.py
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+from opentelemetry import trace
+
+from llama_stack_api import OpenAIMessageParam, RunShieldResponse
+
+from .constants import (
+    RUN_SHIELD_OPERATION_NAME,
+    SAFETY_REQUEST_MESSAGES_ATTRIBUTE,
+    SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
+    SAFETY_RESPONSE_METADATA_ATTRIBUTE,
+    SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE,
+    SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
+)
+
+
+def safety_span_name(shield_id: str) -> str:
+    return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}"
+
+
+# TODO: Consider using Wrapt to automatically instrument code
+# This is the industry standard way to package automatically instrumentation in python.
+def safety_request_span_attributes(
+    shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse
+) -> None:
+    span = trace.get_current_span()
+    span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id)
+    messages_json = json.dumps([msg.model_dump() for msg in messages])
+    span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json)
+
+    if response.violation:
+        if response.violation.metadata:
+            metadata_json = json.dumps(response.violation.metadata)
+            span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json)
+        if response.violation.user_message:
+            span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message)
+        span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value)
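A hypothetical emission path for these helpers: open a span named by `safety_span_name`, run the shield, then attach the request/response attributes. The tracer setup and the `safety_api` object are placeholders, not part of the diff:

```python
from opentelemetry import trace

tracer = trace.get_tracer("llama_stack.safety")


async def run_shield_with_telemetry(safety_api, shield_id, messages):
    # Span name becomes e.g. "run_shield llama-guard"
    with tracer.start_as_current_span(safety_span_name(shield_id)):
        response = await safety_api.run_shield(shield_id=shield_id, messages=messages)
        safety_request_span_attributes(shield_id, messages, response)
        return response
```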
--- a/llama_stack/testing/api_recorder.py
+++ b/llama_stack/testing/api_recorder.py
@@ -40,10 +40,12 @@ from openai.types.completion_choice import CompletionChoice
 from llama_stack.core.testing_context import get_test_context, is_debug_mode
 
 # update the "finish_reason" field, since its type definition is wrong (no None is accepted)
-CompletionChoice.model_fields["finish_reason"].annotation =
+CompletionChoice.model_fields["finish_reason"].annotation = cast(
+    type[Any] | None, Literal["stop", "length", "content_filter"] | None
+)
 CompletionChoice.model_rebuild()
 
-REPO_ROOT = Path(__file__).parent.parent.parent
+REPO_ROOT = Path(__file__).parent.parent.parent.parent
 DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/common"
 
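The `finish_reason` patch above relies on the pydantic v2 pattern of mutating `model_fields[...].annotation` and rebuilding the model. A generic sketch with a made-up model (note `force=True`, which may be needed when the schema was already built):

```python
from typing import Literal

from pydantic import BaseModel


class Choice(BaseModel):
    finish_reason: Literal["stop", "length"]


# Widen the field's annotation, then rebuild so validation picks it up.
Choice.model_fields["finish_reason"].annotation = Literal["stop", "length"] | None  # type: ignore[assignment]
Choice.model_rebuild(force=True)

print(Choice(finish_reason=None))  # now accepted, mirroring the CompletionChoice fix
```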
@@ -154,7 +156,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
     }
 
     # Include test_id for isolation, except for shared infrastructure endpoints
-    if parsed.path not in ("/api/tags", "/v1/models"):
+    if parsed.path not in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
         normalized["test_id"] = test_id
 
     normalized_json = json.dumps(normalized, sort_keys=True)
@@ -428,7 +430,7 @@ class ResponseStorage:
 
         # For model-list endpoints, include digest in filename to distinguish different model sets
         endpoint = request.get("endpoint")
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
             digest = _model_identifiers_digest(endpoint, response)
             response_file = f"models-{request_hash}-{digest}.json"
@@ -552,13 +554,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
     Supported endpoints:
     - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
     - '/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
+    - '/v1/openai/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
     Returns a list of unique identifiers or None if structure doesn't match.
     """
     if "models" in response["body"]:
         # ollama
         items = response["body"]["models"]
     else:
-        # openai
+        # openai or openai-style endpoints
         items = response["body"]
     idents = [m.model if endpoint == "/api/tags" else m.id for m in items]
     return sorted(set(idents))
@@ -579,7 +582,7 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     seen: dict[str, dict[str, Any]] = {}
     for rec in records:
         body = rec["response"]["body"]
-        if endpoint
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             for m in body:
                 key = m.id
                 seen[key] = m
@@ -597,19 +600,23 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     if endpoint == "/api/tags":
         from ollama import ListResponse
 
-
+        # Both cast(Any, ...) and type: ignore are needed here:
+        # - cast(Any, ...) attempts to bypass type checking on the argument
+        # - type: ignore is still needed because mypy checks the call site independently
+        #   and reports arg-type mismatch even after casting
+        body = ListResponse(models=cast(Any, ordered))  # type: ignore[arg-type]
         return {"request": canonical_req, "response": {"body": body, "is_streaming": False}}
 
 
 async def _patched_tool_invoke_method(
-    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any]
+    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
 ):
     """Patched version of tool runtime invoke_tool method for recording/replay."""
     global _current_mode, _current_storage
 
     if _current_mode == APIRecordingMode.LIVE or _current_storage is None:
         # Normal operation
-        return await original_method(self, tool_name, kwargs)
+        return await original_method(self, tool_name, kwargs, authorization=authorization)
 
     request_hash = normalize_tool_request(provider_name, tool_name, kwargs)
 
@@ -627,7 +634,7 @@ async def _patched_tool_invoke_method(
 
     if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING):
         # Make the tool call and record it
-        result = await original_method(self, tool_name, kwargs)
+        result = await original_method(self, tool_name, kwargs, authorization=authorization)
 
         request_data = {
             "test_id": get_test_context(),
@@ -659,7 +666,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
         logger.info(f" Test context: {get_test_context()}")
 
     if mode == APIRecordingMode.LIVE or storage is None:
-        if endpoint
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             return original_method(self, *args, **kwargs)
         else:
             return await original_method(self, *args, **kwargs)
@@ -693,7 +700,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     recording = None
     if mode == APIRecordingMode.REPLAY or mode == APIRecordingMode.RECORD_IF_MISSING:
         # Special handling for model-list endpoints: merge all recordings with this hash
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
            records = storage._model_list_responses(request_hash)
            recording = _combine_model_list_responses(endpoint, records)
         else:
@@ -733,13 +740,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     )
 
     if mode == APIRecordingMode.RECORD or (mode == APIRecordingMode.RECORD_IF_MISSING and not recording):
-        if endpoint
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = original_method(self, *args, **kwargs)
         else:
             response = await original_method(self, *args, **kwargs)
 
         # we want to store the result of the iterator, not the iterator itself
-        if endpoint
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = [m async for m in response]
 
         request_data = {
@@ -878,9 +885,11 @@ def patch_inference_clients():
     OllamaAsyncClient.list = patched_ollama_list
 
     # Create patched methods for tool runtimes
-    async def patched_tavily_invoke_tool(
+    async def patched_tavily_invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ):
         return await _patched_tool_invoke_method(
-            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs
+            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs, authorization=authorization
         )
 
     # Apply tool runtime patches