llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
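The most sweeping change visible in this file list is the split of the API definitions out of `llama_stack/apis/*` into a new top-level `llama_stack_api` package (note the `top_level.txt +2` entry and the many `{llama_stack/apis → llama_stack_api}` moves). As a rough before/after sketch of what that means for imports, based only on the import changes that appear in the hunks below (the full re-export surface of `llama_stack_api/__init__.py` is not shown here):

```python
# llama-stack 0.3.4: API symbols were imported from llama_stack.apis.* submodules, e.g.
#   from llama_stack.apis.datatypes import Api, ExternalApiSpec
#   from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
#   from llama_stack.apis.common.responses import PaginatedResponse

# llama-stack 0.4.0: the same symbols are imported from the new llama_stack_api package
from llama_stack_api import (
    Api,
    ConflictError,
    ExternalApiSpec,
    PaginatedResponse,
    ResourceNotFoundError,
    WebMethod,
)
```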
llama_stack/core/server/routes.py

```diff
@@ -12,10 +12,14 @@ from typing import Any
 from aiohttp import hdrs
 from starlette.routing import Route
 
-from llama_stack.apis.datatypes import Api, ExternalApiSpec
-from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
 from llama_stack.core.resolver import api_protocol_map
-from llama_stack.
+from llama_stack.core.server.fastapi_router_registry import (
+    _ROUTER_FACTORIES,
+    build_fastapi_router,
+    get_router_routes,
+)
+from llama_stack_api import Api, ExternalApiSpec, WebMethod
+from llama_stack_api.router_utils import PUBLIC_ROUTE_KEY
 
 EndpointFunc = Callable[..., Any]
 PathParams = dict[str, str]
@@ -25,33 +29,28 @@ RouteImpls = dict[str, PathImpl]
 RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod]
 
 
-def toolgroup_protocol_map():
-    return {
-        SpecialToolGroup.rag_tool: RAGToolRuntime,
-    }
-
-
 def get_all_api_routes(
     external_apis: dict[Api, ExternalApiSpec] | None = None,
 ) -> dict[Api, list[tuple[Route, WebMethod]]]:
+    """Get all API routes from webmethod-based protocols.
+
+    This function only returns routes from APIs that use the legacy @webmethod
+    decorator system. For APIs that have been migrated to FastAPI routers,
+    use the router registry (fastapi_router_registry.has_router() and fastapi_router_registry.build_fastapi_router()).
+
+    Args:
+        external_apis: Optional dictionary of external API specifications
+
+    Returns:
+        Dictionary mapping API to list of (Route, WebMethod) tuples
+    """
     apis = {}
 
     protocols = api_protocol_map(external_apis)
-    toolgroup_protocols = toolgroup_protocol_map()
     for api, protocol in protocols.items():
         routes = []
         protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction)
 
-        # HACK ALERT
-        if api == Api.tool_runtime:
-            for tool_group in SpecialToolGroup:
-                sub_protocol = toolgroup_protocols[tool_group]
-                sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction)
-                for name, method in sub_protocol_methods:
-                    if not hasattr(method, "__webmethod__"):
-                        continue
-                    protocol_methods.append((f"{tool_group.value}.{name}", method))
-
         for name, method in protocol_methods:
             # Get all webmethods for this method (supports multiple decorators)
             webmethods = getattr(method, "__webmethods__", [])
@@ -68,8 +67,9 @@ def get_all_api_routes(
             else:
                 http_method = hdrs.METH_POST
             routes.append(
-
-
+                # setting endpoint to None since don't use a Router object
+                (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)  # type: ignore[arg-type]
+            )
 
         apis[api] = routes
 
@@ -91,22 +91,74 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No
 
         return f"^{pattern}$"
 
+    # Process routes from FastAPI routers
+    for api_name in _ROUTER_FACTORIES.keys():
+        api = Api(api_name)
+        if api not in impls:
+            continue
+        impl = impls[api]
+        router = build_fastapi_router(api, impl)
+        if router:
+            router_routes = get_router_routes(router)
+            for route in router_routes:
+                # Get the endpoint function from the route
+                # For FastAPI routes, the endpoint is the actual function
+                func = route.endpoint
+                if func is None:
+                    continue
+
+                # Get the first (and typically only) method from the set, filtering out HEAD
+                available_methods = [m for m in (route.methods or []) if m != "HEAD"]
+                if not available_methods:
+                    continue  # Skip if only HEAD method is available
+                method = available_methods[0].lower()
+
+                if method not in route_impls:
+                    route_impls[method] = {}
+
+                # Create a minimal WebMethod for router routes (needed for RouteMatch tuple)
+                # We don't have webmethod metadata for router routes, so create a minimal one
+                # that has the attributes used by the library client (descriptive_name for tracing)
+                #
+                # TODO: Long-term migration plan (once all APIs are migrated to FastAPI routers):
+                # - Extract summary from APIRoute: route.summary (available on FastAPI APIRoute objects)
+                # - Pass summary directly in RouteMatch instead of WebMethod
+                # - Remove this WebMethod() instantiation entirely
+                # - Update library_client.py to use the extracted summary instead of webmethod.descriptive_name
+
+                # Routes with openapi_extra[PUBLIC_ROUTE_KEY]=True don't require authentication
+                is_public = (route.openapi_extra or {}).get(PUBLIC_ROUTE_KEY, False)
+                webmethod = WebMethod(
+                    descriptive_name=None,
+                    require_authentication=not is_public,
+                )
+                route_impls[method][_convert_path_to_regex(route.path)] = (
+                    func,
+                    route.path,
+                    webmethod,
+                )
+
+    # Process routes from legacy webmethod-based APIs
     for api, api_routes in api_to_routes.items():
+        # Skip APIs that have routers (already processed above)
+        if api.value in _ROUTER_FACTORIES:
+            continue
+
         if api not in impls:
             continue
-        for
+        for legacy_route, webmethod in api_routes:
             impl = impls[api]
-            func = getattr(impl,
+            func = getattr(impl, legacy_route.name)
             # Get the first (and typically only) method from the set, filtering out HEAD
-            available_methods = [m for m in
+            available_methods = [m for m in (legacy_route.methods or []) if m != "HEAD"]
             if not available_methods:
                 continue  # Skip if only HEAD method is available
            method = available_methods[0].lower()
            if method not in route_impls:
                route_impls[method] = {}
-            route_impls[method][_convert_path_to_regex(
+            route_impls[method][_convert_path_to_regex(legacy_route.path)] = (
                 func,
-
+                legacy_route.path,
                 webmethod,
             )
 
```
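The new docstring on `get_all_api_routes()` above describes a split dispatch model: APIs that have been migrated get a real FastAPI router from `llama_stack.core.server.fastapi_router_registry`, while the rest still go through legacy `@webmethod` route discovery. A minimal sketch of how a caller combines the two paths, mirroring the logic in the `server.py` hunks below (`mount_api` and `impl` are illustrative names, not part of the package):

```python
from fastapi import FastAPI

from llama_stack.core.server.fastapi_router_registry import build_fastapi_router
from llama_stack.core.server.routes import get_all_api_routes
from llama_stack_api import Api


def mount_api(app: FastAPI, api: Api, impl) -> None:
    # Migrated APIs: the registry builds a FastAPI router bound to the implementation.
    router = build_fastapi_router(api, impl)
    if router is not None:
        app.include_router(router)
        return

    # Legacy APIs: fall back to the (Route, WebMethod) pairs produced from @webmethod
    # decorators and wire them up dynamically, as create_app() does in server.py.
    for route, webmethod in get_all_api_routes().get(api, []):
        ...  # the real server wraps getattr(impl, route.name) via create_dynamic_typed_route
```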
llama_stack/core/server/server.py

```diff
@@ -31,13 +31,10 @@ from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 
-from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import (
     AuthenticationRequiredError,
-
-    StackRunConfig,
+    StackConfig,
     process_cors_config,
 )
 from llama_stack.core.distribution import builtin_automatically_routed_apis
@@ -47,6 +44,7 @@ from llama_stack.core.request_headers import (
     request_provider_data_context,
     user_from_scope,
 )
+from llama_stack.core.server.fastapi_router_registry import build_fastapi_router
 from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.core.stack import (
     Stack,
@@ -54,22 +52,13 @@ from llama_stack.core.stack import (
     replace_env_vars,
 )
 from llama_stack.core.utils.config import redact_sensitive_fields
-from llama_stack.core.utils.config_resolution import
+from llama_stack.core.utils.config_resolution import resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
-from llama_stack.log import
-from
-from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
-from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
-    TelemetryAdapter,
-)
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    setup_logger,
-)
+from llama_stack.log import LoggingConfig, get_logger
+from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 
 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
-from .tracing import TracingMiddleware
 
 REPO_ROOT = Path(__file__).parent.parent.parent.parent
 
@@ -96,7 +85,7 @@ def create_sse_event(data: Any) -> str:
 
 
 async def global_exception_handler(request: Request, exc: Exception):
-    traceback.print_exception(exc)
+    traceback.print_exception(type(exc), exc, exc.__traceback__)
     http_exc = translate_exception(exc)
 
     return JSONResponse(status_code=http_exc.status_code, content={"error": {"detail": http_exc.detail}})
@@ -158,7 +147,7 @@ class StackApp(FastAPI):
     start background tasks (e.g. refresh model registry periodically) from the lifespan context manager.
     """
 
-    def __init__(self, config:
+    def __init__(self, config: StackConfig, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.stack: Stack = Stack(config)
 
@@ -174,7 +163,9 @@ class StackApp(FastAPI):
 
 @asynccontextmanager
 async def lifespan(app: StackApp):
-
+    server_version = parse_version("llama-stack")
+
+    logger.info(f"Starting up Llama Stack server (version: {server_version})")
     assert app.stack is not None
     app.stack.create_registry_refresh_task()
     yield
@@ -244,56 +235,36 @@ async def log_request_pre_validation(request: Request):
 def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
     @functools.wraps(func)
     async def route_handler(request: Request, **kwargs):
-        # Get auth attributes from the request scope
-        user = user_from_scope(request.scope)
-
         await log_request_pre_validation(request)
 
-
-        test_context_var = None
-        reset_test_context_fn = None
-
-        # Use context manager with both provider data and auth attributes
-        with request_provider_data_context(request.headers, user):
-            if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
-                from llama_stack.core.testing_context import (
-                    TEST_CONTEXT,
-                    reset_test_context,
-                    sync_test_context_from_provider_data,
-                )
+        is_streaming = is_streaming_request(func.__name__, request, **kwargs)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            else:
-                logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
-                raise translate_exception(e) from e
-        finally:
-            if test_context_token is not None and reset_test_context_fn is not None:
-                reset_test_context_fn(test_context_token)
+        try:
+            if is_streaming:
+                # Preserve context vars across async generator boundaries
+                context_vars = [PROVIDER_DATA_VAR]
+                if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
+                    from llama_stack.core.testing_context import TEST_CONTEXT
+
+                    context_vars.append(TEST_CONTEXT)
+                gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
+                return StreamingResponse(gen, media_type="text/event-stream")
+            else:
+                value = func(**kwargs)
+                result = await maybe_await(value)
+                if isinstance(result, PaginatedResponse) and result.url is None:
+                    result.url = route
+
+                if method.upper() == "DELETE" and result is None:
+                    return Response(status_code=httpx.codes.NO_CONTENT)
+
+                return result
+        except Exception as e:
+            if logger.isEnabledFor(logging.INFO):
+                logger.exception(f"Error executing endpoint {route=} {method=}")
+            else:
+                logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
+            raise translate_exception(e) from e
 
     sig = inspect.signature(func)
 
@@ -365,6 +336,42 @@ class ClientVersionMiddleware:
         return await self.app(scope, receive, send)
 
 
+class ProviderDataMiddleware:
+    """Middleware to set up request context for all routes.
+
+    Sets up provider data context from X-LlamaStack-Provider-Data header
+    and auth attributes. Also handles test context propagation when
+    running in test mode for deterministic ID generation.
+    """
+
+    def __init__(self, app):
+        self.app = app
+
+    async def __call__(self, scope, receive, send):
+        if scope["type"] == "http":
+            headers = {k.decode(): v.decode() for k, v in scope.get("headers", [])}
+            user = user_from_scope(scope)
+
+            with request_provider_data_context(headers, user):
+                test_context_token = None
+                reset_fn = None
+                if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
+                    from llama_stack.core.testing_context import (
+                        reset_test_context,
+                        sync_test_context_from_provider_data,
+                    )
+
+                    test_context_token = sync_test_context_from_provider_data()
+                    reset_fn = reset_test_context
+                try:
+                    return await self.app(scope, receive, send)
+                finally:
+                    if test_context_token and reset_fn:
+                        reset_fn(test_context_token)
+
+        return await self.app(scope, receive, send)
+
+
 def create_app() -> StackApp:
     """Create and configure the FastAPI application.
 
@@ -374,14 +381,11 @@ def create_app() -> StackApp:
     Returns:
         Configured StackApp instance.
     """
-    # Initialize logging from environment variables first
-    setup_logging()
-
     config_file = os.getenv("LLAMA_STACK_CONFIG")
     if config_file is None:
         raise ValueError("LLAMA_STACK_CONFIG environment variable is required")
 
-    config_file = resolve_config_or_distro(config_file
+    config_file = resolve_config_or_distro(config_file)
 
     # Load and process configuration
     logger_config = None
@@ -392,7 +396,7 @@ def create_app() -> StackApp:
     logger = get_logger(name=__name__, category="core::server", config=logger_config)
 
     config = replace_env_vars(config_contents)
-    config =
+    config = StackConfig(**cast_image_name_to_string(config))
 
     _log_run_config(run_config=config)
 
@@ -407,6 +411,8 @@ def create_app() -> StackApp:
     if not os.environ.get("LLAMA_STACK_DISABLE_VERSION_CHECK"):
         app.add_middleware(ClientVersionMiddleware)
 
+    app.add_middleware(ProviderDataMiddleware)
+
     impls = app.stack.impls
 
     if config.server.auth:
@@ -448,11 +454,6 @@ def create_app() -> StackApp:
     if cors_config:
         app.add_middleware(CORSMiddleware, **cors_config.model_dump())
 
-    if config.telemetry.enabled:
-        setup_logger(impls[Api.telemetry])
-    else:
-        setup_logger(TelemetryAdapter(TelemetryConfig(), {}))
-
     # Load external APIs if configured
     external_apis = load_external_apis(config)
     all_routes = get_all_api_routes(external_apis)
@@ -468,19 +469,27 @@ def create_app() -> StackApp:
             continue
         apis_to_serve.add(inf.routing_table_api.value)
 
+    apis_to_serve.add("admin")
     apis_to_serve.add("inspect")
     apis_to_serve.add("providers")
     apis_to_serve.add("prompts")
     apis_to_serve.add("conversations")
+
     for api_str in apis_to_serve:
         api = Api(api_str)
 
-
-
-
-
-
+        # Try to discover and use a router factory from the API package
+        impl = impls[api]
+        router = build_fastapi_router(api, impl)
+        if router:
+            app.include_router(router)
+            logger.debug(f"Registered FastAPIrouter for {api} API")
+            continue
+
+        # Fall back to old webmethod-based route discovery until the migration is complete
+        impl = impls[api]
 
+        routes = all_routes[api]
         for route, _ in routes:
             if not hasattr(impl, route.name):
                 # ideally this should be a typing violation already
@@ -506,17 +515,23 @@ def create_app() -> StackApp:
 
     logger.debug(f"serving APIs: {apis_to_serve}")
 
+    # Register specific exception handlers before the generic Exception handler
+    # This prevents the re-raising behavior that causes connection resets
     app.exception_handler(RequestValidationError)(global_exception_handler)
+    app.exception_handler(ConflictError)(global_exception_handler)
+    app.exception_handler(ResourceNotFoundError)(global_exception_handler)
+    app.exception_handler(AuthenticationRequiredError)(global_exception_handler)
+    app.exception_handler(AccessDeniedError)(global_exception_handler)
+    app.exception_handler(BadRequestError)(global_exception_handler)
+    # Generic Exception handler should be last
     app.exception_handler(Exception)(global_exception_handler)
 
-    app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis)
-
     return app
 
 
-def _log_run_config(run_config:
+def _log_run_config(run_config: StackConfig):
     """Logs the run config with redacted fields and disabled providers removed."""
-    logger.info("
+    logger.info("Stack Configuration:")
     safe_config = redact_sensitive_fields(run_config.model_dump(mode="json"))
     clean_config = remove_disabled_providers(safe_config)
     logger.info(yaml.dump(clean_config, indent=2))
@@ -532,8 +547,8 @@ def extract_path_params(route: str) -> list[str]:
 
 def remove_disabled_providers(obj):
     if isinstance(obj, dict):
-
-        if
+        # Filter out items where provider_id is explicitly disabled or empty
+        if "provider_id" in obj and obj["provider_id"] in ("__disabled__", "", None):
             return None
         return {k: v for k, v in ((k, remove_disabled_providers(v)) for k, v in obj.items()) if v is not None}
     elif isinstance(obj, list):
```