llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/core/inspect.py
CHANGED
|
@@ -8,21 +8,28 @@ from importlib.metadata import version
|
|
|
8
8
|
|
|
9
9
|
from pydantic import BaseModel
|
|
10
10
|
|
|
11
|
-
from llama_stack.
|
|
11
|
+
from llama_stack.core.datatypes import StackConfig
|
|
12
|
+
from llama_stack.core.distribution import builtin_automatically_routed_apis
|
|
13
|
+
from llama_stack.core.external import load_external_apis
|
|
14
|
+
from llama_stack.core.server.fastapi_router_registry import (
|
|
15
|
+
_ROUTER_FACTORIES,
|
|
16
|
+
build_fastapi_router,
|
|
17
|
+
get_router_routes,
|
|
18
|
+
)
|
|
19
|
+
from llama_stack.core.server.routes import get_all_api_routes
|
|
20
|
+
from llama_stack_api import (
|
|
21
|
+
Api,
|
|
12
22
|
HealthInfo,
|
|
23
|
+
HealthStatus,
|
|
13
24
|
Inspect,
|
|
14
25
|
ListRoutesResponse,
|
|
15
26
|
RouteInfo,
|
|
16
27
|
VersionInfo,
|
|
17
28
|
)
|
|
18
|
-
from llama_stack.core.datatypes import StackRunConfig
|
|
19
|
-
from llama_stack.core.external import load_external_apis
|
|
20
|
-
from llama_stack.core.server.routes import get_all_api_routes
|
|
21
|
-
from llama_stack.providers.datatypes import HealthStatus
|
|
22
29
|
|
|
23
30
|
|
|
24
31
|
class DistributionInspectConfig(BaseModel):
|
|
25
|
-
|
|
32
|
+
config: StackConfig
|
|
26
33
|
|
|
27
34
|
|
|
28
35
|
async def get_provider_impl(config, deps):
|
|
@@ -33,19 +40,95 @@ async def get_provider_impl(config, deps):
|
|
|
33
40
|
|
|
34
41
|
class DistributionInspectImpl(Inspect):
|
|
35
42
|
def __init__(self, config: DistributionInspectConfig, deps):
|
|
36
|
-
self.
|
|
43
|
+
self.stack_config = config.config
|
|
37
44
|
self.deps = deps
|
|
38
45
|
|
|
39
46
|
async def initialize(self) -> None:
|
|
40
47
|
pass
|
|
41
48
|
|
|
42
|
-
async def list_routes(self) -> ListRoutesResponse:
|
|
43
|
-
|
|
49
|
+
async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse:
|
|
50
|
+
config: StackConfig = self.stack_config
|
|
51
|
+
|
|
52
|
+
# Helper function to determine if a route should be included based on api_filter
|
|
53
|
+
# TODO: remove this once we've migrated all APIs to FastAPI routers
|
|
54
|
+
def should_include_route(webmethod) -> bool:
|
|
55
|
+
if api_filter is None:
|
|
56
|
+
# Default: only non-deprecated APIs
|
|
57
|
+
return not webmethod.deprecated
|
|
58
|
+
elif api_filter == "deprecated":
|
|
59
|
+
# Special filter: show deprecated routes regardless of their actual level
|
|
60
|
+
return bool(webmethod.deprecated)
|
|
61
|
+
else:
|
|
62
|
+
# Filter by API level (non-deprecated routes only)
|
|
63
|
+
return not webmethod.deprecated and webmethod.level == api_filter
|
|
64
|
+
|
|
65
|
+
# Helper function to get provider types for an API
|
|
66
|
+
def _get_provider_types(api: Api) -> list[str]:
|
|
67
|
+
if api.value in ["providers", "inspect"]:
|
|
68
|
+
return [] # These APIs don't have "real" providers they're internal to the stack
|
|
69
|
+
|
|
70
|
+
# For routing table APIs, look up providers from their router API
|
|
71
|
+
# (e.g., benchmarks -> eval, models -> inference, etc.)
|
|
72
|
+
auto_routed_apis = builtin_automatically_routed_apis()
|
|
73
|
+
for auto_routed in auto_routed_apis:
|
|
74
|
+
if auto_routed.routing_table_api == api:
|
|
75
|
+
# This is a routing table API, use its router API for providers
|
|
76
|
+
providers = config.providers.get(auto_routed.router_api.value, [])
|
|
77
|
+
return [p.provider_type for p in providers] if providers else []
|
|
78
|
+
|
|
79
|
+
# Regular API, look up providers directly
|
|
80
|
+
providers = config.providers.get(api.value, [])
|
|
81
|
+
return [p.provider_type for p in providers] if providers else []
|
|
82
|
+
|
|
83
|
+
# Helper function to determine if a router route should be included based on api_filter
|
|
84
|
+
def _should_include_router_route(route, router_prefix: str | None) -> bool:
|
|
85
|
+
"""Check if a router-based route should be included based on api_filter."""
|
|
86
|
+
# Check deprecated status
|
|
87
|
+
route_deprecated = getattr(route, "deprecated", False) or False
|
|
88
|
+
|
|
89
|
+
if api_filter is None:
|
|
90
|
+
# Default: only non-deprecated routes
|
|
91
|
+
return not route_deprecated
|
|
92
|
+
elif api_filter == "deprecated":
|
|
93
|
+
# Special filter: show deprecated routes regardless of their actual level
|
|
94
|
+
return route_deprecated
|
|
95
|
+
else:
|
|
96
|
+
# Filter by API level (non-deprecated routes only)
|
|
97
|
+
# Extract level from router prefix (e.g., "/v1" -> "v1")
|
|
98
|
+
if router_prefix:
|
|
99
|
+
prefix_level = router_prefix.lstrip("/")
|
|
100
|
+
return not route_deprecated and prefix_level == api_filter
|
|
101
|
+
return not route_deprecated
|
|
44
102
|
|
|
45
103
|
ret = []
|
|
46
|
-
external_apis = load_external_apis(
|
|
104
|
+
external_apis = load_external_apis(config)
|
|
47
105
|
all_endpoints = get_all_api_routes(external_apis)
|
|
106
|
+
|
|
107
|
+
# Process routes from APIs with FastAPI routers
|
|
108
|
+
for api_name in _ROUTER_FACTORIES.keys():
|
|
109
|
+
api = Api(api_name)
|
|
110
|
+
router = build_fastapi_router(api, None) # we don't need the impl here, just the routes
|
|
111
|
+
if router:
|
|
112
|
+
router_routes = get_router_routes(router)
|
|
113
|
+
for route in router_routes:
|
|
114
|
+
if _should_include_router_route(route, router.prefix):
|
|
115
|
+
if route.methods is not None:
|
|
116
|
+
available_methods = [m for m in route.methods if m != "HEAD"]
|
|
117
|
+
if available_methods:
|
|
118
|
+
ret.append(
|
|
119
|
+
RouteInfo(
|
|
120
|
+
route=route.path,
|
|
121
|
+
method=available_methods[0],
|
|
122
|
+
provider_types=_get_provider_types(api),
|
|
123
|
+
)
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
# Process routes from legacy webmethod-based APIs
|
|
48
127
|
for api, endpoints in all_endpoints.items():
|
|
128
|
+
# Skip APIs that have routers (already processed above)
|
|
129
|
+
if api.value in _ROUTER_FACTORIES:
|
|
130
|
+
continue
|
|
131
|
+
|
|
49
132
|
# Always include provider and inspect APIs, filter others based on run config
|
|
50
133
|
if api.value in ["providers", "inspect"]:
|
|
51
134
|
ret.extend(
|
|
@@ -55,12 +138,12 @@ class DistributionInspectImpl(Inspect):
|
|
|
55
138
|
method=next(iter([m for m in e.methods if m != "HEAD"])),
|
|
56
139
|
provider_types=[], # These APIs don't have "real" providers - they're internal to the stack
|
|
57
140
|
)
|
|
58
|
-
for e,
|
|
59
|
-
if e.methods is not None
|
|
141
|
+
for e, webmethod in endpoints
|
|
142
|
+
if e.methods is not None and should_include_route(webmethod)
|
|
60
143
|
]
|
|
61
144
|
)
|
|
62
145
|
else:
|
|
63
|
-
providers =
|
|
146
|
+
providers = config.providers.get(api.value, [])
|
|
64
147
|
if providers: # Only process if there are providers for this API
|
|
65
148
|
ret.extend(
|
|
66
149
|
[
|
|
@@ -69,8 +152,8 @@ class DistributionInspectImpl(Inspect):
|
|
|
69
152
|
method=next(iter([m for m in e.methods if m != "HEAD"])),
|
|
70
153
|
provider_types=[p.provider_type for p in providers],
|
|
71
154
|
)
|
|
72
|
-
for e,
|
|
73
|
-
if e.methods is not None
|
|
155
|
+
for e, webmethod in endpoints
|
|
156
|
+
if e.methods is not None and should_include_route(webmethod)
|
|
74
157
|
]
|
|
75
158
|
)
|
|
76
159
|
|
|
@@ -10,6 +10,7 @@ import json
|
|
|
10
10
|
import logging # allow-direct-logging
|
|
11
11
|
import os
|
|
12
12
|
import sys
|
|
13
|
+
import typing
|
|
13
14
|
from enum import Enum
|
|
14
15
|
from io import BytesIO
|
|
15
16
|
from pathlib import Path
|
|
@@ -18,38 +19,37 @@ from typing import Any, TypeVar, Union, get_args, get_origin
|
|
|
18
19
|
import httpx
|
|
19
20
|
import yaml
|
|
20
21
|
from fastapi import Response as FastAPIResponse
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
22
|
+
|
|
23
|
+
from llama_stack.core.utils.type_inspection import is_unwrapped_body_param
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
from llama_stack_client import (
|
|
27
|
+
NOT_GIVEN,
|
|
28
|
+
APIResponse,
|
|
29
|
+
AsyncAPIResponse,
|
|
30
|
+
AsyncLlamaStackClient,
|
|
31
|
+
AsyncStream,
|
|
32
|
+
LlamaStackClient,
|
|
33
|
+
)
|
|
34
|
+
except ImportError as e:
|
|
35
|
+
raise ImportError(
|
|
36
|
+
"llama-stack-client is not installed. Please install it with `uv pip install llama-stack[client]`."
|
|
37
|
+
) from e
|
|
38
|
+
|
|
29
39
|
from pydantic import BaseModel, TypeAdapter
|
|
30
40
|
from rich.console import Console
|
|
31
41
|
from termcolor import cprint
|
|
32
42
|
|
|
33
43
|
from llama_stack.core.build import print_pip_install_help
|
|
34
44
|
from llama_stack.core.configure import parse_and_maybe_upgrade_config
|
|
35
|
-
from llama_stack.core.
|
|
36
|
-
from llama_stack.core.request_headers import (
|
|
37
|
-
PROVIDER_DATA_VAR,
|
|
38
|
-
request_provider_data_context,
|
|
39
|
-
)
|
|
45
|
+
from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider_data_context
|
|
40
46
|
from llama_stack.core.resolver import ProviderRegistry
|
|
41
47
|
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
|
|
42
|
-
from llama_stack.core.stack import
|
|
43
|
-
Stack,
|
|
44
|
-
get_stack_run_config_from_distro,
|
|
45
|
-
replace_env_vars,
|
|
46
|
-
)
|
|
48
|
+
from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
|
|
47
49
|
from llama_stack.core.utils.config import redact_sensitive_fields
|
|
48
50
|
from llama_stack.core.utils.context import preserve_contexts_async_generator
|
|
49
51
|
from llama_stack.core.utils.exec import in_notebook
|
|
50
52
|
from llama_stack.log import get_logger, setup_logging
|
|
51
|
-
from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
|
|
52
|
-
from llama_stack.strong_typing.inspection import is_unwrapped_body_param
|
|
53
53
|
|
|
54
54
|
logger = get_logger(name=__name__, category="core")
|
|
55
55
|
|
|
@@ -202,13 +202,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|
|
202
202
|
super().__init__()
|
|
203
203
|
# Initialize logging from environment variables first
|
|
204
204
|
setup_logging()
|
|
205
|
-
|
|
206
|
-
# when using the library client, we should not log to console since many
|
|
207
|
-
# of our logs are intended for server-side usage
|
|
208
|
-
if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
|
|
209
|
-
current_sinks = sinks_from_env.strip().lower().split(",")
|
|
210
|
-
os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
|
|
211
|
-
|
|
212
205
|
if in_notebook():
|
|
213
206
|
import nest_asyncio
|
|
214
207
|
|
|
@@ -264,20 +257,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|
|
264
257
|
file=sys.stderr,
|
|
265
258
|
)
|
|
266
259
|
if self.config_path_or_distro_name.endswith(".yaml"):
|
|
267
|
-
|
|
268
|
-
for api, run_providers in self.config.providers.items():
|
|
269
|
-
for provider in run_providers:
|
|
270
|
-
providers.setdefault(api, []).append(
|
|
271
|
-
BuildProvider(provider_type=provider.provider_type, module=provider.module)
|
|
272
|
-
)
|
|
273
|
-
providers = dict(providers)
|
|
274
|
-
build_config = BuildConfig(
|
|
275
|
-
distribution_spec=DistributionSpec(
|
|
276
|
-
providers=providers,
|
|
277
|
-
),
|
|
278
|
-
external_providers_dir=self.config.external_providers_dir,
|
|
279
|
-
)
|
|
280
|
-
print_pip_install_help(build_config)
|
|
260
|
+
print_pip_install_help(self.config)
|
|
281
261
|
else:
|
|
282
262
|
prefix = "!" if in_notebook() else ""
|
|
283
263
|
cprint(
|
|
@@ -293,8 +273,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|
|
293
273
|
raise _e
|
|
294
274
|
|
|
295
275
|
assert self.impls is not None
|
|
296
|
-
if Api.telemetry in self.impls:
|
|
297
|
-
setup_logger(self.impls[Api.telemetry])
|
|
298
276
|
|
|
299
277
|
if not os.environ.get("PYTEST_CURRENT_TEST"):
|
|
300
278
|
console = Console()
|
|
@@ -381,16 +359,16 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|
|
381
359
|
matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
|
|
382
360
|
body |= path_params
|
|
383
361
|
|
|
362
|
+
# Pass through params that aren't already handled as path params
|
|
363
|
+
if options.params:
|
|
364
|
+
extra_query_params = {k: v for k, v in options.params.items() if k not in path_params}
|
|
365
|
+
if extra_query_params:
|
|
366
|
+
body["extra_query"] = extra_query_params
|
|
367
|
+
|
|
384
368
|
body, field_names = self._handle_file_uploads(options, body)
|
|
385
369
|
|
|
386
370
|
body = self._convert_body(matched_func, body, exclude_params=set(field_names))
|
|
387
|
-
|
|
388
|
-
trace_path = webmethod.descriptive_name or route_path
|
|
389
|
-
await start_trace(trace_path, {"__location__": "library_client"})
|
|
390
|
-
try:
|
|
391
|
-
result = await matched_func(**body)
|
|
392
|
-
finally:
|
|
393
|
-
await end_trace()
|
|
371
|
+
result = await matched_func(**body)
|
|
394
372
|
|
|
395
373
|
# Handle FastAPI Response objects (e.g., from file content retrieval)
|
|
396
374
|
if isinstance(result, FastAPIResponse):
|
|
@@ -449,19 +427,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|
|
449
427
|
# Prepare body for the function call (handles both Pydantic and traditional params)
|
|
450
428
|
body = self._convert_body(func, body)
|
|
451
429
|
|
|
452
|
-
trace_path = webmethod.descriptive_name or route_path
|
|
453
|
-
await start_trace(trace_path, {"__location__": "library_client"})
|
|
454
|
-
|
|
455
430
|
async def gen():
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
yield sse_event.encode("utf-8")
|
|
461
|
-
finally:
|
|
462
|
-
await end_trace()
|
|
431
|
+
async for chunk in await func(**body):
|
|
432
|
+
data = json.dumps(convert_pydantic_to_json_value(chunk))
|
|
433
|
+
sse_event = f"data: {data}\n\n"
|
|
434
|
+
yield sse_event.encode("utf-8")
|
|
463
435
|
|
|
464
|
-
wrapped_gen = preserve_contexts_async_generator(gen(), [
|
|
436
|
+
wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
|
|
465
437
|
|
|
466
438
|
mock_response = httpx.Response(
|
|
467
439
|
status_code=httpx.codes.OK,
|
|
@@ -519,6 +491,25 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|
|
519
491
|
unwrapped_body_param = param
|
|
520
492
|
break
|
|
521
493
|
|
|
494
|
+
# Check for parameters with Depends() annotation (FastAPI router endpoints)
|
|
495
|
+
# These need special handling: construct the request model from body
|
|
496
|
+
depends_param = None
|
|
497
|
+
for param in params_list:
|
|
498
|
+
param_type = param.annotation
|
|
499
|
+
if get_origin(param_type) is typing.Annotated:
|
|
500
|
+
args = get_args(param_type)
|
|
501
|
+
if len(args) > 1:
|
|
502
|
+
# Check if any metadata is Depends
|
|
503
|
+
metadata = args[1:]
|
|
504
|
+
for item in metadata:
|
|
505
|
+
# Check if it's a Depends object (has dependency attribute or is a callable)
|
|
506
|
+
# Depends objects typically have a 'dependency' attribute or are callable functions
|
|
507
|
+
if hasattr(item, "dependency") or callable(item) or "Depends" in str(type(item)):
|
|
508
|
+
depends_param = param
|
|
509
|
+
break
|
|
510
|
+
if depends_param:
|
|
511
|
+
break
|
|
512
|
+
|
|
522
513
|
# Convert parameters to Pydantic models where needed
|
|
523
514
|
converted_body = {}
|
|
524
515
|
for param_name, param in sig.parameters.items():
|
|
@@ -529,6 +520,27 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|
|
529
520
|
else:
|
|
530
521
|
converted_body[param_name] = convert_to_pydantic(param.annotation, value)
|
|
531
522
|
|
|
523
|
+
# Handle Depends parameter: construct request model from body
|
|
524
|
+
if depends_param and depends_param.name not in converted_body:
|
|
525
|
+
param_type = depends_param.annotation
|
|
526
|
+
if get_origin(param_type) is typing.Annotated:
|
|
527
|
+
base_type = get_args(param_type)[0]
|
|
528
|
+
# Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type
|
|
529
|
+
# In Python 3.10+, Union types created with | syntax are still typing.Union
|
|
530
|
+
origin = get_origin(base_type)
|
|
531
|
+
if origin is Union:
|
|
532
|
+
# Get the first non-None type from the Union
|
|
533
|
+
union_args = get_args(base_type)
|
|
534
|
+
base_type = next(
|
|
535
|
+
(t for t in union_args if t is not type(None) and t is not None),
|
|
536
|
+
union_args[0] if union_args else None,
|
|
537
|
+
)
|
|
538
|
+
|
|
539
|
+
# Only try to instantiate if it's a class (not a Union or other non-callable type)
|
|
540
|
+
if base_type is not None and inspect.isclass(base_type) and callable(base_type):
|
|
541
|
+
# Construct the request model from all body parameters
|
|
542
|
+
converted_body[depends_param.name] = base_type(**body)
|
|
543
|
+
|
|
532
544
|
# handle unwrapped body parameter after processing all named parameters
|
|
533
545
|
if unwrapped_body_param:
|
|
534
546
|
base_type = get_args(unwrapped_body_param.annotation)[0]
|
|
@@ -9,10 +9,9 @@ from typing import Any
|
|
|
9
9
|
|
|
10
10
|
from pydantic import BaseModel
|
|
11
11
|
|
|
12
|
-
from llama_stack.
|
|
13
|
-
from llama_stack.core.
|
|
14
|
-
from
|
|
15
|
-
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
|
|
12
|
+
from llama_stack.core.datatypes import StackConfig
|
|
13
|
+
from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
|
|
14
|
+
from llama_stack_api import ListPromptsResponse, Prompt, Prompts
|
|
16
15
|
|
|
17
16
|
|
|
18
17
|
class PromptServiceConfig(BaseModel):
|
|
@@ -21,7 +20,7 @@ class PromptServiceConfig(BaseModel):
|
|
|
21
20
|
:param run_config: Stack run configuration containing distribution info
|
|
22
21
|
"""
|
|
23
22
|
|
|
24
|
-
|
|
23
|
+
config: StackConfig
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]):
|
|
@@ -35,16 +34,15 @@ class PromptServiceImpl(Prompts):
|
|
|
35
34
|
"""Built-in prompt service implementation using KVStore."""
|
|
36
35
|
|
|
37
36
|
def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
|
|
38
|
-
self.
|
|
37
|
+
self.stack_config = config.config
|
|
39
38
|
self.deps = deps
|
|
40
39
|
self.kvstore: KVStore
|
|
41
40
|
|
|
42
41
|
async def initialize(self) -> None:
|
|
43
|
-
# Use
|
|
44
|
-
|
|
45
|
-
if not
|
|
46
|
-
raise ValueError("storage.stores.
|
|
47
|
-
prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
|
|
42
|
+
# Use prompts store reference from run config
|
|
43
|
+
prompts_ref = self.stack_config.storage.stores.prompts
|
|
44
|
+
if not prompts_ref:
|
|
45
|
+
raise ValueError("storage.stores.prompts must be configured in run config")
|
|
48
46
|
self.kvstore = await kvstore_impl(prompts_ref)
|
|
49
47
|
|
|
50
48
|
def _get_default_key(self, prompt_id: str) -> str:
|
|
@@ -232,3 +230,6 @@ class PromptServiceImpl(Prompts):
|
|
|
232
230
|
await self.kvstore.set(default_key, str(version))
|
|
233
231
|
|
|
234
232
|
return self._deserialize_prompt(data)
|
|
233
|
+
|
|
234
|
+
async def shutdown(self) -> None:
|
|
235
|
+
pass
|
llama_stack/core/providers.py
CHANGED
|
@@ -9,18 +9,24 @@ from typing import Any
|
|
|
9
9
|
|
|
10
10
|
from pydantic import BaseModel
|
|
11
11
|
|
|
12
|
-
from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
|
|
13
12
|
from llama_stack.log import get_logger
|
|
14
|
-
from
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
from llama_stack_api import (
|
|
14
|
+
HealthResponse,
|
|
15
|
+
HealthStatus,
|
|
16
|
+
InspectProviderRequest,
|
|
17
|
+
ListProvidersResponse,
|
|
18
|
+
ProviderInfo,
|
|
19
|
+
Providers,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from .datatypes import StackConfig
|
|
17
23
|
from .utils.config import redact_sensitive_fields
|
|
18
24
|
|
|
19
25
|
logger = get_logger(name=__name__, category="core")
|
|
20
26
|
|
|
21
27
|
|
|
22
28
|
class ProviderImplConfig(BaseModel):
|
|
23
|
-
|
|
29
|
+
config: StackConfig
|
|
24
30
|
|
|
25
31
|
|
|
26
32
|
async def get_provider_impl(config, deps):
|
|
@@ -31,7 +37,7 @@ async def get_provider_impl(config, deps):
|
|
|
31
37
|
|
|
32
38
|
class ProviderImpl(Providers):
|
|
33
39
|
def __init__(self, config, deps):
|
|
34
|
-
self.
|
|
40
|
+
self.stack_config = config.config
|
|
35
41
|
self.deps = deps
|
|
36
42
|
|
|
37
43
|
async def initialize(self) -> None:
|
|
@@ -42,8 +48,8 @@ class ProviderImpl(Providers):
|
|
|
42
48
|
pass
|
|
43
49
|
|
|
44
50
|
async def list_providers(self) -> ListProvidersResponse:
|
|
45
|
-
run_config = self.
|
|
46
|
-
safe_config =
|
|
51
|
+
run_config = self.stack_config
|
|
52
|
+
safe_config = StackConfig(**redact_sensitive_fields(run_config.model_dump()))
|
|
47
53
|
providers_health = await self.get_providers_health()
|
|
48
54
|
ret = []
|
|
49
55
|
for api, providers in safe_config.providers.items():
|
|
@@ -68,13 +74,13 @@ class ProviderImpl(Providers):
|
|
|
68
74
|
|
|
69
75
|
return ListProvidersResponse(data=ret)
|
|
70
76
|
|
|
71
|
-
async def inspect_provider(self,
|
|
77
|
+
async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo:
|
|
72
78
|
all_providers = await self.list_providers()
|
|
73
79
|
for p in all_providers.data:
|
|
74
|
-
if p.provider_id == provider_id:
|
|
80
|
+
if p.provider_id == request.provider_id:
|
|
75
81
|
return p
|
|
76
82
|
|
|
77
|
-
raise ValueError(f"Provider {provider_id} not found")
|
|
83
|
+
raise ValueError(f"Provider {request.provider_id} not found")
|
|
78
84
|
|
|
79
85
|
async def get_providers_health(self) -> dict[str, dict[str, HealthResponse]]:
|
|
80
86
|
"""Get health status for all providers.
|