llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/core/resolver.py
CHANGED
|
@@ -3,60 +3,66 @@
|
|
|
3
3
|
#
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
|
+
|
|
6
7
|
import importlib
|
|
7
8
|
import importlib.metadata
|
|
8
9
|
import inspect
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
11
|
-
from llama_stack.apis.agents import Agents
|
|
12
|
-
from llama_stack.apis.batches import Batches
|
|
13
|
-
from llama_stack.apis.benchmarks import Benchmarks
|
|
14
|
-
from llama_stack.apis.conversations import Conversations
|
|
15
|
-
from llama_stack.apis.datasetio import DatasetIO
|
|
16
|
-
from llama_stack.apis.datasets import Datasets
|
|
17
|
-
from llama_stack.apis.datatypes import ExternalApiSpec
|
|
18
|
-
from llama_stack.apis.eval import Eval
|
|
19
|
-
from llama_stack.apis.files import Files
|
|
20
|
-
from llama_stack.apis.inference import Inference, InferenceProvider
|
|
21
|
-
from llama_stack.apis.inspect import Inspect
|
|
22
|
-
from llama_stack.apis.models import Models
|
|
23
|
-
from llama_stack.apis.post_training import PostTraining
|
|
24
|
-
from llama_stack.apis.prompts import Prompts
|
|
25
|
-
from llama_stack.apis.providers import Providers as ProvidersAPI
|
|
26
|
-
from llama_stack.apis.safety import Safety
|
|
27
|
-
from llama_stack.apis.scoring import Scoring
|
|
28
|
-
from llama_stack.apis.scoring_functions import ScoringFunctions
|
|
29
|
-
from llama_stack.apis.shields import Shields
|
|
30
|
-
from llama_stack.apis.telemetry import Telemetry
|
|
31
|
-
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
|
32
|
-
from llama_stack.apis.vector_io import VectorIO
|
|
33
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
34
|
-
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
|
35
12
|
from llama_stack.core.client import get_client_impl
|
|
36
13
|
from llama_stack.core.datatypes import (
|
|
37
14
|
AccessRule,
|
|
38
15
|
AutoRoutedProviderSpec,
|
|
39
16
|
Provider,
|
|
40
17
|
RoutingTableProviderSpec,
|
|
41
|
-
|
|
18
|
+
StackConfig,
|
|
42
19
|
)
|
|
43
20
|
from llama_stack.core.distribution import builtin_automatically_routed_apis
|
|
44
21
|
from llama_stack.core.external import load_external_apis
|
|
45
22
|
from llama_stack.core.store import DistributionRegistry
|
|
46
23
|
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
47
24
|
from llama_stack.log import get_logger
|
|
48
|
-
from
|
|
25
|
+
from llama_stack_api import (
|
|
26
|
+
LLAMA_STACK_API_V1ALPHA,
|
|
27
|
+
Admin,
|
|
28
|
+
Agents,
|
|
49
29
|
Api,
|
|
30
|
+
Batches,
|
|
31
|
+
Benchmarks,
|
|
50
32
|
BenchmarksProtocolPrivate,
|
|
33
|
+
Connectors,
|
|
34
|
+
Conversations,
|
|
35
|
+
DatasetIO,
|
|
36
|
+
Datasets,
|
|
51
37
|
DatasetsProtocolPrivate,
|
|
52
|
-
|
|
38
|
+
Eval,
|
|
39
|
+
ExternalApiSpec,
|
|
40
|
+
FileProcessors,
|
|
41
|
+
Files,
|
|
42
|
+
Inference,
|
|
43
|
+
InferenceProvider,
|
|
44
|
+
Inspect,
|
|
45
|
+
Models,
|
|
53
46
|
ModelsProtocolPrivate,
|
|
47
|
+
PostTraining,
|
|
48
|
+
Prompts,
|
|
54
49
|
ProviderSpec,
|
|
55
50
|
RemoteProviderConfig,
|
|
56
51
|
RemoteProviderSpec,
|
|
52
|
+
Safety,
|
|
53
|
+
Scoring,
|
|
54
|
+
ScoringFunctions,
|
|
57
55
|
ScoringFunctionsProtocolPrivate,
|
|
56
|
+
Shields,
|
|
58
57
|
ShieldsProtocolPrivate,
|
|
58
|
+
ToolGroups,
|
|
59
59
|
ToolGroupsProtocolPrivate,
|
|
60
|
+
ToolRuntime,
|
|
61
|
+
VectorIO,
|
|
62
|
+
VectorStore,
|
|
63
|
+
)
|
|
64
|
+
from llama_stack_api import (
|
|
65
|
+
Providers as ProvidersAPI,
|
|
60
66
|
)
|
|
61
67
|
|
|
62
68
|
logger = get_logger(name=__name__, category="core")
|
|
@@ -76,6 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
|
|
|
76
82
|
Dictionary mapping API types to their protocol classes
|
|
77
83
|
"""
|
|
78
84
|
protocols = {
|
|
85
|
+
Api.admin: Admin,
|
|
79
86
|
Api.providers: ProvidersAPI,
|
|
80
87
|
Api.agents: Agents,
|
|
81
88
|
Api.inference: Inference,
|
|
@@ -98,7 +105,8 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
|
|
|
98
105
|
Api.files: Files,
|
|
99
106
|
Api.prompts: Prompts,
|
|
100
107
|
Api.conversations: Conversations,
|
|
101
|
-
Api.
|
|
108
|
+
Api.file_processors: FileProcessors,
|
|
109
|
+
Api.connectors: Connectors,
|
|
102
110
|
}
|
|
103
111
|
|
|
104
112
|
if external_apis:
|
|
@@ -146,7 +154,7 @@ ProviderRegistry = dict[Api, dict[str, ProviderSpec]]
|
|
|
146
154
|
|
|
147
155
|
|
|
148
156
|
async def resolve_impls(
|
|
149
|
-
run_config:
|
|
157
|
+
run_config: StackConfig,
|
|
150
158
|
provider_registry: ProviderRegistry,
|
|
151
159
|
dist_registry: DistributionRegistry,
|
|
152
160
|
policy: list[AccessRule],
|
|
@@ -198,6 +206,13 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
|
|
|
198
206
|
)
|
|
199
207
|
}
|
|
200
208
|
|
|
209
|
+
# Add inference as an optional dependency for vector_io to enable query rewriting
|
|
210
|
+
optional_deps = []
|
|
211
|
+
deps_list = [info.routing_table_api.value]
|
|
212
|
+
if info.router_api == Api.vector_io:
|
|
213
|
+
optional_deps = [Api.inference]
|
|
214
|
+
deps_list.append(Api.inference.value)
|
|
215
|
+
|
|
201
216
|
specs[info.router_api.value] = {
|
|
202
217
|
"__builtin__": ProviderWithSpec(
|
|
203
218
|
provider_id="__autorouted__",
|
|
@@ -208,7 +223,8 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
|
|
|
208
223
|
module="llama_stack.core.routers",
|
|
209
224
|
routing_table_api=info.routing_table_api,
|
|
210
225
|
api_dependencies=[info.routing_table_api],
|
|
211
|
-
|
|
226
|
+
optional_api_dependencies=optional_deps,
|
|
227
|
+
deps__=deps_list,
|
|
212
228
|
),
|
|
213
229
|
)
|
|
214
230
|
}
|
|
@@ -216,7 +232,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
|
|
|
216
232
|
|
|
217
233
|
|
|
218
234
|
def validate_and_prepare_providers(
|
|
219
|
-
run_config:
|
|
235
|
+
run_config: StackConfig, provider_registry: ProviderRegistry, routing_table_apis: set[Api], router_apis: set[Api]
|
|
220
236
|
) -> dict[str, dict[str, ProviderWithSpec]]:
|
|
221
237
|
"""Validates providers, handles deprecations, and organizes them into a spec dictionary."""
|
|
222
238
|
providers_with_specs: dict[str, dict[str, ProviderWithSpec]] = {}
|
|
@@ -241,24 +257,6 @@ def validate_and_prepare_providers(
|
|
|
241
257
|
key = api_str if api not in router_apis else f"inner-{api_str}"
|
|
242
258
|
providers_with_specs[key] = specs
|
|
243
259
|
|
|
244
|
-
# TODO: remove this logic, telemetry should not have providers.
|
|
245
|
-
# if telemetry has been enabled in the config initialize our internal impl
|
|
246
|
-
# telemetry is not an external API so it SHOULD NOT be auto-routed.
|
|
247
|
-
if run_config.telemetry.enabled:
|
|
248
|
-
specs = {}
|
|
249
|
-
p = InlineProviderSpec(
|
|
250
|
-
api=Api.telemetry,
|
|
251
|
-
provider_type="inline::meta-reference",
|
|
252
|
-
pip_packages=[],
|
|
253
|
-
optional_api_dependencies=[Api.datasetio],
|
|
254
|
-
module="llama_stack.providers.inline.telemetry.meta_reference",
|
|
255
|
-
config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
|
|
256
|
-
description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
|
|
257
|
-
)
|
|
258
|
-
spec = ProviderWithSpec(spec=p, provider_type="inline::meta-reference", provider_id="meta-reference")
|
|
259
|
-
specs["meta-reference"] = spec
|
|
260
|
-
providers_with_specs["telemetry"] = specs
|
|
261
|
-
|
|
262
260
|
return providers_with_specs
|
|
263
261
|
|
|
264
262
|
|
|
@@ -278,7 +276,7 @@ def validate_provider(provider: Provider, api: Api, provider_registry: ProviderR
|
|
|
278
276
|
|
|
279
277
|
|
|
280
278
|
def sort_providers_by_deps(
|
|
281
|
-
providers_with_specs: dict[str, dict[str, ProviderWithSpec]], run_config:
|
|
279
|
+
providers_with_specs: dict[str, dict[str, ProviderWithSpec]], run_config: StackConfig
|
|
282
280
|
) -> list[tuple[str, ProviderWithSpec]]:
|
|
283
281
|
"""Sorts providers based on their dependencies."""
|
|
284
282
|
sorted_providers: list[tuple[str, ProviderWithSpec]] = topological_sort(
|
|
@@ -295,7 +293,7 @@ async def instantiate_providers(
|
|
|
295
293
|
sorted_providers: list[tuple[str, ProviderWithSpec]],
|
|
296
294
|
router_apis: set[Api],
|
|
297
295
|
dist_registry: DistributionRegistry,
|
|
298
|
-
run_config:
|
|
296
|
+
run_config: StackConfig,
|
|
299
297
|
policy: list[AccessRule],
|
|
300
298
|
internal_impls: dict[Api, Any] | None = None,
|
|
301
299
|
) -> dict[Api, Any]:
|
|
@@ -332,6 +330,13 @@ async def instantiate_providers(
|
|
|
332
330
|
api = Api(api_str)
|
|
333
331
|
impls[api] = impl
|
|
334
332
|
|
|
333
|
+
# Post-instantiation: Inject VectorIORouter into VectorStoresRoutingTable
|
|
334
|
+
if Api.vector_io in impls and Api.vector_stores in impls:
|
|
335
|
+
vector_io_router = impls[Api.vector_io]
|
|
336
|
+
vector_stores_routing_table = impls[Api.vector_stores]
|
|
337
|
+
if hasattr(vector_stores_routing_table, "vector_io_router"):
|
|
338
|
+
vector_stores_routing_table.vector_io_router = vector_io_router
|
|
339
|
+
|
|
335
340
|
return impls
|
|
336
341
|
|
|
337
342
|
|
|
@@ -374,7 +379,7 @@ async def instantiate_provider(
|
|
|
374
379
|
deps: dict[Api, Any],
|
|
375
380
|
inner_impls: dict[str, Any],
|
|
376
381
|
dist_registry: DistributionRegistry,
|
|
377
|
-
run_config:
|
|
382
|
+
run_config: StackConfig,
|
|
378
383
|
policy: list[AccessRule],
|
|
379
384
|
):
|
|
380
385
|
provider_spec = provider.spec
|
|
@@ -406,15 +411,19 @@ async def instantiate_provider(
|
|
|
406
411
|
args = [provider_spec.api, inner_impls, deps, dist_registry, policy]
|
|
407
412
|
else:
|
|
408
413
|
method = "get_provider_impl"
|
|
414
|
+
provider_config = provider.config.copy()
|
|
409
415
|
|
|
416
|
+
# Inject vector_stores_config for providers that need it (introspection-based)
|
|
410
417
|
config_type = instantiate_class_type(provider_spec.config_class)
|
|
411
|
-
|
|
418
|
+
if hasattr(config_type, "__fields__") and "vector_stores_config" in config_type.__fields__:
|
|
419
|
+
# Only inject if vector_stores is provided, otherwise let default_factory handle it
|
|
420
|
+
if run_config.vector_stores is not None:
|
|
421
|
+
provider_config["vector_stores_config"] = run_config.vector_stores
|
|
422
|
+
|
|
423
|
+
config = config_type(**provider_config)
|
|
412
424
|
args = [config, deps]
|
|
413
425
|
if "policy" in inspect.signature(getattr(module, method)).parameters:
|
|
414
426
|
args.append(policy)
|
|
415
|
-
if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
|
|
416
|
-
args.append(run_config.telemetry.enabled)
|
|
417
|
-
|
|
418
427
|
fn = getattr(module, method)
|
|
419
428
|
impl = await fn(*args)
|
|
420
429
|
impl.__provider_id__ = provider.provider_id
|
|
@@ -9,11 +9,11 @@ from typing import Any
|
|
|
9
9
|
from llama_stack.core.datatypes import (
|
|
10
10
|
AccessRule,
|
|
11
11
|
RoutedProtocol,
|
|
12
|
+
StackConfig,
|
|
12
13
|
)
|
|
13
|
-
from llama_stack.core.stack import StackRunConfig
|
|
14
14
|
from llama_stack.core.store import DistributionRegistry
|
|
15
|
-
from llama_stack.providers.datatypes import Api, RoutingTable
|
|
16
15
|
from llama_stack.providers.utils.inference.inference_store import InferenceStore
|
|
16
|
+
from llama_stack_api import Api, RoutingTable
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
async def get_routing_table_impl(
|
|
@@ -45,12 +45,13 @@ async def get_routing_table_impl(
|
|
|
45
45
|
raise ValueError(f"API {api.value} not found in router map")
|
|
46
46
|
|
|
47
47
|
impl = api_to_tables[api.value](impls_by_provider_id, dist_registry, policy)
|
|
48
|
+
|
|
48
49
|
await impl.initialize()
|
|
49
50
|
return impl
|
|
50
51
|
|
|
51
52
|
|
|
52
53
|
async def get_auto_router_impl(
|
|
53
|
-
api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config:
|
|
54
|
+
api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config: StackConfig, policy: list[AccessRule]
|
|
54
55
|
) -> Any:
|
|
55
56
|
from .datasets import DatasetIORouter
|
|
56
57
|
from .eval_scoring import EvalRouter, ScoringRouter
|
|
@@ -72,14 +73,6 @@ async def get_auto_router_impl(
|
|
|
72
73
|
raise ValueError(f"API {api.value} not found in router map")
|
|
73
74
|
|
|
74
75
|
api_to_dep_impl = {}
|
|
75
|
-
if run_config.telemetry.enabled:
|
|
76
|
-
api_to_deps = {
|
|
77
|
-
"inference": {"telemetry": Api.telemetry},
|
|
78
|
-
}
|
|
79
|
-
for dep_name, dep_api in api_to_deps.get(api.value, {}).items():
|
|
80
|
-
if dep_api in deps:
|
|
81
|
-
api_to_dep_impl[dep_name] = deps[dep_api]
|
|
82
|
-
|
|
83
76
|
# TODO: move pass configs to routers instead
|
|
84
77
|
if api == Api.inference:
|
|
85
78
|
inference_ref = run_config.storage.stores.inference
|
|
@@ -92,10 +85,13 @@ async def get_auto_router_impl(
|
|
|
92
85
|
)
|
|
93
86
|
await inference_store.initialize()
|
|
94
87
|
api_to_dep_impl["store"] = inference_store
|
|
95
|
-
|
|
96
88
|
elif api == Api.vector_io:
|
|
97
89
|
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
|
|
90
|
+
api_to_dep_impl["inference_api"] = deps.get(Api.inference)
|
|
91
|
+
elif api == Api.safety:
|
|
92
|
+
api_to_dep_impl["safety_config"] = run_config.safety
|
|
98
93
|
|
|
99
94
|
impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
|
|
95
|
+
|
|
100
96
|
await impl.initialize()
|
|
101
97
|
return impl
|
|
@@ -6,11 +6,8 @@
|
|
|
6
6
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from llama_stack.apis.common.responses import PaginatedResponse
|
|
10
|
-
from llama_stack.apis.datasetio import DatasetIO
|
|
11
|
-
from llama_stack.apis.datasets import DatasetPurpose, DataSource
|
|
12
9
|
from llama_stack.log import get_logger
|
|
13
|
-
from
|
|
10
|
+
from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
|
|
14
11
|
|
|
15
12
|
logger = get_logger(name=__name__, category="core::routers")
|
|
16
13
|
|
|
@@ -6,15 +6,18 @@
|
|
|
6
6
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from llama_stack.
|
|
10
|
-
from
|
|
9
|
+
from llama_stack.log import get_logger
|
|
10
|
+
from llama_stack_api import (
|
|
11
|
+
BenchmarkConfig,
|
|
12
|
+
Eval,
|
|
13
|
+
EvaluateResponse,
|
|
14
|
+
Job,
|
|
15
|
+
RoutingTable,
|
|
11
16
|
ScoreBatchResponse,
|
|
12
17
|
ScoreResponse,
|
|
13
18
|
Scoring,
|
|
14
19
|
ScoringFnParams,
|
|
15
20
|
)
|
|
16
|
-
from llama_stack.log import get_logger
|
|
17
|
-
from llama_stack.providers.datatypes import RoutingTable
|
|
18
21
|
|
|
19
22
|
logger = get_logger(name=__name__, category="core::routers")
|
|
20
23
|
|