llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -10,19 +10,31 @@ from typing import Annotated, Any
|
|
|
10
10
|
|
|
11
11
|
from fastapi import Body
|
|
12
12
|
|
|
13
|
-
from llama_stack.
|
|
14
|
-
from llama_stack.
|
|
15
|
-
from
|
|
16
|
-
|
|
13
|
+
from llama_stack.core.datatypes import VectorStoresConfig
|
|
14
|
+
from llama_stack.log import get_logger
|
|
15
|
+
from llama_stack_api import (
|
|
16
|
+
EmbeddedChunk,
|
|
17
|
+
HealthResponse,
|
|
18
|
+
HealthStatus,
|
|
19
|
+
Inference,
|
|
20
|
+
InterleavedContent,
|
|
21
|
+
ModelNotFoundError,
|
|
22
|
+
ModelType,
|
|
23
|
+
ModelTypeError,
|
|
24
|
+
OpenAIChatCompletionRequestWithExtraBody,
|
|
17
25
|
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
|
|
18
26
|
OpenAICreateVectorStoreRequestWithExtraBody,
|
|
27
|
+
OpenAIUserMessageParam,
|
|
19
28
|
QueryChunksResponse,
|
|
29
|
+
RoutingTable,
|
|
20
30
|
SearchRankingOptions,
|
|
21
31
|
VectorIO,
|
|
22
32
|
VectorStoreChunkingStrategy,
|
|
33
|
+
VectorStoreChunkingStrategyStatic,
|
|
34
|
+
VectorStoreChunkingStrategyStaticConfig,
|
|
23
35
|
VectorStoreDeleteResponse,
|
|
24
36
|
VectorStoreFileBatchObject,
|
|
25
|
-
|
|
37
|
+
VectorStoreFileContentResponse,
|
|
26
38
|
VectorStoreFileDeleteResponse,
|
|
27
39
|
VectorStoreFileObject,
|
|
28
40
|
VectorStoreFilesListInBatchResponse,
|
|
@@ -31,9 +43,6 @@ from llama_stack.apis.vector_io import (
|
|
|
31
43
|
VectorStoreObject,
|
|
32
44
|
VectorStoreSearchResponsePage,
|
|
33
45
|
)
|
|
34
|
-
from llama_stack.core.datatypes import VectorStoresConfig
|
|
35
|
-
from llama_stack.log import get_logger
|
|
36
|
-
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
|
|
37
46
|
|
|
38
47
|
logger = get_logger(name=__name__, category="core::routers")
|
|
39
48
|
|
|
@@ -45,10 +54,11 @@ class VectorIORouter(VectorIO):
|
|
|
45
54
|
self,
|
|
46
55
|
routing_table: RoutingTable,
|
|
47
56
|
vector_stores_config: VectorStoresConfig | None = None,
|
|
57
|
+
inference_api: Inference | None = None,
|
|
48
58
|
) -> None:
|
|
49
|
-
logger.debug("Initializing VectorIORouter")
|
|
50
59
|
self.routing_table = routing_table
|
|
51
60
|
self.vector_stores_config = vector_stores_config
|
|
61
|
+
self.inference_api = inference_api
|
|
52
62
|
|
|
53
63
|
async def initialize(self) -> None:
|
|
54
64
|
logger.debug("VectorIORouter.initialize")
|
|
@@ -58,6 +68,46 @@ class VectorIORouter(VectorIO):
|
|
|
58
68
|
logger.debug("VectorIORouter.shutdown")
|
|
59
69
|
pass
|
|
60
70
|
|
|
71
|
+
async def _rewrite_query_for_search(self, query: str) -> str:
|
|
72
|
+
"""Rewrite a search query using the configured LLM model for better retrieval results."""
|
|
73
|
+
if (
|
|
74
|
+
not self.vector_stores_config
|
|
75
|
+
or not self.vector_stores_config.rewrite_query_params
|
|
76
|
+
or not self.vector_stores_config.rewrite_query_params.model
|
|
77
|
+
):
|
|
78
|
+
logger.warning(
|
|
79
|
+
"User is trying to use vector_store query rewriting, but it is not configured. Please configure rewrite_query_params.model in vector_stores config."
|
|
80
|
+
)
|
|
81
|
+
raise ValueError("Query rewriting is not available")
|
|
82
|
+
|
|
83
|
+
if not self.inference_api:
|
|
84
|
+
logger.warning("Query rewriting requires inference API but it is not available")
|
|
85
|
+
raise ValueError("Query rewriting is not available")
|
|
86
|
+
|
|
87
|
+
model = self.vector_stores_config.rewrite_query_params.model
|
|
88
|
+
model_id = f"{model.provider_id}/{model.model_id}"
|
|
89
|
+
|
|
90
|
+
prompt = self.vector_stores_config.rewrite_query_params.prompt.format(query=query)
|
|
91
|
+
|
|
92
|
+
request = OpenAIChatCompletionRequestWithExtraBody(
|
|
93
|
+
model=model_id,
|
|
94
|
+
messages=[OpenAIUserMessageParam(role="user", content=prompt)],
|
|
95
|
+
max_tokens=self.vector_stores_config.rewrite_query_params.max_tokens or 100,
|
|
96
|
+
temperature=self.vector_stores_config.rewrite_query_params.temperature or 0.3,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
try:
|
|
100
|
+
response = await self.inference_api.openai_chat_completion(request)
|
|
101
|
+
content = response.choices[0].message.content
|
|
102
|
+
if content is None:
|
|
103
|
+
logger.error(f"LLM returned None content for query rewriting. Model: {model_id}")
|
|
104
|
+
raise RuntimeError("Query rewrite failed due to an internal error")
|
|
105
|
+
rewritten_query: str = content.strip()
|
|
106
|
+
return rewritten_query
|
|
107
|
+
except Exception as e:
|
|
108
|
+
logger.error(f"Query rewrite failed with LLM call error. Model: {model_id}, Error: {e}")
|
|
109
|
+
raise RuntimeError("Query rewrite failed due to an internal error") from e
|
|
110
|
+
|
|
61
111
|
async def _get_embedding_model_dimension(self, embedding_model_id: str) -> int:
|
|
62
112
|
"""Get the embedding dimension for a specific embedding model."""
|
|
63
113
|
all_models = await self.routing_table.get_all_with_type("model")
|
|
@@ -73,27 +123,25 @@ class VectorIORouter(VectorIO):
|
|
|
73
123
|
|
|
74
124
|
async def insert_chunks(
|
|
75
125
|
self,
|
|
76
|
-
|
|
77
|
-
chunks: list[
|
|
126
|
+
vector_store_id: str,
|
|
127
|
+
chunks: list[EmbeddedChunk],
|
|
78
128
|
ttl_seconds: int | None = None,
|
|
79
129
|
) -> None:
|
|
80
130
|
doc_ids = [chunk.document_id for chunk in chunks[:3]]
|
|
81
131
|
logger.debug(
|
|
82
|
-
f"VectorIORouter.insert_chunks: {
|
|
132
|
+
f"VectorIORouter.insert_chunks: {vector_store_id}, {len(chunks)} chunks, "
|
|
83
133
|
f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}"
|
|
84
134
|
)
|
|
85
|
-
|
|
86
|
-
return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)
|
|
135
|
+
return await self.routing_table.insert_chunks(vector_store_id, chunks, ttl_seconds)
|
|
87
136
|
|
|
88
137
|
async def query_chunks(
|
|
89
138
|
self,
|
|
90
|
-
|
|
139
|
+
vector_store_id: str,
|
|
91
140
|
query: InterleavedContent,
|
|
92
141
|
params: dict[str, Any] | None = None,
|
|
93
142
|
) -> QueryChunksResponse:
|
|
94
|
-
logger.debug(f"VectorIORouter.query_chunks: {
|
|
95
|
-
|
|
96
|
-
return await provider.query_chunks(vector_db_id, query, params)
|
|
143
|
+
logger.debug(f"VectorIORouter.query_chunks: {vector_store_id}")
|
|
144
|
+
return await self.routing_table.query_chunks(vector_store_id, query, params)
|
|
97
145
|
|
|
98
146
|
# OpenAI Vector Stores API endpoints
|
|
99
147
|
async def openai_create_vector_store(
|
|
@@ -120,6 +168,14 @@ class VectorIORouter(VectorIO):
|
|
|
120
168
|
if embedding_model is not None and embedding_dimension is None:
|
|
121
169
|
embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
|
|
122
170
|
|
|
171
|
+
# Validate that embedding model exists and is of the correct type
|
|
172
|
+
if embedding_model is not None:
|
|
173
|
+
model = await self.routing_table.get_object_by_identifier("model", embedding_model)
|
|
174
|
+
if model is None:
|
|
175
|
+
raise ModelNotFoundError(embedding_model)
|
|
176
|
+
if model.model_type != ModelType.embedding:
|
|
177
|
+
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
|
|
178
|
+
|
|
123
179
|
# Auto-select provider if not specified
|
|
124
180
|
if provider_id is None:
|
|
125
181
|
num_providers = len(self.routing_table.impls_by_provider_id)
|
|
@@ -167,6 +223,13 @@ class VectorIORouter(VectorIO):
|
|
|
167
223
|
if embedding_dimension is not None:
|
|
168
224
|
params.model_extra["embedding_dimension"] = embedding_dimension
|
|
169
225
|
|
|
226
|
+
# Set chunking strategy explicitly if not provided
|
|
227
|
+
if params.chunking_strategy is None or params.chunking_strategy.type == "auto":
|
|
228
|
+
# actualize the chunking strategy to static
|
|
229
|
+
params.chunking_strategy = VectorStoreChunkingStrategyStatic(
|
|
230
|
+
static=VectorStoreChunkingStrategyStaticConfig()
|
|
231
|
+
)
|
|
232
|
+
|
|
170
233
|
return await provider.openai_create_vector_store(params)
|
|
171
234
|
|
|
172
235
|
async def openai_list_vector_stores(
|
|
@@ -183,9 +246,8 @@ class VectorIORouter(VectorIO):
|
|
|
183
246
|
all_stores = []
|
|
184
247
|
for vector_store in vector_stores:
|
|
185
248
|
try:
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
all_stores.append(vector_store)
|
|
249
|
+
vector_store_obj = await self.routing_table.openai_retrieve_vector_store(vector_store.identifier)
|
|
250
|
+
all_stores.append(vector_store_obj)
|
|
189
251
|
except Exception as e:
|
|
190
252
|
logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}")
|
|
191
253
|
continue
|
|
@@ -227,8 +289,7 @@ class VectorIORouter(VectorIO):
|
|
|
227
289
|
vector_store_id: str,
|
|
228
290
|
) -> VectorStoreObject:
|
|
229
291
|
logger.debug(f"VectorIORouter.openai_retrieve_vector_store: {vector_store_id}")
|
|
230
|
-
|
|
231
|
-
return await provider.openai_retrieve_vector_store(vector_store_id)
|
|
292
|
+
return await self.routing_table.openai_retrieve_vector_store(vector_store_id)
|
|
232
293
|
|
|
233
294
|
async def openai_update_vector_store(
|
|
234
295
|
self,
|
|
@@ -238,8 +299,14 @@ class VectorIORouter(VectorIO):
|
|
|
238
299
|
metadata: dict[str, Any] | None = None,
|
|
239
300
|
) -> VectorStoreObject:
|
|
240
301
|
logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}")
|
|
241
|
-
|
|
242
|
-
|
|
302
|
+
|
|
303
|
+
# Check if provider_id is being changed (not supported)
|
|
304
|
+
if metadata and "provider_id" in metadata:
|
|
305
|
+
current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id)
|
|
306
|
+
if current_store and current_store.provider_id != metadata["provider_id"]:
|
|
307
|
+
raise ValueError("provider_id cannot be changed after vector store creation")
|
|
308
|
+
|
|
309
|
+
return await self.routing_table.openai_update_vector_store(
|
|
243
310
|
vector_store_id=vector_store_id,
|
|
244
311
|
name=name,
|
|
245
312
|
expires_after=expires_after,
|
|
@@ -264,14 +331,23 @@ class VectorIORouter(VectorIO):
|
|
|
264
331
|
search_mode: str | None = "vector",
|
|
265
332
|
) -> VectorStoreSearchResponsePage:
|
|
266
333
|
logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
|
|
267
|
-
|
|
268
|
-
|
|
334
|
+
|
|
335
|
+
# Handle query rewriting at the router level
|
|
336
|
+
search_query = query
|
|
337
|
+
if rewrite_query:
|
|
338
|
+
if isinstance(query, list):
|
|
339
|
+
original_query = " ".join(query)
|
|
340
|
+
else:
|
|
341
|
+
original_query = query
|
|
342
|
+
search_query = await self._rewrite_query_for_search(original_query)
|
|
343
|
+
|
|
344
|
+
return await self.routing_table.openai_search_vector_store(
|
|
269
345
|
vector_store_id=vector_store_id,
|
|
270
|
-
query=
|
|
346
|
+
query=search_query,
|
|
271
347
|
filters=filters,
|
|
272
348
|
max_num_results=max_num_results,
|
|
273
349
|
ranking_options=ranking_options,
|
|
274
|
-
rewrite_query=
|
|
350
|
+
rewrite_query=False, # Already handled at router level
|
|
275
351
|
search_mode=search_mode,
|
|
276
352
|
)
|
|
277
353
|
|
|
@@ -283,8 +359,9 @@ class VectorIORouter(VectorIO):
|
|
|
283
359
|
chunking_strategy: VectorStoreChunkingStrategy | None = None,
|
|
284
360
|
) -> VectorStoreFileObject:
|
|
285
361
|
logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
|
|
286
|
-
|
|
287
|
-
|
|
362
|
+
if chunking_strategy is None or chunking_strategy.type == "auto":
|
|
363
|
+
chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
|
|
364
|
+
return await self.routing_table.openai_attach_file_to_vector_store(
|
|
288
365
|
vector_store_id=vector_store_id,
|
|
289
366
|
file_id=file_id,
|
|
290
367
|
attributes=attributes,
|
|
@@ -301,8 +378,7 @@ class VectorIORouter(VectorIO):
|
|
|
301
378
|
filter: VectorStoreFileStatus | None = None,
|
|
302
379
|
) -> list[VectorStoreFileObject]:
|
|
303
380
|
logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}")
|
|
304
|
-
|
|
305
|
-
return await provider.openai_list_files_in_vector_store(
|
|
381
|
+
return await self.routing_table.openai_list_files_in_vector_store(
|
|
306
382
|
vector_store_id=vector_store_id,
|
|
307
383
|
limit=limit,
|
|
308
384
|
order=order,
|
|
@@ -317,8 +393,7 @@ class VectorIORouter(VectorIO):
|
|
|
317
393
|
file_id: str,
|
|
318
394
|
) -> VectorStoreFileObject:
|
|
319
395
|
logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file: {vector_store_id}, {file_id}")
|
|
320
|
-
|
|
321
|
-
return await provider.openai_retrieve_vector_store_file(
|
|
396
|
+
return await self.routing_table.openai_retrieve_vector_store_file(
|
|
322
397
|
vector_store_id=vector_store_id,
|
|
323
398
|
file_id=file_id,
|
|
324
399
|
)
|
|
@@ -327,12 +402,19 @@ class VectorIORouter(VectorIO):
|
|
|
327
402
|
self,
|
|
328
403
|
vector_store_id: str,
|
|
329
404
|
file_id: str,
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
405
|
+
include_embeddings: bool | None = False,
|
|
406
|
+
include_metadata: bool | None = False,
|
|
407
|
+
) -> VectorStoreFileContentResponse:
|
|
408
|
+
logger.debug(
|
|
409
|
+
f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, "
|
|
410
|
+
f"include_embeddings={include_embeddings}, include_metadata={include_metadata}"
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
return await self.routing_table.openai_retrieve_vector_store_file_contents(
|
|
334
414
|
vector_store_id=vector_store_id,
|
|
335
415
|
file_id=file_id,
|
|
416
|
+
include_embeddings=include_embeddings,
|
|
417
|
+
include_metadata=include_metadata,
|
|
336
418
|
)
|
|
337
419
|
|
|
338
420
|
async def openai_update_vector_store_file(
|
|
@@ -342,8 +424,7 @@ class VectorIORouter(VectorIO):
|
|
|
342
424
|
attributes: dict[str, Any],
|
|
343
425
|
) -> VectorStoreFileObject:
|
|
344
426
|
logger.debug(f"VectorIORouter.openai_update_vector_store_file: {vector_store_id}, {file_id}")
|
|
345
|
-
|
|
346
|
-
return await provider.openai_update_vector_store_file(
|
|
427
|
+
return await self.routing_table.openai_update_vector_store_file(
|
|
347
428
|
vector_store_id=vector_store_id,
|
|
348
429
|
file_id=file_id,
|
|
349
430
|
attributes=attributes,
|
|
@@ -355,8 +436,7 @@ class VectorIORouter(VectorIO):
|
|
|
355
436
|
file_id: str,
|
|
356
437
|
) -> VectorStoreFileDeleteResponse:
|
|
357
438
|
logger.debug(f"VectorIORouter.openai_delete_vector_store_file: {vector_store_id}, {file_id}")
|
|
358
|
-
|
|
359
|
-
return await provider.openai_delete_vector_store_file(
|
|
439
|
+
return await self.routing_table.openai_delete_vector_store_file(
|
|
360
440
|
vector_store_id=vector_store_id,
|
|
361
441
|
file_id=file_id,
|
|
362
442
|
)
|
|
@@ -392,8 +472,10 @@ class VectorIORouter(VectorIO):
|
|
|
392
472
|
logger.debug(
|
|
393
473
|
f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files"
|
|
394
474
|
)
|
|
395
|
-
|
|
396
|
-
|
|
475
|
+
return await self.routing_table.openai_create_vector_store_file_batch(
|
|
476
|
+
vector_store_id=vector_store_id,
|
|
477
|
+
params=params,
|
|
478
|
+
)
|
|
397
479
|
|
|
398
480
|
async def openai_retrieve_vector_store_file_batch(
|
|
399
481
|
self,
|
|
@@ -401,8 +483,7 @@ class VectorIORouter(VectorIO):
|
|
|
401
483
|
vector_store_id: str,
|
|
402
484
|
) -> VectorStoreFileBatchObject:
|
|
403
485
|
logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_batch: {batch_id}, {vector_store_id}")
|
|
404
|
-
|
|
405
|
-
return await provider.openai_retrieve_vector_store_file_batch(
|
|
486
|
+
return await self.routing_table.openai_retrieve_vector_store_file_batch(
|
|
406
487
|
batch_id=batch_id,
|
|
407
488
|
vector_store_id=vector_store_id,
|
|
408
489
|
)
|
|
@@ -418,8 +499,7 @@ class VectorIORouter(VectorIO):
|
|
|
418
499
|
order: str | None = "desc",
|
|
419
500
|
) -> VectorStoreFilesListInBatchResponse:
|
|
420
501
|
logger.debug(f"VectorIORouter.openai_list_files_in_vector_store_file_batch: {batch_id}, {vector_store_id}")
|
|
421
|
-
|
|
422
|
-
return await provider.openai_list_files_in_vector_store_file_batch(
|
|
502
|
+
return await self.routing_table.openai_list_files_in_vector_store_file_batch(
|
|
423
503
|
batch_id=batch_id,
|
|
424
504
|
vector_store_id=vector_store_id,
|
|
425
505
|
after=after,
|
|
@@ -435,8 +515,7 @@ class VectorIORouter(VectorIO):
|
|
|
435
515
|
vector_store_id: str,
|
|
436
516
|
) -> VectorStoreFileBatchObject:
|
|
437
517
|
logger.debug(f"VectorIORouter.openai_cancel_vector_store_file_batch: {batch_id}, {vector_store_id}")
|
|
438
|
-
|
|
439
|
-
return await provider.openai_cancel_vector_store_file_batch(
|
|
518
|
+
return await self.routing_table.openai_cancel_vector_store_file_batch(
|
|
440
519
|
batch_id=batch_id,
|
|
441
520
|
vector_store_id=vector_store_id,
|
|
442
521
|
)
|
|
@@ -4,13 +4,20 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from typing import Any
|
|
8
7
|
|
|
9
|
-
from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
|
|
10
8
|
from llama_stack.core.datatypes import (
|
|
11
9
|
BenchmarkWithOwner,
|
|
12
10
|
)
|
|
13
11
|
from llama_stack.log import get_logger
|
|
12
|
+
from llama_stack_api import (
|
|
13
|
+
Benchmark,
|
|
14
|
+
Benchmarks,
|
|
15
|
+
GetBenchmarkRequest,
|
|
16
|
+
ListBenchmarksRequest,
|
|
17
|
+
ListBenchmarksResponse,
|
|
18
|
+
RegisterBenchmarkRequest,
|
|
19
|
+
UnregisterBenchmarkRequest,
|
|
20
|
+
)
|
|
14
21
|
|
|
15
22
|
from .common import CommonRoutingTableImpl
|
|
16
23
|
|
|
@@ -18,26 +25,21 @@ logger = get_logger(name=__name__, category="core::routing_tables")
|
|
|
18
25
|
|
|
19
26
|
|
|
20
27
|
class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
|
|
21
|
-
async def list_benchmarks(self) -> ListBenchmarksResponse:
|
|
28
|
+
async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
|
|
22
29
|
return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark"))
|
|
23
30
|
|
|
24
|
-
async def get_benchmark(self,
|
|
25
|
-
benchmark = await self.get_object_by_identifier("benchmark", benchmark_id)
|
|
31
|
+
async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
|
|
32
|
+
benchmark = await self.get_object_by_identifier("benchmark", request.benchmark_id)
|
|
26
33
|
if benchmark is None:
|
|
27
|
-
raise ValueError(f"Benchmark '{benchmark_id}' not found")
|
|
34
|
+
raise ValueError(f"Benchmark '{request.benchmark_id}' not found")
|
|
28
35
|
return benchmark
|
|
29
36
|
|
|
30
37
|
async def register_benchmark(
|
|
31
38
|
self,
|
|
32
|
-
|
|
33
|
-
dataset_id: str,
|
|
34
|
-
scoring_functions: list[str],
|
|
35
|
-
metadata: dict[str, Any] | None = None,
|
|
36
|
-
provider_benchmark_id: str | None = None,
|
|
37
|
-
provider_id: str | None = None,
|
|
39
|
+
request: RegisterBenchmarkRequest,
|
|
38
40
|
) -> None:
|
|
39
|
-
if metadata is None
|
|
40
|
-
|
|
41
|
+
metadata = request.metadata if request.metadata is not None else {}
|
|
42
|
+
provider_id = request.provider_id
|
|
41
43
|
if provider_id is None:
|
|
42
44
|
if len(self.impls_by_provider_id) == 1:
|
|
43
45
|
provider_id = list(self.impls_by_provider_id.keys())[0]
|
|
@@ -45,18 +47,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
|
|
|
45
47
|
raise ValueError(
|
|
46
48
|
"No provider specified and multiple providers available. Please specify a provider_id."
|
|
47
49
|
)
|
|
50
|
+
provider_benchmark_id = request.provider_benchmark_id
|
|
48
51
|
if provider_benchmark_id is None:
|
|
49
|
-
provider_benchmark_id = benchmark_id
|
|
52
|
+
provider_benchmark_id = request.benchmark_id
|
|
50
53
|
benchmark = BenchmarkWithOwner(
|
|
51
|
-
identifier=benchmark_id,
|
|
52
|
-
dataset_id=dataset_id,
|
|
53
|
-
scoring_functions=scoring_functions,
|
|
54
|
+
identifier=request.benchmark_id,
|
|
55
|
+
dataset_id=request.dataset_id,
|
|
56
|
+
scoring_functions=request.scoring_functions,
|
|
54
57
|
metadata=metadata,
|
|
55
58
|
provider_id=provider_id,
|
|
56
59
|
provider_resource_id=provider_benchmark_id,
|
|
57
60
|
)
|
|
58
61
|
await self.register_object(benchmark)
|
|
59
62
|
|
|
60
|
-
async def unregister_benchmark(self,
|
|
61
|
-
|
|
63
|
+
async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
|
|
64
|
+
get_request = GetBenchmarkRequest(benchmark_id=request.benchmark_id)
|
|
65
|
+
existing_benchmark = await self.get_benchmark(get_request)
|
|
62
66
|
await self.unregister_object(existing_benchmark)
|
|
@@ -6,9 +6,6 @@
|
|
|
6
6
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from llama_stack.apis.common.errors import ModelNotFoundError
|
|
10
|
-
from llama_stack.apis.models import Model
|
|
11
|
-
from llama_stack.apis.resource import ResourceType
|
|
12
9
|
from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
|
|
13
10
|
from llama_stack.core.access_control.datatypes import Action
|
|
14
11
|
from llama_stack.core.datatypes import (
|
|
@@ -21,7 +18,7 @@ from llama_stack.core.datatypes import (
|
|
|
21
18
|
from llama_stack.core.request_headers import get_authenticated_user
|
|
22
19
|
from llama_stack.core.store import DistributionRegistry
|
|
23
20
|
from llama_stack.log import get_logger
|
|
24
|
-
from
|
|
21
|
+
from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
|
|
25
22
|
|
|
26
23
|
logger = get_logger(name=__name__, category="core::routing_tables")
|
|
27
24
|
|
|
@@ -5,24 +5,26 @@
|
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
7
|
import uuid
|
|
8
|
-
from typing import Any
|
|
9
8
|
|
|
10
|
-
from llama_stack.
|
|
11
|
-
|
|
9
|
+
from llama_stack.core.datatypes import (
|
|
10
|
+
DatasetWithOwner,
|
|
11
|
+
)
|
|
12
|
+
from llama_stack.log import get_logger
|
|
13
|
+
from llama_stack_api import (
|
|
12
14
|
Dataset,
|
|
13
|
-
|
|
14
|
-
Datasets,
|
|
15
|
+
DatasetNotFoundError,
|
|
15
16
|
DatasetType,
|
|
16
|
-
DataSource,
|
|
17
17
|
ListDatasetsResponse,
|
|
18
|
+
ResourceType,
|
|
18
19
|
RowsDataSource,
|
|
19
20
|
URIDataSource,
|
|
20
21
|
)
|
|
21
|
-
from
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
from llama_stack_api.datasets.api import (
|
|
23
|
+
Datasets,
|
|
24
|
+
GetDatasetRequest,
|
|
25
|
+
RegisterDatasetRequest,
|
|
26
|
+
UnregisterDatasetRequest,
|
|
24
27
|
)
|
|
25
|
-
from llama_stack.log import get_logger
|
|
26
28
|
|
|
27
29
|
from .common import CommonRoutingTableImpl
|
|
28
30
|
|
|
@@ -33,19 +35,17 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
|
|
|
33
35
|
async def list_datasets(self) -> ListDatasetsResponse:
|
|
34
36
|
return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value))
|
|
35
37
|
|
|
36
|
-
async def get_dataset(self,
|
|
37
|
-
dataset = await self.get_object_by_identifier("dataset", dataset_id)
|
|
38
|
+
async def get_dataset(self, request: GetDatasetRequest) -> Dataset:
|
|
39
|
+
dataset = await self.get_object_by_identifier("dataset", request.dataset_id)
|
|
38
40
|
if dataset is None:
|
|
39
|
-
raise DatasetNotFoundError(dataset_id)
|
|
41
|
+
raise DatasetNotFoundError(request.dataset_id)
|
|
40
42
|
return dataset
|
|
41
43
|
|
|
42
|
-
async def register_dataset(
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
dataset_id: str | None = None,
|
|
48
|
-
) -> Dataset:
|
|
44
|
+
async def register_dataset(self, request: RegisterDatasetRequest) -> Dataset:
|
|
45
|
+
purpose = request.purpose
|
|
46
|
+
source = request.source
|
|
47
|
+
metadata = request.metadata
|
|
48
|
+
dataset_id = request.dataset_id
|
|
49
49
|
if isinstance(source, dict):
|
|
50
50
|
if source["type"] == "uri":
|
|
51
51
|
source = URIDataSource.parse_obj(source)
|
|
@@ -86,6 +86,6 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
|
|
|
86
86
|
await self.register_object(dataset)
|
|
87
87
|
return dataset
|
|
88
88
|
|
|
89
|
-
async def unregister_dataset(self,
|
|
90
|
-
dataset = await self.get_dataset(dataset_id)
|
|
89
|
+
async def unregister_dataset(self, request: UnregisterDatasetRequest) -> None:
|
|
90
|
+
dataset = await self.get_dataset(GetDatasetRequest(dataset_id=request.dataset_id))
|
|
91
91
|
await self.unregister_object(dataset)
|