llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/utils/memory/openai_vector_store_mixin.py

@@ -15,21 +15,37 @@ from typing import Annotated, Any
 from fastapi import Body
 from pydantic import TypeAdapter
 
-from llama_stack.
-from llama_stack.
-from llama_stack.
+from llama_stack.core.datatypes import VectorStoresConfig
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
+from llama_stack.providers.utils.memory.vector_store import (
+    ChunkForDeletion,
+    content_from_data_and_mime_type,
+    make_overlapped_chunks,
+)
+from llama_stack_api import (
     Chunk,
+    EmbeddedChunk,
+    Files,
+    Inference,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIFileObject,
     QueryChunksResponse,
     SearchRankingOptions,
+    VectorStore,
     VectorStoreChunkingStrategy,
     VectorStoreChunkingStrategyAuto,
     VectorStoreChunkingStrategyStatic,
+    VectorStoreChunkingStrategyStaticConfig,
     VectorStoreContent,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
-
+    VectorStoreFileContentResponse,
     VectorStoreFileCounts,
     VectorStoreFileDeleteResponse,
     VectorStoreFileLastError,
@@ -38,29 +54,22 @@ from llama_stack.apis.vector_io import (
     VectorStoreFileStatus,
     VectorStoreListFilesResponse,
     VectorStoreListResponse,
+    VectorStoreNotFoundError,
     VectorStoreObject,
     VectorStoreSearchResponse,
     VectorStoreSearchResponsePage,
 )
-from 
-
-
-from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.memory.vector_store import (
-    ChunkForDeletion,
-    content_from_data_and_mime_type,
-    make_overlapped_chunks,
+from llama_stack_api.files.models import (
+    RetrieveFileContentRequest,
+    RetrieveFileRequest,
 )
+from llama_stack_api.internal.kvstore import KVStore
 
 EMBEDDING_DIMENSION = 768
 
 logger = get_logger(name=__name__, category="providers::utils")
 
 # Constants for OpenAI vector stores
-CHUNK_MULTIPLIER = 5
-FILE_BATCH_CLEANUP_INTERVAL_SECONDS = 24 * 60 * 60  # 1 day in seconds
-MAX_CONCURRENT_FILES_PER_BATCH = 3  # Maximum concurrent file processing within a batch
-FILE_BATCH_CHUNK_SIZE = 10  # Process files in chunks of this size
 
 VERSION = "v3"
 VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
@@ -81,15 +90,29 @@ class OpenAIVectorStoreMixin(ABC):
     # to properly initialize the mixin attributes.
     def __init__(
         self,
+        inference_api: Inference,
         files_api: Files | None = None,
         kvstore: KVStore | None = None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ):
+        if not inference_api:
+            raise RuntimeError("Inference API is required for vector store operations")
+
+        self.inference_api = inference_api
         self.openai_vector_stores: dict[str, dict[str, Any]] = {}
         self.openai_file_batches: dict[str, dict[str, Any]] = {}
         self.files_api = files_api
         self.kvstore = kvstore
+        self.vector_stores_config = vector_stores_config or VectorStoresConfig()
         self._last_file_batch_cleanup_time = 0
         self._file_batch_tasks: dict[str, asyncio.Task[None]] = {}
+        self._vector_store_locks: dict[str, asyncio.Lock] = {}
+
+    def _get_vector_store_lock(self, vector_store_id: str) -> asyncio.Lock:
+        """Get or create a lock for a specific vector store."""
+        if vector_store_id not in self._vector_store_locks:
+            self._vector_store_locks[vector_store_id] = asyncio.Lock()
+        return self._vector_store_locks[vector_store_id]
 
     async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
         """Save vector store metadata to persistent storage."""
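The `_get_vector_store_lock` helper introduced above is a per-resource lock map: concurrent operations on the same vector store serialize, while operations on different stores proceed in parallel. A minimal standalone sketch of that pattern, with illustrative names rather than the package's own:

```python
import asyncio

class PerKeyLocks:
    """Lazily create one asyncio.Lock per key.

    No extra synchronization is needed: the dict check-and-insert runs
    between awaits on the single event-loop thread.
    """

    def __init__(self) -> None:
        self._locks: dict[str, asyncio.Lock] = {}

    def get(self, key: str) -> asyncio.Lock:
        if key not in self._locks:
            self._locks[key] = asyncio.Lock()
        return self._locks[key]

async def bump(locks: PerKeyLocks, store_id: str, counts: dict[str, int]) -> None:
    # Updates to the same store are serialized; other stores are unaffected.
    async with locks.get(store_id):
        counts[store_id] = counts.get(store_id, 0) + 1

async def main() -> None:
    locks = PerKeyLocks()
    counts: dict[str, int] = {}
    await asyncio.gather(*(bump(locks, "vs_1", counts) for _ in range(10)))
    assert counts["vs_1"] == 10

asyncio.run(main())
```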
@@ -333,8 +356,8 @@ class OpenAIVectorStoreMixin(ABC):
     @abstractmethod
     async def insert_chunks(
         self,
-
-        chunks: list[
+        vector_store_id: str,
+        chunks: list[EmbeddedChunk],
         ttl_seconds: int | None = None,
     ) -> None:
         """Insert chunks into a vector database (provider-specific implementation)."""
@@ -342,7 +365,7 @@ class OpenAIVectorStoreMixin(ABC):
 
     @abstractmethod
     async def query_chunks(
-        self,
+        self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
         """Query chunks from a vector database (provider-specific implementation)."""
         pass
@@ -414,6 +437,10 @@ class OpenAIVectorStoreMixin(ABC):
             in_progress=0,
             total=0,
         )
+        if not params.chunking_strategy or params.chunking_strategy.type == "auto":
+            chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
+        else:
+            chunking_strategy = params.chunking_strategy
         store_info: dict[str, Any] = {
             "id": vector_store_id,
             "object": "vector_store",
@@ -426,7 +453,7 @@ class OpenAIVectorStoreMixin(ABC):
             "expires_at": None,
             "last_active_at": created_at,
             "file_ids": [],
-            "chunking_strategy":
+            "chunking_strategy": chunking_strategy.model_dump(),
         }
 
         # Add provider information to metadata if provided
@@ -434,6 +461,11 @@ class OpenAIVectorStoreMixin(ABC):
             metadata["provider_id"] = provider_id
         if provider_vector_store_id:
             metadata["provider_vector_store_id"] = provider_vector_store_id
+
+        # Add embedding configuration to metadata for file processing
+        metadata["embedding_model"] = embedding_model
+        metadata["embedding_dimension"] = str(embedding_dimension)
+
         store_info["metadata"] = metadata
 
         # Save to persistent storage (provider-specific)
@@ -445,7 +477,13 @@ class OpenAIVectorStoreMixin(ABC):
         # Now that our vector store is created, attach any files that were provided
         file_ids = params.file_ids or []
         tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids]
-
+        # Use return_exceptions=True to handle individual file attachment failures gracefully
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        # Log any exceptions but don't fail the vector store creation
+        for i, result in enumerate(results):
+            if isinstance(result, Exception):
+                logger.warning(f"Failed to attach file {file_ids[i]} to vector store {vector_store_id}: {result}")
 
         # Get the updated store info and return it
         store_info = self.openai_vector_stores[vector_store_id]
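The new attachment logic switches to `asyncio.gather(..., return_exceptions=True)`, the standard way to let sibling tasks complete even when some fail: exceptions come back as values instead of propagating and abandoning the rest of the batch. A self-contained sketch of the pattern (the `attach` coroutine is a stand-in, not the package's method):

```python
import asyncio

async def attach(file_id: str) -> str:
    if file_id == "bad":
        raise ValueError(f"cannot attach {file_id}")
    return file_id

async def main() -> None:
    file_ids = ["a", "bad", "c"]
    results = await asyncio.gather(*(attach(f) for f in file_ids), return_exceptions=True)
    for file_id, result in zip(file_ids, results):
        if isinstance(result, Exception):
            print(f"failed:   {file_id} ({result})")  # logged, not re-raised
        else:
            print(f"attached: {result}")

asyncio.run(main())
```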
@@ -579,7 +617,11 @@ class OpenAIVectorStoreMixin(ABC):
             str | None
         ) = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
-        """Search for chunks in a vector store.
+        """Search for chunks in a vector store.
+
+        Note: Query rewriting is handled at the router level, not here.
+        The rewrite_query parameter is kept for API compatibility but is ignored.
+        """
         max_num_results = max_num_results or 10
 
         # Validate search_mode
@@ -602,21 +644,24 @@ class OpenAIVectorStoreMixin(ABC):
                 else 0.0
             )
             params = {
-                "max_chunks": max_num_results *
+                "max_chunks": max_num_results * self.vector_stores_config.chunk_retrieval_params.chunk_multiplier,
                 "score_threshold": score_threshold,
                 "mode": search_mode,
             }
             # TODO: Add support for ranking_options.ranker
 
             response = await self.query_chunks(
-
+                vector_store_id=vector_store_id,
                 query=search_query,
                 params=params,
             )
 
             # Convert response to OpenAI format
             data = []
-            for
+            for embedded_chunk, score in zip(response.chunks, response.scores, strict=False):
+                # EmbeddedChunk inherits from Chunk, so use it directly
+                chunk = embedded_chunk
+
                 # Apply filters if provided
                 if filters:
                     # Simple metadata filtering
@@ -637,7 +682,7 @@ class OpenAIVectorStoreMixin(ABC):
                             break
 
             return VectorStoreSearchResponsePage(
-                search_query=
+                search_query=query if isinstance(query, list) else [query],
                 data=data,
                 has_more=False,  # For simplicity, we don't implement pagination here
                 next_page=None,
@@ -647,7 +692,7 @@ class OpenAIVectorStoreMixin(ABC):
             logger.error(f"Error searching vector store {vector_store_id}: {e}")
             # Return empty results on error
             return VectorStoreSearchResponsePage(
-                search_query=
+                search_query=query if isinstance(query, list) else [query],
                 data=[],
                 has_more=False,
                 next_page=None,
@@ -699,34 +744,35 @@ class OpenAIVectorStoreMixin(ABC):
                 # Unknown filter type, default to no match
                 raise ValueError(f"Unsupported filter type: {filter_type}")
 
-    def _chunk_to_vector_store_content(
-
+    def _chunk_to_vector_store_content(
+        self, chunk: EmbeddedChunk, include_embeddings: bool = False, include_metadata: bool = False
+    ) -> list[VectorStoreContent]:
+        def extract_fields() -> dict:
+            """Extract metadata fields from chunk based on include flags."""
+            return {
+                "chunk_metadata": chunk.chunk_metadata if include_metadata else None,
+                "metadata": chunk.metadata if include_metadata else None,
+                "embedding": chunk.embedding if include_embeddings else None,
+            }
+
+        fields = extract_fields()
+
         if isinstance(chunk.content, str):
-
-
-                type="text",
-                text=chunk.content,
-            )
-            ]
+            content_item = VectorStoreContent(type="text", text=chunk.content, **fields)
+            content = [content_item]
         elif isinstance(chunk.content, list):
             # TODO: Add support for other types of content
-            content = [
-
-
-                text=item.text,
-
-                for item in chunk.content
-                if item.type == "text"
-            ]
+            content = []
+            for item in chunk.content:
+                if item.type == "text":
+                    content_item = VectorStoreContent(type="text", text=item.text, **fields)
+                    content.append(content_item)
         else:
             if chunk.content.type != "text":
                 raise ValueError(f"Unsupported content type: {chunk.content.type}")
-
-
-
-                text=chunk.content.text,
-            )
-            ]
+
+            content_item = VectorStoreContent(type="text", text=chunk.content.text, **fields)
+            content = [content_item]
         return content
 
     async def openai_attach_file_to_vector_store(
@@ -751,6 +797,7 @@ class OpenAIVectorStoreMixin(ABC):
         chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto()
         created_at = int(time.time())
         chunks: list[Chunk] = []
+        embedded_chunks: list[EmbeddedChunk] = []
         file_response: OpenAIFileObject | None = None
 
         vector_store_file_object = VectorStoreFileObject(
@@ -779,15 +826,22 @@ class OpenAIVectorStoreMixin(ABC):
             chunk_overlap_tokens = 400
 
         try:
-            file_response = await self.files_api.openai_retrieve_file(file_id)
+            file_response = await self.files_api.openai_retrieve_file(RetrieveFileRequest(file_id=file_id))
             mime_type, _ = mimetypes.guess_type(file_response.filename)
-            content_response = await self.files_api.openai_retrieve_file_content(
+            content_response = await self.files_api.openai_retrieve_file_content(
+                RetrieveFileContentRequest(file_id=file_id)
+            )
 
             content = content_from_data_and_mime_type(content_response.body, mime_type)
 
             chunk_attributes = attributes.copy()
             chunk_attributes["filename"] = file_response.filename
 
+            # Get embedding model info from vector store metadata
+            store_info = self.openai_vector_stores[vector_store_id]
+            embedding_model = store_info["metadata"].get("embedding_model")
+            embedding_dimension = store_info["metadata"].get("embedding_dimension")
+
             chunks = make_overlapped_chunks(
                 file_id,
                 content,
@@ -802,9 +856,42 @@ class OpenAIVectorStoreMixin(ABC):
                     message="No chunks were generated from the file",
                 )
             else:
+                # Validate embedding model and dimension are available
+                if not embedding_model:
+                    raise RuntimeError(f"Vector store {vector_store_id} is not properly configured for file processing")
+                if not embedding_dimension:
+                    raise RuntimeError(f"Vector store {vector_store_id} is not properly configured for file processing")
+
+                # Generate embeddings for all chunks before insertion
+
+                # Prepare embedding request for all chunks
+                params = OpenAIEmbeddingsRequestWithExtraBody(
+                    model=embedding_model,
+                    input=[interleaved_content_as_str(c.content) for c in chunks],
+                )
+                resp = await self.inference_api.openai_embeddings(params)
+
+                # Create EmbeddedChunk instances from chunks and their embeddings
+                for chunk, data in zip(chunks, resp.data, strict=False):
+                    # Ensure embedding is a list of floats
+                    embedding = data.embedding
+                    if isinstance(embedding, str):
+                        # Handle case where embedding might be returned as a string (shouldn't normally happen)
+                        raise ValueError(f"Received string embedding instead of list: {embedding}")
+                    embedded_chunk = EmbeddedChunk(
+                        content=chunk.content,
+                        chunk_id=chunk.chunk_id,
+                        metadata=chunk.metadata,
+                        chunk_metadata=chunk.chunk_metadata,
+                        embedding=embedding,
+                        embedding_model=embedding_model,
+                        embedding_dimension=len(embedding),
+                    )
+                    embedded_chunks.append(embedded_chunk)
+
                 await self.insert_chunks(
-
-                    chunks=
+                    vector_store_id=vector_store_id,
+                    chunks=embedded_chunks,
                 )
                 vector_store_file_object.status = "completed"
         except Exception as e:
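With this change, file attachment embeds every chunk in one batched request through the inference API before calling `insert_chunks`, instead of leaving embedding to the index. Against any OpenAI-compatible embeddings endpoint, the same batching looks roughly like the sketch below; the base URL and model name are placeholders, and a running server is assumed:

```python
from openai import OpenAI

# Placeholder endpoint and credentials; point these at a real server.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

def embed_chunks(texts: list[str], model: str = "nomic-embed-text") -> list[list[float]]:
    # One request for the whole batch; the response preserves input order,
    # so vectors[i] corresponds to texts[i].
    resp = client.embeddings.create(model=model, input=texts)
    return [item.embedding for item in resp.data]

vectors = embed_chunks(["first chunk", "second chunk"])
print(len(vectors), len(vectors[0]))  # 2 vectors, one per chunk
```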
@@ -815,26 +902,27 @@ class OpenAIVectorStoreMixin(ABC):
                 message=str(e),
             )
 
-        #
+        # Save vector store file to persistent storage AFTER insert_chunks
+        # so that chunks include the embeddings that were generated
         file_info = vector_store_file_object.model_dump(exclude={"last_error"})
         file_info["filename"] = file_response.filename if file_response else ""
 
-
-        dict_chunks = [c.model_dump() for c in chunks]
-        # This should be updated to include chunk_id
+        dict_chunks = [c.model_dump() for c in embedded_chunks]
         await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks)
 
         # Update file_ids and file_counts in vector store metadata
-
-
-
-
-
-
-
-
-
-
+        # Use lock to prevent race condition when multiple files are attached concurrently
+        async with self._get_vector_store_lock(vector_store_id):
+            store_info = self.openai_vector_stores[vector_store_id].copy()
+            # Deep copy file_counts to avoid mutating shared dict
+            store_info["file_counts"] = store_info["file_counts"].copy()
+            store_info["file_ids"] = store_info["file_ids"].copy()
+            store_info["file_ids"].append(file_id)
+            store_info["file_counts"]["total"] += 1
+            store_info["file_counts"][vector_store_file_object.status] += 1
+
+            # Save updated vector store to persistent storage
+            await self._save_openai_vector_store(vector_store_id, store_info)
 
         return vector_store_file_object
 
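Note the copy-before-mutate discipline inside the lock: the nested `file_counts` dict and `file_ids` list are copied so that any reader still holding the old `store_info` never observes a half-applied update. Condensed to its essentials (the store shape below is simplified, not the real schema):

```python
import asyncio

stores: dict[str, dict] = {"vs_1": {"file_ids": [], "file_counts": {"total": 0, "completed": 0}}}
lock = asyncio.Lock()

async def record_file(store_id: str, file_id: str, status: str) -> None:
    async with lock:
        info = stores[store_id].copy()
        # Copy nested structures too, so the old snapshot stays intact.
        info["file_ids"] = info["file_ids"] + [file_id]
        info["file_counts"] = {**info["file_counts"]}
        info["file_counts"]["total"] += 1
        info["file_counts"][status] += 1
        stores[store_id] = info  # swap in the new snapshot

asyncio.run(record_file("vs_1", "file_1", "completed"))
print(stores["vs_1"]["file_counts"])  # {'total': 1, 'completed': 1}
```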
@@ -886,8 +974,8 @@ class OpenAIVectorStoreMixin(ABC):
 
         # Determine pagination info
         has_more = len(file_objects) > limit
-        first_id =
-        last_id =
+        first_id = limited_files[0].id if file_objects else None
+        last_id = limited_files[-1].id if file_objects else None
 
         return VectorStoreListFilesResponse(
             data=limited_files,
@@ -916,22 +1004,27 @@ class OpenAIVectorStoreMixin(ABC):
         self,
         vector_store_id: str,
         file_id: str,
-
+        include_embeddings: bool | None = False,
+        include_metadata: bool | None = False,
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file."""
         if vector_store_id not in self.openai_vector_stores:
             raise VectorStoreNotFoundError(vector_store_id)
 
-
+        # Parameters are already provided directly
+        # include_embeddings and include_metadata are now function parameters
+
         dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
-        chunks = [
+        chunks = [EmbeddedChunk.model_validate(c) for c in dict_chunks]
         content = []
         for chunk in chunks:
-            content.extend(
-
-
-
-
-
+            content.extend(
+                self._chunk_to_vector_store_content(
+                    chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False
+                )
+            )
+        return VectorStoreFileContentResponse(
+            data=content,
         )
 
     async def openai_update_vector_store_file(
@@ -1048,7 +1141,10 @@ class OpenAIVectorStoreMixin(ABC):
 
         # Run cleanup if needed (throttled to once every 1 day)
         current_time = int(time.time())
-        if
+        if (
+            current_time - self._last_file_batch_cleanup_time
+            >= self.vector_stores_config.file_batch_params.cleanup_interval_seconds
+        ):
             logger.info("Running throttled cleanup of expired file batches")
             asyncio.create_task(self._cleanup_expired_file_batches())
             self._last_file_batch_cleanup_time = current_time
@@ -1065,7 +1161,7 @@ class OpenAIVectorStoreMixin(ABC):
         batch_info: dict[str, Any],
     ) -> None:
         """Process files with controlled concurrency and chunking."""
-        semaphore = asyncio.Semaphore(
+        semaphore = asyncio.Semaphore(self.vector_stores_config.file_batch_params.max_concurrent_files_per_batch)
 
         async def process_single_file(file_id: str) -> tuple[str, bool]:
             """Process a single file with concurrency control."""
@@ -1084,12 +1180,13 @@ class OpenAIVectorStoreMixin(ABC):
 
         # Process files in chunks to avoid creating too many tasks at once
         total_files = len(file_ids)
-
-
+        chunk_size = self.vector_stores_config.file_batch_params.file_batch_chunk_size
+        for chunk_start in range(0, total_files, chunk_size):
+            chunk_end = min(chunk_start + chunk_size, total_files)
             chunk = file_ids[chunk_start:chunk_end]
 
-            chunk_num = chunk_start //
-            total_chunks = (total_files +
+            chunk_num = chunk_start // chunk_size + 1
+            total_chunks = (total_files + chunk_size - 1) // chunk_size
             logger.info(
                 f"Processing chunk {chunk_num} of {total_chunks} ({len(chunk)} files, {chunk_start + 1}-{chunk_end} of {total_files} total files)"
             )
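The batch processor combines two throttles: a semaphore caps how many files are in flight at once, and the file list is walked in fixed-size slices so only one slice's worth of tasks exists at a time (`(total + size - 1) // size` is ceiling division for the slice count). A standalone sketch with stand-in work:

```python
import asyncio

MAX_CONCURRENT = 3  # counterpart of max_concurrent_files_per_batch
CHUNK_SIZE = 10     # counterpart of file_batch_chunk_size

async def process_all(file_ids: list[str]) -> None:
    semaphore = asyncio.Semaphore(MAX_CONCURRENT)

    async def process_one(file_id: str) -> None:
        async with semaphore:  # at most MAX_CONCURRENT run at once
            await asyncio.sleep(0.01)  # stand-in for real file processing

    total = len(file_ids)
    total_chunks = (total + CHUNK_SIZE - 1) // CHUNK_SIZE  # ceiling division
    for start in range(0, total, CHUNK_SIZE):
        chunk = file_ids[start : start + CHUNK_SIZE]
        print(f"chunk {start // CHUNK_SIZE + 1}/{total_chunks}: {len(chunk)} files")
        await asyncio.gather(*(process_one(f) for f in chunk))

asyncio.run(process_all([f"file_{i}" for i in range(25)]))
```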
llama_stack/providers/utils/memory/vector_store.py

@@ -17,21 +17,25 @@ import numpy as np
 from numpy.typing import NDArray
 from pydantic import BaseModel
 
-from llama_stack.
-    URL,
-    InterleavedContent,
-)
-from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody
-from llama_stack.apis.tools import RAGDocument
-from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
-from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.datatypes import Api
 from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+from llama_stack_api import (
+    URL,
+    Api,
+    Chunk,
+    ChunkMetadata,
+    EmbeddedChunk,
+    InterleavedContent,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    QueryChunksResponse,
+    RAGDocument,
+    VectorStore,
+)
 
 log = get_logger(name=__name__, category="providers::utils")
 
@@ -155,7 +159,11 @@ async def content_from_doc(doc: RAGDocument) -> str:
 
 
 def make_overlapped_chunks(
-    document_id: str,
+    document_id: str,
+    text: str,
+    window_len: int,
+    overlap_len: int,
+    metadata: dict[str, Any],
 ) -> list[Chunk]:
     default_tokenizer = "DEFAULT_TIKTOKEN_TOKENIZER"
     tokenizer = Tokenizer.get_instance()
@@ -187,7 +195,6 @@ def make_overlapped_chunks(
             updated_timestamp=int(time.time()),
             chunk_window=chunk_window,
             chunk_tokenizer=default_tokenizer,
-            chunk_embedding_model=None,  # This will be set in `VectorStoreWithIndex.insert_chunks`
             content_token_count=len(toks),
             metadata_token_count=len(metadata_tokens),
         )
@@ -196,6 +203,7 @@ def make_overlapped_chunks(
         chunks.append(
             Chunk(
                 content=chunk,
+                chunk_id=chunk_id,
                 metadata=chunk_metadata,
                 chunk_metadata=backend_chunk_metadata,
             )
@@ -222,7 +230,7 @@ def _validate_embedding(embedding: NDArray, index: int, expected_dimension: int)
 
 class EmbeddingIndex(ABC):
     @abstractmethod
-    async def add_chunks(self,
+    async def add_chunks(self, embedded_chunks: list[EmbeddedChunk]):
         raise NotImplementedError()
 
     @abstractmethod
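`make_overlapped_chunks` slides a fixed token window across the text with stride `window_len - overlap_len`, so consecutive chunks share `overlap_len` tokens. The arithmetic in isolation, using whitespace tokens instead of the real tokenizer:

```python
def overlapped_windows(tokens: list[str], window_len: int, overlap_len: int) -> list[list[str]]:
    # Stride = window minus overlap; each window repeats the last
    # `overlap_len` tokens of its predecessor. A trailing short window
    # is kept rather than dropped.
    stride = window_len - overlap_len
    return [tokens[i : i + window_len] for i in range(0, len(tokens), stride)]

tokens = "one two three four five six seven eight".split()
for window in overlapped_windows(tokens, window_len=4, overlap_len=2):
    print(" ".join(window))
# one two three four
# three four five six
# five six seven eight
# seven eight
```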
@@ -259,38 +267,25 @@ class VectorStoreWithIndex:
     vector_store: VectorStore
     index: EmbeddingIndex
     inference_api: Api.inference
+    vector_stores_config: VectorStoresConfig | None = None
 
     async def insert_chunks(
         self,
-        chunks: list[
+        chunks: list[EmbeddedChunk],
     ) -> None:
-
-        for i,
-
-                chunks_to_embed.append(c)
-                if c.chunk_metadata:
-                    c.chunk_metadata.chunk_embedding_model = self.vector_store.embedding_model
-                    c.chunk_metadata.chunk_embedding_dimension = self.vector_store.embedding_dimension
-            else:
-                _validate_embedding(c.embedding, i, self.vector_store.embedding_dimension)
-
-        if chunks_to_embed:
-            params = OpenAIEmbeddingsRequestWithExtraBody(
-                model=self.vector_store.embedding_model,
-                input=[c.content for c in chunks_to_embed],
-            )
-            resp = await self.inference_api.openai_embeddings(params)
-            for c, data in zip(chunks_to_embed, resp.data, strict=False):
-                c.embedding = data.embedding
+        # Validate embedding dimensions match the vector store
+        for i, embedded_chunk in enumerate(chunks):
+            _validate_embedding(embedded_chunk.embedding, i, self.vector_store.embedding_dimension)
 
-
-        await self.index.add_chunks(chunks, embeddings)
+        await self.index.add_chunks(chunks)
 
     async def query_chunks(
         self,
         query: InterleavedContent,
         params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
+        config = self.vector_stores_config or VectorStoresConfig()
+
         if params is None:
             params = {}
         k = params.get("max_chunks", 3)
@@ -299,19 +294,25 @@ class VectorStoreWithIndex:
 
         ranker = params.get("ranker")
         if ranker is None:
-            reranker_type =
-
+            reranker_type = (
+                RERANKER_TYPE_RRF
+                if config.chunk_retrieval_params.default_reranker_strategy == "rrf"
+                else config.chunk_retrieval_params.default_reranker_strategy
+            )
+            reranker_params = {"impact_factor": config.chunk_retrieval_params.rrf_impact_factor}
         else:
-            strategy = ranker.get("strategy",
+            strategy = ranker.get("strategy", config.chunk_retrieval_params.default_reranker_strategy)
             if strategy == "weighted":
                 weights = ranker.get("params", {}).get("weights", [0.5, 0.5])
                 reranker_type = RERANKER_TYPE_WEIGHTED
-                reranker_params = {
+                reranker_params = {
+                    "alpha": weights[0] if len(weights) > 0 else config.chunk_retrieval_params.weighted_search_alpha
+                }
             elif strategy == "normalized":
                 reranker_type = RERANKER_TYPE_NORMALIZED
             else:
                 reranker_type = RERANKER_TYPE_RRF
-                k_value = ranker.get("params", {}).get("k",
+                k_value = ranker.get("params", {}).get("k", config.chunk_retrieval_params.rrf_impact_factor)
                 reranker_params = {"impact_factor": k_value}
 
         query_string = interleaved_content_as_str(query)
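The `impact_factor` threaded through this code is the `k` constant of standard Reciprocal Rank Fusion, where a document's fused score across several ranked lists is the sum of 1 / (k + rank). A small illustration of the formula with made-up result lists:

```python
def rrf_fuse(rankings: list[list[str]], impact_factor: float = 60.0) -> dict[str, float]:
    # RRF: score(d) = sum over lists of 1 / (impact_factor + rank_of_d)
    scores: dict[str, float] = {}
    for ranking in rankings:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (impact_factor + rank)
    return scores

vector_hits = ["doc_a", "doc_b", "doc_c"]   # ranked by vector similarity
keyword_hits = ["doc_b", "doc_a", "doc_d"]  # ranked by keyword match
fused = rrf_fuse([vector_hits, keyword_hits])
print(sorted(fused, key=fused.get, reverse=True))  # doc_a/doc_b tie ahead of the rest
```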