llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
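The headline change in 0.4 is the split of the public API surface into a new llama_stack_api top-level package (note the new two-entry top_level.txt): modules that lived under llama_stack/apis/ and llama_stack/providers/datatypes.py are now imported from llama_stack_api, while the kvstore and sqlstore utilities moved from llama_stack/providers/utils/ to llama_stack/core/storage/. A minimal before-and-after sketch of the import migration, using only names that appear in the hunks below; the grouping is illustrative, not an exhaustive migration guide:

# llama-stack 0.3.5
from llama_stack.apis.files import Files, OpenAIFilePurpose
from llama_stack.apis.inference import Inference
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl

# llama-stack 0.4.1: the same names are re-exported from the new package
from llama_stack_api import Files, Inference, OpenAIFilePurpose, ToolGroupsProtocolPrivate
from llama_stack.core.storage.kvstore import kvstore_impl
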
llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -14,35 +14,32 @@ import httpx
 from fastapi import UploadFile
 from pydantic import TypeAdapter
 
-from llama_stack.apis.common.content_types import (
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+from llama_stack.providers.utils.memory.vector_store import parse_data_url
+from llama_stack_api import (
     URL,
+    Files,
+    Inference,
     InterleavedContent,
     InterleavedContentItem,
-    TextContentItem,
-)
-from llama_stack.apis.files import Files, OpenAIFilePurpose
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.tools import (
     ListToolDefsResponse,
+    OpenAIFilePurpose,
+    QueryChunksResponse,
     RAGDocument,
     RAGQueryConfig,
     RAGQueryResult,
-    RAGToolRuntime,
+    TextContentItem,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
-)
-from llama_stack.apis.vector_io import (
-    QueryChunksResponse,
+    UploadFileRequest,
     VectorIO,
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
 )
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.memory.vector_store import parse_data_url
 
 from .config import RagToolRuntimeConfig
 from .context_retriever import generate_rag_query
@@ -91,7 +88,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
     return content_str.encode("utf-8"), "text/plain"
 
 
-class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
+class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
     def __init__(
         self,
         config: RagToolRuntimeConfig,
@@ -119,9 +116,11 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
     async def insert(
         self,
         documents: list[RAGDocument],
-        vector_db_id: str,
-        chunk_size_in_tokens: int = 512,
+        vector_store_id: str,
+        chunk_size_in_tokens: int | None = None,
     ) -> None:
+        if chunk_size_in_tokens is None:
+            chunk_size_in_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_size_tokens
         if not documents:
             return
 
@@ -143,29 +142,31 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
 
             try:
                 created_file = await self.files_api.openai_upload_file(
-                    file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS,
+                    request=UploadFileRequest(purpose=OpenAIFilePurpose.ASSISTANTS),
+                    file=upload_file,
                 )
             except Exception as e:
                 log.error(f"Failed to upload file for document {doc.document_id}: {e}")
                 continue
 
+            overlap_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_overlap_tokens
             chunking_strategy = VectorStoreChunkingStrategyStatic(
                 static=VectorStoreChunkingStrategyStaticConfig(
                     max_chunk_size_tokens=chunk_size_in_tokens,
-                    chunk_overlap_tokens=chunk_size_in_tokens // 4,
+                    chunk_overlap_tokens=overlap_tokens,
                 )
             )
 
             try:
                 await self.vector_io_api.openai_attach_file_to_vector_store(
-                    vector_store_id=vector_db_id,
+                    vector_store_id=vector_store_id,
                     file_id=created_file.id,
                     attributes=doc.metadata,
                     chunking_strategy=chunking_strategy,
                 )
             except Exception as e:
                 log.error(
-                    f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
+                    f"Failed to attach file {created_file.id} to vector store {vector_store_id} for document {doc.document_id}: {e}"
                 )
                 continue
 
@@ -176,15 +177,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
     async def query(
         self,
         content: InterleavedContent,
-        vector_db_ids: list[str],
+        vector_store_ids: list[str],
         query_config: RAGQueryConfig | None = None,
     ) -> RAGQueryResult:
-        if not vector_db_ids:
+        if not vector_store_ids:
             raise ValueError(
                 "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID."
             )
 
-        query_config = query_config or RAGQueryConfig()
+        query_config = query_config or RAGQueryConfig(
+            max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
+        )
         query = await generate_rag_query(
             query_config.query_generator_config,
             content,
@@ -192,7 +195,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         )
         tasks = [
             self.vector_io_api.query_chunks(
-                vector_db_id=vector_db_id,
+                vector_store_id=vector_store_id,
                 query=query,
                 params={
                     "mode": query_config.mode,
@@ -201,18 +204,20 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
                     "ranker": query_config.ranker,
                 },
             )
-            for vector_db_id in vector_db_ids
+            for vector_store_id in vector_store_ids
         ]
         results: list[QueryChunksResponse] = await asyncio.gather(*tasks)
 
         chunks = []
         scores = []
 
-        for vector_db_id, result in zip(vector_db_ids, results, strict=False):
-            for chunk, score in zip(result.chunks, result.scores, strict=False):
-
+        for vector_store_id, result in zip(vector_store_ids, results, strict=False):
+            for embedded_chunk, score in zip(result.chunks, result.scores, strict=False):
+                # EmbeddedChunk inherits from Chunk, so use it directly
+                chunk = embedded_chunk
+                if chunk.metadata is None:
                     chunk.metadata = {}
-                chunk.metadata["vector_db_id"] = vector_db_id
+                chunk.metadata["vector_store_id"] = vector_store_id
 
                 chunks.append(chunk)
                 scores.append(score)
@@ -225,13 +230,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
             chunks = chunks[: query_config.max_chunks]
 
         tokens = 0
-
-
-
-
-
-
-
+
+        # Get templates from vector stores config
+        vector_stores_config = self.config.vector_stores_config
+        header_template = vector_stores_config.file_search_params.header_template
+        footer_template = vector_stores_config.file_search_params.footer_template
+        chunk_template = vector_stores_config.context_prompt_params.chunk_annotation_template
+        context_template = vector_stores_config.context_prompt_params.context_template
+
+        picked: list[InterleavedContentItem] = [TextContentItem(text=header_template.format(num_chunks=len(chunks)))]
+        for i, embedded_chunk in enumerate(chunks):
+            metadata = embedded_chunk.metadata
             tokens += metadata.get("token_count", 0)
             tokens += metadata.get("metadata_token_count", 0)
 
@@ -250,22 +259,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
             metadata_keys_to_exclude_from_context = [
                 "token_count",
                 "metadata_token_count",
-                "vector_db_id",
+                "vector_store_id",
             ]
             metadata_for_context = {}
             for k in chunk_metadata_keys_to_include_from_context:
-                metadata_for_context[k] = getattr(chunk.chunk_metadata, k)
+                metadata_for_context[k] = getattr(embedded_chunk.chunk_metadata, k)
             for k in metadata:
                 if k not in metadata_keys_to_exclude_from_context:
                     metadata_for_context[k] = metadata[k]
 
-            text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context)
+            text_content = chunk_template.format(index=i + 1, chunk=embedded_chunk, metadata=metadata_for_context)
             picked.append(TextContentItem(text=text_content))
 
-        picked.append(TextContentItem(text=
+        picked.append(TextContentItem(text=footer_template))
         picked.append(
             TextContentItem(
-                text=
+                text=context_template.format(query=interleaved_content_as_str(content), annotation_instruction="")
             )
         )
 
@@ -275,12 +284,15 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
                 "document_ids": [c.document_id for c in chunks[: len(picked)]],
                 "chunks": [c.content for c in chunks[: len(picked)]],
                 "scores": scores[: len(picked)],
-                "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
+                "vector_store_ids": [c.metadata["vector_store_id"] for c in chunks[: len(picked)]],
             },
         )
 
     async def list_runtime_tools(
-        self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+        self,
+        tool_group_id: str | None = None,
+        mcp_endpoint: URL | None = None,
+        authorization: str | None = None,
     ) -> ListToolDefsResponse:
         # Parameters are not listed since these methods are not yet invoked automatically
         # by the LLM. The method is only implemented so things like /tools can list without
@@ -308,18 +320,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
             ]
         )
 
-    async def invoke_tool(
-        vector_db_ids = kwargs.get("vector_db_ids", [])
+    async def invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ) -> ToolInvocationResult:
+        vector_store_ids = kwargs.get("vector_store_ids", [])
         query_config = kwargs.get("query_config")
         if query_config:
             query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
         else:
-            query_config = RAGQueryConfig()
+            query_config = RAGQueryConfig(
+                max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
+            )
 
         query = kwargs["query"]
         result = await self.query(
             content=query,
-            vector_db_ids=vector_db_ids,
+            vector_store_ids=vector_store_ids,
             query_config=query_config,
         )
 
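Net effect of the memory.py hunks above: the RAG tool runtime now speaks vector_store_* identifiers instead of vector_db_*, and its chunking and retrieval defaults come from vector_stores_config rather than hard-coded values. A hedged sketch of a migrated caller, assuming rag_runtime is the MemoryToolRuntimeImpl instance and docs is an already-prepared list[RAGDocument]; the store ID is a placeholder:

# 0.3.5 shape: insert(documents, vector_db_id=..., chunk_size_in_tokens=512)
# 0.4.1: chunk size may be omitted; it falls back to
# config.vector_stores_config.file_ingestion_params.default_chunk_size_tokens
await rag_runtime.insert(documents=docs, vector_store_id="vs_example")

# 0.4.1: query takes vector_store_ids, and a default RAGQueryConfig now picks up
# max_tokens_in_context from config.vector_stores_config.chunk_retrieval_params
result = await rag_runtime.query(content="What changed in 0.4?", vector_store_ids=["vs_example"])
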
@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
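The one-line hunk above (it has a twin later in this diff) retargets json_schema_type from the old llama_stack.schema_utils module to the llama_stack_api package root; the decorator itself is used exactly as before. A sketch with a hypothetical config model, where only the import lines and the decorator come from the diff and the model body is invented for illustration:

from pydantic import BaseModel, Field

from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack_api import json_schema_type  # 0.3.5: from llama_stack.schema_utils import json_schema_type


@json_schema_type
class ExampleVectorIOConfig(BaseModel):  # hypothetical model, for illustration only
    persistence: KVStoreReference = Field(description="Reference to the KV store backing this index")
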
llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -10,21 +10,29 @@ import io
 import json
 from typing import Any
 
-import faiss
+import faiss  # type: ignore[import-untyped]
 import numpy as np
 from numpy.typing import NDArray
 
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
+from llama_stack_api import (
+    EmbeddedChunk,
+    Files,
+    HealthResponse,
+    HealthStatus,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
+from llama_stack_api.internal.kvstore import KVStore
 
 from .config import FaissVectorIOConfig
 
@@ -41,7 +49,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:
 class FaissIndex(EmbeddingIndex):
     def __init__(self, dimension: int, kvstore: KVStore | None = None, bank_id: str | None = None):
         self.index = faiss.IndexFlatL2(dimension)
-        self.chunk_by_index: dict[int, Chunk] = {}
+        self.chunk_by_index: dict[int, EmbeddedChunk] = {}
         self.kvstore = kvstore
         self.bank_id = bank_id
@@ -65,12 +73,16 @@ class FaissIndex(EmbeddingIndex):
 
         if stored_data:
             data = json.loads(stored_data)
-            self.chunk_by_index = {
+            self.chunk_by_index = {}
+            for k, v in data["chunk_by_index"].items():
+                chunk_data = json.loads(v)
+                # Use generic backward compatibility utility
+                self.chunk_by_index[int(k)] = load_embedded_chunk_with_backward_compat(chunk_data)
 
             buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
             try:
                 self.index = faiss.deserialize_index(np.load(buffer, allow_pickle=False))
-                self.chunk_ids = [chunk.chunk_id for chunk in self.chunk_by_index.values()]
+                self.chunk_ids = [embedded_chunk.chunk_id for embedded_chunk in self.chunk_by_index.values()]
             except Exception as e:
                 logger.debug(e, exc_info=True)
                 raise ValueError(
@@ -100,19 +112,24 @@ class FaissIndex(EmbeddingIndex):
 
         await self.kvstore.delete(f"{FAISS_INDEX_PREFIX}{self.bank_id}")
 
-    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
-
+    async def add_chunks(self, embedded_chunks: list[EmbeddedChunk]):
+        if not embedded_chunks:
+            return
+
+        # Extract embeddings and validate dimensions
+        embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
         embedding_dim = embeddings.shape[1] if len(embeddings.shape) > 1 else embeddings.shape[0]
         if embedding_dim != self.index.d:
             raise ValueError(f"Embedding dimension mismatch. Expected {self.index.d}, got {embedding_dim}")
 
+        # Store chunks by index
         indexlen = len(self.chunk_by_index)
-        for i, chunk in enumerate(chunks):
-            self.chunk_by_index[indexlen + i] = chunk
+        for i, embedded_chunk in enumerate(embedded_chunks):
+            self.chunk_by_index[indexlen + i] = embedded_chunk
 
         async with self.chunk_id_lock:
-            self.index.add(np.array(embeddings).astype(np.float32))
-            self.chunk_ids.extend([chunk.chunk_id for chunk in chunks])
+            self.index.add(embeddings)
+            self.chunk_ids.extend([ec.chunk_id for ec in embedded_chunks])  # EmbeddedChunk inherits from Chunk
 
         # Save updated index
         await self._save_index()
@@ -144,8 +161,8 @@ class FaissIndex(EmbeddingIndex):
 
     async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
         distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
-        chunks = []
-        scores = []
+        chunks: list[EmbeddedChunk] = []
+        scores: list[float] = []
         for d, i in zip(distances[0], indices[0], strict=False):
             if i < 0:
                 continue
@@ -178,9 +195,8 @@ class FaissIndex(EmbeddingIndex):
 
 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
     def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
-        super().__init__(files_api=files_api, kvstore=None)
+        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
         self.config = config
-        self.inference_api = inference_api
         self.cache: dict[str, VectorStoreWithIndex] = {}
 
     async def initialize(self) -> None:
@@ -271,19 +287,21 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
         self.cache[vector_store_id] = index
         return index
 
-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = self.cache.get(vector_db_id)
+    async def insert_chunks(
+        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+    ) -> None:
+        index = self.cache.get(vector_store_id)
         if index is None:
-            raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
+            raise ValueError(f"Vector DB {vector_store_id} not found. found: {self.cache.keys()}")
 
         await index.insert_chunks(chunks)
 
     async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = self.cache.get(vector_db_id)
+        index = self.cache.get(vector_store_id)
         if index is None:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
 
         return await index.query_chunks(query, params)
 
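The faiss hunks above collapse the old (chunks, embeddings) argument pair into a single list[EmbeddedChunk], where each element carries its own embedding and chunk_id and, per the inline comments, EmbeddedChunk inherits from Chunk. A rough sketch of the new call shape, assuming adapter is an initialized FaissVectorIOAdapter, the vector store already exists, and embedded_chunks came out of an embedding pipeline:

# 0.3.5 passed chunks plus a separate NDArray of embeddings to the index;
# 0.4.1 passes one list whose elements are self-contained.
await adapter.insert_chunks(vector_store_id="vs_example", chunks=embedded_chunks)

# Downstream code can keep treating each EmbeddedChunk as a Chunk:
for ec in embedded_chunks:
    print(ec.chunk_id, len(ec.embedding), ec.content)
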
@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -5,24 +5,18 @@
 # the root directory of this source tree.
 
 import asyncio
+import json
 import re
 import sqlite3
 import struct
 from typing import Any
 
 import numpy as np
-import sqlite_vec
+import sqlite_vec  # type: ignore[import-untyped]
 from numpy.typing import NDArray
 
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import (
     RERANKER_TYPE_RRF,
@@ -30,7 +24,19 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
+from llama_stack_api import (
+    EmbeddedChunk,
+    Files,
+    Inference,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
+from llama_stack_api.internal.kvstore import KVStore
 
 logger = get_logger(name=__name__, category="vector_io")
 
@@ -137,14 +143,16 @@ class SQLiteVecIndex(EmbeddingIndex):
 
         await asyncio.to_thread(_drop_tables)
 
-    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray, batch_size: int = 500):
+    async def add_chunks(self, embedded_chunks: list[EmbeddedChunk], batch_size: int = 500):
         """
-        Add new chunks
-        For each chunk, we insert
+        Add new embedded chunks using batch inserts.
+        For each embedded chunk, we insert the chunk JSON into the metadata table and then insert its
        embedding (serialized to raw bytes) into the virtual table using the assigned rowid.
        If any insert fails, the transaction is rolled back to maintain consistency.
        Also inserts chunk content into FTS table for keyword search support.
        """
+        chunks = embedded_chunks  # EmbeddedChunk now inherits from Chunk
+        embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
         assert all(isinstance(chunk.content, str) for chunk in chunks), "SQLiteVecIndex only supports text chunks"
 
         def _execute_all_batch_inserts():
@@ -229,11 +237,12 @@ class SQLiteVecIndex(EmbeddingIndex):
                 if score < score_threshold:
                     continue
                 try:
-
+                    chunk_data = json.loads(chunk_json)
+                    embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
                 except Exception as e:
                     logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                     continue
-                chunks.append(chunk)
+                chunks.append(embedded_chunk)
                 scores.append(score)
             return QueryChunksResponse(chunks=chunks, scores=scores)
 
@@ -270,11 +279,12 @@ class SQLiteVecIndex(EmbeddingIndex):
                 if score > -score_threshold:
                     continue
                 try:
-
+                    chunk_data = json.loads(chunk_json)
+                    embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
                 except Exception as e:
                     logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                     continue
-                chunks.append(chunk)
+                chunks.append(embedded_chunk)
                 scores.append(score)
             return QueryChunksResponse(chunks=chunks, scores=scores)
 
@@ -308,13 +318,14 @@ class SQLiteVecIndex(EmbeddingIndex):
         vector_response = await self.query_vector(embedding, k, score_threshold)
         keyword_response = await self.query_keyword(query_string, k, score_threshold)
 
-        # Convert responses to score dictionaries using chunk_id
+        # Convert responses to score dictionaries using chunk_id (EmbeddedChunk inherits from Chunk)
         vector_scores = {
-            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
+            embedded_chunk.chunk_id: score
+            for embedded_chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
         }
         keyword_scores = {
-            chunk.chunk_id: score
-            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
+            embedded_chunk.chunk_id: score
+            for embedded_chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
         }
 
         # Combine scores using the reranking utility
@@ -329,10 +340,10 @@ class SQLiteVecIndex(EmbeddingIndex):
         # Filter by score threshold
         filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
 
-        # Create a map of chunk_id to chunk for both responses
-        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
+        # Create a map of chunk_id to embedded_chunk for both responses
+        chunk_map = {ec.chunk_id: ec for ec in vector_response.chunks + keyword_response.chunks}
 
-        # Use the map to look up chunks by their IDs
+        # Use the map to look up embedded chunks by their IDs
         chunks = []
         scores = []
         for doc_id, score in filtered_items:
@@ -382,9 +393,8 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
     """
 
     def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
-        super().__init__(files_api=files_api, kvstore=None)
+        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
         self.config = config
-        self.inference_api = inference_api
         self.cache: dict[str, VectorStoreWithIndex] = {}
         self.vector_store_table = None
 
@@ -458,20 +468,21 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
         await self.cache[vector_store_id].index.delete()
         del self.cache[vector_store_id]
 
-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(
+        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+    ) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
-        # The VectorStoreWithIndex helper
-        # and then call our index's add_chunks.
+            raise VectorStoreNotFoundError(vector_store_id)
+        # The VectorStoreWithIndex helper validates embeddings and calls the index's add_chunks method
         await index.insert_chunks(chunks)
 
     async def query_chunks(
-        self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
         return await index.query_chunks(query, params)
 
     async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: