llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -14,35 +14,32 @@ import httpx
|
|
|
14
14
|
from fastapi import UploadFile
|
|
15
15
|
from pydantic import TypeAdapter
|
|
16
16
|
|
|
17
|
-
from llama_stack.
|
|
17
|
+
from llama_stack.log import get_logger
|
|
18
|
+
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
|
19
|
+
from llama_stack.providers.utils.memory.vector_store import parse_data_url
|
|
20
|
+
from llama_stack_api import (
|
|
18
21
|
URL,
|
|
22
|
+
Files,
|
|
23
|
+
Inference,
|
|
19
24
|
InterleavedContent,
|
|
20
25
|
InterleavedContentItem,
|
|
21
|
-
TextContentItem,
|
|
22
|
-
)
|
|
23
|
-
from llama_stack.apis.files import Files, OpenAIFilePurpose
|
|
24
|
-
from llama_stack.apis.inference import Inference
|
|
25
|
-
from llama_stack.apis.tools import (
|
|
26
26
|
ListToolDefsResponse,
|
|
27
|
+
OpenAIFilePurpose,
|
|
28
|
+
QueryChunksResponse,
|
|
27
29
|
RAGDocument,
|
|
28
30
|
RAGQueryConfig,
|
|
29
31
|
RAGQueryResult,
|
|
30
|
-
|
|
32
|
+
TextContentItem,
|
|
31
33
|
ToolDef,
|
|
32
34
|
ToolGroup,
|
|
35
|
+
ToolGroupsProtocolPrivate,
|
|
33
36
|
ToolInvocationResult,
|
|
34
37
|
ToolRuntime,
|
|
35
|
-
|
|
36
|
-
from llama_stack.apis.vector_io import (
|
|
37
|
-
QueryChunksResponse,
|
|
38
|
+
UploadFileRequest,
|
|
38
39
|
VectorIO,
|
|
39
40
|
VectorStoreChunkingStrategyStatic,
|
|
40
41
|
VectorStoreChunkingStrategyStaticConfig,
|
|
41
42
|
)
|
|
42
|
-
from llama_stack.log import get_logger
|
|
43
|
-
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
|
|
44
|
-
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
|
45
|
-
from llama_stack.providers.utils.memory.vector_store import parse_data_url
|
|
46
43
|
|
|
47
44
|
from .config import RagToolRuntimeConfig
|
|
48
45
|
from .context_retriever import generate_rag_query
|
|
@@ -91,7 +88,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
|
|
|
91
88
|
return content_str.encode("utf-8"), "text/plain"
|
|
92
89
|
|
|
93
90
|
|
|
94
|
-
class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
|
|
91
|
+
class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
|
|
95
92
|
def __init__(
|
|
96
93
|
self,
|
|
97
94
|
config: RagToolRuntimeConfig,
|
|
@@ -119,9 +116,11 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|
|
119
116
|
async def insert(
|
|
120
117
|
self,
|
|
121
118
|
documents: list[RAGDocument],
|
|
122
|
-
|
|
123
|
-
chunk_size_in_tokens: int =
|
|
119
|
+
vector_store_id: str,
|
|
120
|
+
chunk_size_in_tokens: int | None = None,
|
|
124
121
|
) -> None:
|
|
122
|
+
if chunk_size_in_tokens is None:
|
|
123
|
+
chunk_size_in_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_size_tokens
|
|
125
124
|
if not documents:
|
|
126
125
|
return
|
|
127
126
|
|
|
@@ -143,29 +142,31 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|
|
143
142
|
|
|
144
143
|
try:
|
|
145
144
|
created_file = await self.files_api.openai_upload_file(
|
|
146
|
-
|
|
145
|
+
request=UploadFileRequest(purpose=OpenAIFilePurpose.ASSISTANTS),
|
|
146
|
+
file=upload_file,
|
|
147
147
|
)
|
|
148
148
|
except Exception as e:
|
|
149
149
|
log.error(f"Failed to upload file for document {doc.document_id}: {e}")
|
|
150
150
|
continue
|
|
151
151
|
|
|
152
|
+
overlap_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_overlap_tokens
|
|
152
153
|
chunking_strategy = VectorStoreChunkingStrategyStatic(
|
|
153
154
|
static=VectorStoreChunkingStrategyStaticConfig(
|
|
154
155
|
max_chunk_size_tokens=chunk_size_in_tokens,
|
|
155
|
-
chunk_overlap_tokens=
|
|
156
|
+
chunk_overlap_tokens=overlap_tokens,
|
|
156
157
|
)
|
|
157
158
|
)
|
|
158
159
|
|
|
159
160
|
try:
|
|
160
161
|
await self.vector_io_api.openai_attach_file_to_vector_store(
|
|
161
|
-
vector_store_id=
|
|
162
|
+
vector_store_id=vector_store_id,
|
|
162
163
|
file_id=created_file.id,
|
|
163
164
|
attributes=doc.metadata,
|
|
164
165
|
chunking_strategy=chunking_strategy,
|
|
165
166
|
)
|
|
166
167
|
except Exception as e:
|
|
167
168
|
log.error(
|
|
168
|
-
f"Failed to attach file {created_file.id} to vector store {
|
|
169
|
+
f"Failed to attach file {created_file.id} to vector store {vector_store_id} for document {doc.document_id}: {e}"
|
|
169
170
|
)
|
|
170
171
|
continue
|
|
171
172
|
|
|
@@ -176,15 +177,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|
|
176
177
|
async def query(
|
|
177
178
|
self,
|
|
178
179
|
content: InterleavedContent,
|
|
179
|
-
|
|
180
|
+
vector_store_ids: list[str],
|
|
180
181
|
query_config: RAGQueryConfig | None = None,
|
|
181
182
|
) -> RAGQueryResult:
|
|
182
|
-
if not
|
|
183
|
+
if not vector_store_ids:
|
|
183
184
|
raise ValueError(
|
|
184
185
|
"No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID."
|
|
185
186
|
)
|
|
186
187
|
|
|
187
|
-
query_config = query_config or RAGQueryConfig(
|
|
188
|
+
query_config = query_config or RAGQueryConfig(
|
|
189
|
+
max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
|
|
190
|
+
)
|
|
188
191
|
query = await generate_rag_query(
|
|
189
192
|
query_config.query_generator_config,
|
|
190
193
|
content,
|
|
@@ -192,7 +195,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|
|
192
195
|
)
|
|
193
196
|
tasks = [
|
|
194
197
|
self.vector_io_api.query_chunks(
|
|
195
|
-
|
|
198
|
+
vector_store_id=vector_store_id,
|
|
196
199
|
query=query,
|
|
197
200
|
params={
|
|
198
201
|
"mode": query_config.mode,
|
|
@@ -201,18 +204,20 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|
|
201
204
|
"ranker": query_config.ranker,
|
|
202
205
|
},
|
|
203
206
|
)
|
|
204
|
-
for
|
|
207
|
+
for vector_store_id in vector_store_ids
|
|
205
208
|
]
|
|
206
209
|
results: list[QueryChunksResponse] = await asyncio.gather(*tasks)
|
|
207
210
|
|
|
208
211
|
chunks = []
|
|
209
212
|
scores = []
|
|
210
213
|
|
|
211
|
-
for
|
|
212
|
-
for
|
|
213
|
-
|
|
214
|
+
for vector_store_id, result in zip(vector_store_ids, results, strict=False):
|
|
215
|
+
for embedded_chunk, score in zip(result.chunks, result.scores, strict=False):
|
|
216
|
+
# EmbeddedChunk inherits from Chunk, so use it directly
|
|
217
|
+
chunk = embedded_chunk
|
|
218
|
+
if chunk.metadata is None:
|
|
214
219
|
chunk.metadata = {}
|
|
215
|
-
chunk.metadata["
|
|
220
|
+
chunk.metadata["vector_store_id"] = vector_store_id
|
|
216
221
|
|
|
217
222
|
chunks.append(chunk)
|
|
218
223
|
scores.append(score)
|
|
@@ -225,13 +230,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|
|
225
230
|
chunks = chunks[: query_config.max_chunks]
|
|
226
231
|
|
|
227
232
|
tokens = 0
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
233
|
+
|
|
234
|
+
# Get templates from vector stores config
|
|
235
|
+
vector_stores_config = self.config.vector_stores_config
|
|
236
|
+
header_template = vector_stores_config.file_search_params.header_template
|
|
237
|
+
footer_template = vector_stores_config.file_search_params.footer_template
|
|
238
|
+
chunk_template = vector_stores_config.context_prompt_params.chunk_annotation_template
|
|
239
|
+
context_template = vector_stores_config.context_prompt_params.context_template
|
|
240
|
+
|
|
241
|
+
picked: list[InterleavedContentItem] = [TextContentItem(text=header_template.format(num_chunks=len(chunks)))]
|
|
242
|
+
for i, embedded_chunk in enumerate(chunks):
|
|
243
|
+
metadata = embedded_chunk.metadata
|
|
235
244
|
tokens += metadata.get("token_count", 0)
|
|
236
245
|
tokens += metadata.get("metadata_token_count", 0)
|
|
237
246
|
|
|
@@ -250,22 +259,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|
|
250
259
|
metadata_keys_to_exclude_from_context = [
|
|
251
260
|
"token_count",
|
|
252
261
|
"metadata_token_count",
|
|
253
|
-
"
|
|
262
|
+
"vector_store_id",
|
|
254
263
|
]
|
|
255
264
|
metadata_for_context = {}
|
|
256
265
|
for k in chunk_metadata_keys_to_include_from_context:
|
|
257
|
-
metadata_for_context[k] = getattr(
|
|
266
|
+
metadata_for_context[k] = getattr(embedded_chunk.chunk_metadata, k)
|
|
258
267
|
for k in metadata:
|
|
259
268
|
if k not in metadata_keys_to_exclude_from_context:
|
|
260
269
|
metadata_for_context[k] = metadata[k]
|
|
261
270
|
|
|
262
|
-
text_content =
|
|
271
|
+
text_content = chunk_template.format(index=i + 1, chunk=embedded_chunk, metadata=metadata_for_context)
|
|
263
272
|
picked.append(TextContentItem(text=text_content))
|
|
264
273
|
|
|
265
|
-
picked.append(TextContentItem(text=
|
|
274
|
+
picked.append(TextContentItem(text=footer_template))
|
|
266
275
|
picked.append(
|
|
267
276
|
TextContentItem(
|
|
268
|
-
text=
|
|
277
|
+
text=context_template.format(query=interleaved_content_as_str(content), annotation_instruction="")
|
|
269
278
|
)
|
|
270
279
|
)
|
|
271
280
|
|
|
@@ -275,12 +284,15 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|
|
275
284
|
"document_ids": [c.document_id for c in chunks[: len(picked)]],
|
|
276
285
|
"chunks": [c.content for c in chunks[: len(picked)]],
|
|
277
286
|
"scores": scores[: len(picked)],
|
|
278
|
-
"
|
|
287
|
+
"vector_store_ids": [c.metadata["vector_store_id"] for c in chunks[: len(picked)]],
|
|
279
288
|
},
|
|
280
289
|
)
|
|
281
290
|
|
|
282
291
|
async def list_runtime_tools(
|
|
283
|
-
self,
|
|
292
|
+
self,
|
|
293
|
+
tool_group_id: str | None = None,
|
|
294
|
+
mcp_endpoint: URL | None = None,
|
|
295
|
+
authorization: str | None = None,
|
|
284
296
|
) -> ListToolDefsResponse:
|
|
285
297
|
# Parameters are not listed since these methods are not yet invoked automatically
|
|
286
298
|
# by the LLM. The method is only implemented so things like /tools can list without
|
|
@@ -308,18 +320,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|
|
308
320
|
]
|
|
309
321
|
)
|
|
310
322
|
|
|
311
|
-
async def invoke_tool(
|
|
312
|
-
|
|
323
|
+
async def invoke_tool(
|
|
324
|
+
self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
|
|
325
|
+
) -> ToolInvocationResult:
|
|
326
|
+
vector_store_ids = kwargs.get("vector_store_ids", [])
|
|
313
327
|
query_config = kwargs.get("query_config")
|
|
314
328
|
if query_config:
|
|
315
329
|
query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
|
|
316
330
|
else:
|
|
317
|
-
query_config = RAGQueryConfig(
|
|
331
|
+
query_config = RAGQueryConfig(
|
|
332
|
+
max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
|
|
333
|
+
)
|
|
318
334
|
|
|
319
335
|
query = kwargs["query"]
|
|
320
336
|
result = await self.query(
|
|
321
337
|
content=query,
|
|
322
|
-
|
|
338
|
+
vector_store_ids=vector_store_ids,
|
|
323
339
|
query_config=query_config,
|
|
324
340
|
)
|
|
325
341
|
|
|
@@ -9,7 +9,7 @@ from typing import Any
|
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
11
|
from llama_stack.core.storage.datatypes import KVStoreReference
|
|
12
|
-
from
|
|
12
|
+
from llama_stack_api import json_schema_type
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@json_schema_type
|
|
@@ -10,21 +10,28 @@ import io
|
|
|
10
10
|
import json
|
|
11
11
|
from typing import Any
|
|
12
12
|
|
|
13
|
-
import faiss
|
|
13
|
+
import faiss # type: ignore[import-untyped]
|
|
14
14
|
import numpy as np
|
|
15
15
|
from numpy.typing import NDArray
|
|
16
16
|
|
|
17
|
-
from llama_stack.
|
|
18
|
-
from llama_stack.apis.files import Files
|
|
19
|
-
from llama_stack.apis.inference import Inference, InterleavedContent
|
|
20
|
-
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
|
21
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
17
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
22
18
|
from llama_stack.log import get_logger
|
|
23
|
-
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
|
|
24
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
25
|
-
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
26
19
|
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
27
20
|
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
|
21
|
+
from llama_stack_api import (
|
|
22
|
+
EmbeddedChunk,
|
|
23
|
+
Files,
|
|
24
|
+
HealthResponse,
|
|
25
|
+
HealthStatus,
|
|
26
|
+
Inference,
|
|
27
|
+
InterleavedContent,
|
|
28
|
+
QueryChunksResponse,
|
|
29
|
+
VectorIO,
|
|
30
|
+
VectorStore,
|
|
31
|
+
VectorStoreNotFoundError,
|
|
32
|
+
VectorStoresProtocolPrivate,
|
|
33
|
+
)
|
|
34
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
28
35
|
|
|
29
36
|
from .config import FaissVectorIOConfig
|
|
30
37
|
|
|
@@ -41,7 +48,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_conten
|
|
|
41
48
|
class FaissIndex(EmbeddingIndex):
|
|
42
49
|
def __init__(self, dimension: int, kvstore: KVStore | None = None, bank_id: str | None = None):
|
|
43
50
|
self.index = faiss.IndexFlatL2(dimension)
|
|
44
|
-
self.chunk_by_index: dict[int,
|
|
51
|
+
self.chunk_by_index: dict[int, EmbeddedChunk] = {}
|
|
45
52
|
self.kvstore = kvstore
|
|
46
53
|
self.bank_id = bank_id
|
|
47
54
|
|
|
@@ -65,12 +72,14 @@ class FaissIndex(EmbeddingIndex):
|
|
|
65
72
|
|
|
66
73
|
if stored_data:
|
|
67
74
|
data = json.loads(stored_data)
|
|
68
|
-
self.chunk_by_index = {
|
|
75
|
+
self.chunk_by_index = {
|
|
76
|
+
int(k): EmbeddedChunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()
|
|
77
|
+
}
|
|
69
78
|
|
|
70
79
|
buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
|
|
71
80
|
try:
|
|
72
81
|
self.index = faiss.deserialize_index(np.load(buffer, allow_pickle=False))
|
|
73
|
-
self.chunk_ids = [
|
|
82
|
+
self.chunk_ids = [embedded_chunk.chunk_id for embedded_chunk in self.chunk_by_index.values()]
|
|
74
83
|
except Exception as e:
|
|
75
84
|
logger.debug(e, exc_info=True)
|
|
76
85
|
raise ValueError(
|
|
@@ -100,19 +109,24 @@ class FaissIndex(EmbeddingIndex):
|
|
|
100
109
|
|
|
101
110
|
await self.kvstore.delete(f"{FAISS_INDEX_PREFIX}{self.bank_id}")
|
|
102
111
|
|
|
103
|
-
async def add_chunks(self,
|
|
104
|
-
|
|
112
|
+
async def add_chunks(self, embedded_chunks: list[EmbeddedChunk]):
|
|
113
|
+
if not embedded_chunks:
|
|
114
|
+
return
|
|
115
|
+
|
|
116
|
+
# Extract embeddings and validate dimensions
|
|
117
|
+
embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
|
|
105
118
|
embedding_dim = embeddings.shape[1] if len(embeddings.shape) > 1 else embeddings.shape[0]
|
|
106
119
|
if embedding_dim != self.index.d:
|
|
107
120
|
raise ValueError(f"Embedding dimension mismatch. Expected {self.index.d}, got {embedding_dim}")
|
|
108
121
|
|
|
122
|
+
# Store chunks by index
|
|
109
123
|
indexlen = len(self.chunk_by_index)
|
|
110
|
-
for i,
|
|
111
|
-
self.chunk_by_index[indexlen + i] =
|
|
124
|
+
for i, embedded_chunk in enumerate(embedded_chunks):
|
|
125
|
+
self.chunk_by_index[indexlen + i] = embedded_chunk
|
|
112
126
|
|
|
113
127
|
async with self.chunk_id_lock:
|
|
114
|
-
self.index.add(
|
|
115
|
-
self.chunk_ids.extend([
|
|
128
|
+
self.index.add(embeddings)
|
|
129
|
+
self.chunk_ids.extend([ec.chunk_id for ec in embedded_chunks]) # EmbeddedChunk inherits from Chunk
|
|
116
130
|
|
|
117
131
|
# Save updated index
|
|
118
132
|
await self._save_index()
|
|
@@ -144,8 +158,8 @@ class FaissIndex(EmbeddingIndex):
|
|
|
144
158
|
|
|
145
159
|
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
|
|
146
160
|
distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
|
|
147
|
-
chunks = []
|
|
148
|
-
scores = []
|
|
161
|
+
chunks: list[EmbeddedChunk] = []
|
|
162
|
+
scores: list[float] = []
|
|
149
163
|
for d, i in zip(distances[0], indices[0], strict=False):
|
|
150
164
|
if i < 0:
|
|
151
165
|
continue
|
|
@@ -178,9 +192,8 @@ class FaissIndex(EmbeddingIndex):
|
|
|
178
192
|
|
|
179
193
|
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
|
|
180
194
|
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
|
|
181
|
-
super().__init__(files_api=files_api, kvstore=None)
|
|
195
|
+
super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
|
|
182
196
|
self.config = config
|
|
183
|
-
self.inference_api = inference_api
|
|
184
197
|
self.cache: dict[str, VectorStoreWithIndex] = {}
|
|
185
198
|
|
|
186
199
|
async def initialize(self) -> None:
|
|
@@ -271,19 +284,21 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
|
|
|
271
284
|
self.cache[vector_store_id] = index
|
|
272
285
|
return index
|
|
273
286
|
|
|
274
|
-
async def insert_chunks(
|
|
275
|
-
|
|
287
|
+
async def insert_chunks(
|
|
288
|
+
self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
|
|
289
|
+
) -> None:
|
|
290
|
+
index = self.cache.get(vector_store_id)
|
|
276
291
|
if index is None:
|
|
277
|
-
raise ValueError(f"Vector DB {
|
|
292
|
+
raise ValueError(f"Vector DB {vector_store_id} not found. found: {self.cache.keys()}")
|
|
278
293
|
|
|
279
294
|
await index.insert_chunks(chunks)
|
|
280
295
|
|
|
281
296
|
async def query_chunks(
|
|
282
|
-
self,
|
|
297
|
+
self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
|
283
298
|
) -> QueryChunksResponse:
|
|
284
|
-
index = self.cache.get(
|
|
299
|
+
index = self.cache.get(vector_store_id)
|
|
285
300
|
if index is None:
|
|
286
|
-
raise VectorStoreNotFoundError(
|
|
301
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
287
302
|
|
|
288
303
|
return await index.query_chunks(query, params)
|
|
289
304
|
|
|
@@ -9,7 +9,7 @@ from typing import Any
|
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
11
|
from llama_stack.core.storage.datatypes import KVStoreReference
|
|
12
|
-
from
|
|
12
|
+
from llama_stack_api import json_schema_type
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@json_schema_type
|
|
@@ -11,18 +11,11 @@ import struct
|
|
|
11
11
|
from typing import Any
|
|
12
12
|
|
|
13
13
|
import numpy as np
|
|
14
|
-
import sqlite_vec
|
|
14
|
+
import sqlite_vec # type: ignore[import-untyped]
|
|
15
15
|
from numpy.typing import NDArray
|
|
16
16
|
|
|
17
|
-
from llama_stack.
|
|
18
|
-
from llama_stack.apis.files import Files
|
|
19
|
-
from llama_stack.apis.inference import Inference
|
|
20
|
-
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
|
21
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
17
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
22
18
|
from llama_stack.log import get_logger
|
|
23
|
-
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
24
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
25
|
-
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
26
19
|
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
27
20
|
from llama_stack.providers.utils.memory.vector_store import (
|
|
28
21
|
RERANKER_TYPE_RRF,
|
|
@@ -31,6 +24,17 @@ from llama_stack.providers.utils.memory.vector_store import (
|
|
|
31
24
|
VectorStoreWithIndex,
|
|
32
25
|
)
|
|
33
26
|
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
|
|
27
|
+
from llama_stack_api import (
|
|
28
|
+
EmbeddedChunk,
|
|
29
|
+
Files,
|
|
30
|
+
Inference,
|
|
31
|
+
QueryChunksResponse,
|
|
32
|
+
VectorIO,
|
|
33
|
+
VectorStore,
|
|
34
|
+
VectorStoreNotFoundError,
|
|
35
|
+
VectorStoresProtocolPrivate,
|
|
36
|
+
)
|
|
37
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
34
38
|
|
|
35
39
|
logger = get_logger(name=__name__, category="vector_io")
|
|
36
40
|
|
|
@@ -137,14 +141,16 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|
|
137
141
|
|
|
138
142
|
await asyncio.to_thread(_drop_tables)
|
|
139
143
|
|
|
140
|
-
async def add_chunks(self,
|
|
144
|
+
async def add_chunks(self, embedded_chunks: list[EmbeddedChunk], batch_size: int = 500):
|
|
141
145
|
"""
|
|
142
|
-
Add new chunks
|
|
143
|
-
For each chunk, we insert
|
|
146
|
+
Add new embedded chunks using batch inserts.
|
|
147
|
+
For each embedded chunk, we insert the chunk JSON into the metadata table and then insert its
|
|
144
148
|
embedding (serialized to raw bytes) into the virtual table using the assigned rowid.
|
|
145
149
|
If any insert fails, the transaction is rolled back to maintain consistency.
|
|
146
150
|
Also inserts chunk content into FTS table for keyword search support.
|
|
147
151
|
"""
|
|
152
|
+
chunks = embedded_chunks # EmbeddedChunk now inherits from Chunk
|
|
153
|
+
embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
|
|
148
154
|
assert all(isinstance(chunk.content, str) for chunk in chunks), "SQLiteVecIndex only supports text chunks"
|
|
149
155
|
|
|
150
156
|
def _execute_all_batch_inserts():
|
|
@@ -229,11 +235,11 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|
|
229
235
|
if score < score_threshold:
|
|
230
236
|
continue
|
|
231
237
|
try:
|
|
232
|
-
|
|
238
|
+
embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
|
|
233
239
|
except Exception as e:
|
|
234
240
|
logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
|
|
235
241
|
continue
|
|
236
|
-
chunks.append(
|
|
242
|
+
chunks.append(embedded_chunk)
|
|
237
243
|
scores.append(score)
|
|
238
244
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
239
245
|
|
|
@@ -270,11 +276,11 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|
|
270
276
|
if score > -score_threshold:
|
|
271
277
|
continue
|
|
272
278
|
try:
|
|
273
|
-
|
|
279
|
+
embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
|
|
274
280
|
except Exception as e:
|
|
275
281
|
logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
|
|
276
282
|
continue
|
|
277
|
-
chunks.append(
|
|
283
|
+
chunks.append(embedded_chunk)
|
|
278
284
|
scores.append(score)
|
|
279
285
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
280
286
|
|
|
@@ -308,13 +314,14 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|
|
308
314
|
vector_response = await self.query_vector(embedding, k, score_threshold)
|
|
309
315
|
keyword_response = await self.query_keyword(query_string, k, score_threshold)
|
|
310
316
|
|
|
311
|
-
# Convert responses to score dictionaries using chunk_id
|
|
317
|
+
# Convert responses to score dictionaries using chunk_id (EmbeddedChunk inherits from Chunk)
|
|
312
318
|
vector_scores = {
|
|
313
|
-
|
|
319
|
+
embedded_chunk.chunk_id: score
|
|
320
|
+
for embedded_chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
|
|
314
321
|
}
|
|
315
322
|
keyword_scores = {
|
|
316
|
-
|
|
317
|
-
for
|
|
323
|
+
embedded_chunk.chunk_id: score
|
|
324
|
+
for embedded_chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
|
|
318
325
|
}
|
|
319
326
|
|
|
320
327
|
# Combine scores using the reranking utility
|
|
@@ -329,10 +336,10 @@ class SQLiteVecIndex(EmbeddingIndex):
|
|
|
329
336
|
# Filter by score threshold
|
|
330
337
|
filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
|
|
331
338
|
|
|
332
|
-
# Create a map of chunk_id to
|
|
333
|
-
chunk_map = {
|
|
339
|
+
# Create a map of chunk_id to embedded_chunk for both responses
|
|
340
|
+
chunk_map = {ec.chunk_id: ec for ec in vector_response.chunks + keyword_response.chunks}
|
|
334
341
|
|
|
335
|
-
# Use the map to look up chunks by their IDs
|
|
342
|
+
# Use the map to look up embedded chunks by their IDs
|
|
336
343
|
chunks = []
|
|
337
344
|
scores = []
|
|
338
345
|
for doc_id, score in filtered_items:
|
|
@@ -382,9 +389,8 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
|
|
|
382
389
|
"""
|
|
383
390
|
|
|
384
391
|
def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
|
|
385
|
-
super().__init__(files_api=files_api, kvstore=None)
|
|
392
|
+
super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
|
|
386
393
|
self.config = config
|
|
387
|
-
self.inference_api = inference_api
|
|
388
394
|
self.cache: dict[str, VectorStoreWithIndex] = {}
|
|
389
395
|
self.vector_store_table = None
|
|
390
396
|
|
|
@@ -458,20 +464,21 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
|
|
|
458
464
|
await self.cache[vector_store_id].index.delete()
|
|
459
465
|
del self.cache[vector_store_id]
|
|
460
466
|
|
|
461
|
-
async def insert_chunks(
|
|
462
|
-
|
|
467
|
+
async def insert_chunks(
|
|
468
|
+
self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
|
|
469
|
+
) -> None:
|
|
470
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
463
471
|
if not index:
|
|
464
|
-
raise VectorStoreNotFoundError(
|
|
465
|
-
# The VectorStoreWithIndex helper
|
|
466
|
-
# and then call our index's add_chunks.
|
|
472
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
473
|
+
# The VectorStoreWithIndex helper validates embeddings and calls the index's add_chunks method
|
|
467
474
|
await index.insert_chunks(chunks)
|
|
468
475
|
|
|
469
476
|
async def query_chunks(
|
|
470
|
-
self,
|
|
477
|
+
self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
|
|
471
478
|
) -> QueryChunksResponse:
|
|
472
|
-
index = await self._get_and_cache_vector_store_index(
|
|
479
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
473
480
|
if not index:
|
|
474
|
-
raise VectorStoreNotFoundError(
|
|
481
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
475
482
|
return await index.query_chunks(query, params)
|
|
476
483
|
|
|
477
484
|
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|