llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -11,16 +11,9 @@ from typing import Any
|
|
|
11
11
|
from numpy.typing import NDArray
|
|
12
12
|
from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker
|
|
13
13
|
|
|
14
|
-
from llama_stack.
|
|
15
|
-
from llama_stack.apis.files import Files
|
|
16
|
-
from llama_stack.apis.inference import Inference, InterleavedContent
|
|
17
|
-
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
|
18
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
14
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
19
15
|
from llama_stack.log import get_logger
|
|
20
|
-
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
21
16
|
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
|
|
22
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
23
|
-
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
24
17
|
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
25
18
|
from llama_stack.providers.utils.memory.vector_store import (
|
|
26
19
|
RERANKER_TYPE_WEIGHTED,
|
|
@@ -29,6 +22,18 @@ from llama_stack.providers.utils.memory.vector_store import (
|
|
|
29
22
|
VectorStoreWithIndex,
|
|
30
23
|
)
|
|
31
24
|
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
|
|
25
|
+
from llama_stack_api import (
|
|
26
|
+
EmbeddedChunk,
|
|
27
|
+
Files,
|
|
28
|
+
Inference,
|
|
29
|
+
InterleavedContent,
|
|
30
|
+
QueryChunksResponse,
|
|
31
|
+
VectorIO,
|
|
32
|
+
VectorStore,
|
|
33
|
+
VectorStoreNotFoundError,
|
|
34
|
+
VectorStoresProtocolPrivate,
|
|
35
|
+
)
|
|
36
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
32
37
|
|
|
33
38
|
from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
|
|
34
39
|
|
|
@@ -60,7 +65,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
60
65
|
if await asyncio.to_thread(self.client.has_collection, self.collection_name):
|
|
61
66
|
await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name)
|
|
62
67
|
|
|
63
|
-
async def add_chunks(self, chunks: list[
|
|
68
|
+
async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
|
|
64
69
|
assert len(chunks) == len(embeddings), (
|
|
65
70
|
f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
|
|
66
71
|
)
|
|
@@ -131,7 +136,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
131
136
|
output_fields=["*"],
|
|
132
137
|
search_params={"params": {"radius": score_threshold}},
|
|
133
138
|
)
|
|
134
|
-
chunks = [
|
|
139
|
+
chunks = [EmbeddedChunk(**res["entity"]["chunk_content"]) for res in search_res[0]]
|
|
135
140
|
scores = [res["distance"] for res in search_res[0]]
|
|
136
141
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
137
142
|
|
|
@@ -158,7 +163,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
158
163
|
chunks = []
|
|
159
164
|
scores = []
|
|
160
165
|
for res in search_res[0]:
|
|
161
|
-
chunk =
|
|
166
|
+
chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
|
|
162
167
|
chunks.append(chunk)
|
|
163
168
|
scores.append(res["distance"]) # BM25 score from Milvus
|
|
164
169
|
|
|
@@ -186,7 +191,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
186
191
|
output_fields=["*"],
|
|
187
192
|
limit=k,
|
|
188
193
|
)
|
|
189
|
-
chunks = [
|
|
194
|
+
chunks = [EmbeddedChunk(**res["chunk_content"]) for res in search_res]
|
|
190
195
|
scores = [1.0] * len(chunks) # Simple binary score for text search
|
|
191
196
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
192
197
|
|
|
@@ -238,7 +243,7 @@ class MilvusIndex(EmbeddingIndex):
|
|
|
238
243
|
chunks = []
|
|
239
244
|
scores = []
|
|
240
245
|
for res in search_res[0]:
|
|
241
|
-
chunk =
|
|
246
|
+
chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
|
|
242
247
|
chunks.append(chunk)
|
|
243
248
|
scores.append(res["distance"])
|
|
244
249
|
|
|
@@ -268,11 +273,10 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
268
273
|
inference_api: Inference,
|
|
269
274
|
files_api: Files | None,
|
|
270
275
|
) -> None:
|
|
271
|
-
super().__init__(files_api=files_api, kvstore=None)
|
|
276
|
+
super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
|
|
272
277
|
self.config = config
|
|
273
278
|
self.cache = {}
|
|
274
279
|
self.client = None
|
|
275
|
-
self.inference_api = inference_api
|
|
276
280
|
self.vector_store_table = None
|
|
277
281
|
self.metadata_collection_name = "openai_vector_stores_metadata"
|
|
278
282
|
|
|
@@ -351,19 +355,21 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
351
355
|
await self.cache[vector_store_id].index.delete()
|
|
352
356
|
del self.cache[vector_store_id]
|
|
353
357
|
|
|
354
|
-
async def insert_chunks(
|
|
355
|
-
|
|
358
|
+
async def insert_chunks(
|
|
359
|
+
self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
|
|
360
|
+
) -> None:
|
|
361
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
356
362
|
if not index:
|
|
357
|
-
raise VectorStoreNotFoundError(
|
|
363
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
358
364
|
|
|
359
365
|
await index.insert_chunks(chunks)
|
|
360
366
|
|
|
361
367
|
async def query_chunks(
|
|
362
|
-
self,
|
|
368
|
+
self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
|
363
369
|
) -> QueryChunksResponse:
|
|
364
|
-
index = await self._get_and_cache_vector_store_index(
|
|
370
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
365
371
|
if not index:
|
|
366
|
-
raise VectorStoreNotFoundError(
|
|
372
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
367
373
|
return await index.query_chunks(query, params)
|
|
368
374
|
|
|
369
375
|
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from llama_stack_api import Api, ProviderSpec
|
|
8
8
|
|
|
9
9
|
from .config import PGVectorVectorIOConfig
|
|
10
10
|
|
|
@@ -9,7 +9,7 @@ from typing import Any
|
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
11
|
from llama_stack.core.storage.datatypes import KVStoreReference
|
|
12
|
-
from
|
|
12
|
+
from llama_stack_api import json_schema_type
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@json_schema_type
|
|
@@ -13,19 +13,24 @@ from psycopg2 import sql
|
|
|
13
13
|
from psycopg2.extras import Json, execute_values
|
|
14
14
|
from pydantic import BaseModel, TypeAdapter
|
|
15
15
|
|
|
16
|
-
from llama_stack.
|
|
17
|
-
from llama_stack.apis.files import Files
|
|
18
|
-
from llama_stack.apis.inference import Inference, InterleavedContent
|
|
19
|
-
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
|
20
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
16
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
21
17
|
from llama_stack.log import get_logger
|
|
22
|
-
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
23
18
|
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
|
24
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
25
|
-
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
26
19
|
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
27
20
|
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
|
28
21
|
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
|
|
22
|
+
from llama_stack_api import (
|
|
23
|
+
EmbeddedChunk,
|
|
24
|
+
Files,
|
|
25
|
+
Inference,
|
|
26
|
+
InterleavedContent,
|
|
27
|
+
QueryChunksResponse,
|
|
28
|
+
VectorIO,
|
|
29
|
+
VectorStore,
|
|
30
|
+
VectorStoreNotFoundError,
|
|
31
|
+
VectorStoresProtocolPrivate,
|
|
32
|
+
)
|
|
33
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
29
34
|
|
|
30
35
|
from .config import PGVectorVectorIOConfig
|
|
31
36
|
|
|
@@ -125,7 +130,7 @@ class PGVectorIndex(EmbeddingIndex):
|
|
|
125
130
|
log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
|
|
126
131
|
raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
|
|
127
132
|
|
|
128
|
-
async def add_chunks(self, chunks: list[
|
|
133
|
+
async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
|
|
129
134
|
assert len(chunks) == len(embeddings), (
|
|
130
135
|
f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
|
|
131
136
|
)
|
|
@@ -189,7 +194,7 @@ class PGVectorIndex(EmbeddingIndex):
|
|
|
189
194
|
score = 1.0 / float(dist) if dist != 0 else float("inf")
|
|
190
195
|
if score < score_threshold:
|
|
191
196
|
continue
|
|
192
|
-
chunks.append(
|
|
197
|
+
chunks.append(EmbeddedChunk(**doc))
|
|
193
198
|
scores.append(score)
|
|
194
199
|
|
|
195
200
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
@@ -225,7 +230,7 @@ class PGVectorIndex(EmbeddingIndex):
|
|
|
225
230
|
for doc, score in results:
|
|
226
231
|
if score < score_threshold:
|
|
227
232
|
continue
|
|
228
|
-
chunks.append(
|
|
233
|
+
chunks.append(EmbeddedChunk(**doc))
|
|
229
234
|
scores.append(float(score))
|
|
230
235
|
|
|
231
236
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
@@ -327,16 +332,17 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
|
|
|
327
332
|
def __init__(
|
|
328
333
|
self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None
|
|
329
334
|
) -> None:
|
|
330
|
-
super().__init__(files_api=files_api, kvstore=None)
|
|
335
|
+
super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
|
|
331
336
|
self.config = config
|
|
332
|
-
self.inference_api = inference_api
|
|
333
337
|
self.conn = None
|
|
334
338
|
self.cache = {}
|
|
335
339
|
self.vector_store_table = None
|
|
336
340
|
self.metadata_collection_name = "openai_vector_stores_metadata"
|
|
337
341
|
|
|
338
342
|
async def initialize(self) -> None:
|
|
339
|
-
|
|
343
|
+
# Create a safe config representation with masked password for logging
|
|
344
|
+
safe_config = {**self.config.model_dump(exclude={"password"}), "password": "******"}
|
|
345
|
+
log.info(f"Initializing PGVector memory adapter with config: {safe_config}")
|
|
340
346
|
self.kvstore = await kvstore_impl(self.config.persistence)
|
|
341
347
|
await self.initialize_openai_vector_stores()
|
|
342
348
|
|
|
@@ -422,14 +428,16 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
|
|
|
422
428
|
raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
|
|
423
429
|
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
|
424
430
|
|
|
425
|
-
async def insert_chunks(
|
|
426
|
-
|
|
431
|
+
async def insert_chunks(
|
|
432
|
+
self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
|
|
433
|
+
) -> None:
|
|
434
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
427
435
|
await index.insert_chunks(chunks)
|
|
428
436
|
|
|
429
437
|
async def query_chunks(
|
|
430
|
-
self,
|
|
438
|
+
self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
|
431
439
|
) -> QueryChunksResponse:
|
|
432
|
-
index = await self._get_and_cache_vector_store_index(
|
|
440
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
433
441
|
return await index.query_chunks(query, params)
|
|
434
442
|
|
|
435
443
|
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from llama_stack_api import Api, ProviderSpec
|
|
8
8
|
|
|
9
9
|
from .config import QdrantVectorIOConfig
|
|
10
10
|
|
|
@@ -13,23 +13,24 @@ from numpy.typing import NDArray
|
|
|
13
13
|
from qdrant_client import AsyncQdrantClient, models
|
|
14
14
|
from qdrant_client.models import PointStruct
|
|
15
15
|
|
|
16
|
-
from llama_stack.
|
|
17
|
-
from llama_stack.
|
|
18
|
-
from llama_stack.
|
|
19
|
-
from llama_stack.
|
|
20
|
-
|
|
16
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
17
|
+
from llama_stack.log import get_logger
|
|
18
|
+
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
|
|
19
|
+
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
20
|
+
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
|
21
|
+
from llama_stack_api import (
|
|
22
|
+
EmbeddedChunk,
|
|
23
|
+
Files,
|
|
24
|
+
Inference,
|
|
25
|
+
InterleavedContent,
|
|
21
26
|
QueryChunksResponse,
|
|
22
27
|
VectorIO,
|
|
28
|
+
VectorStore,
|
|
23
29
|
VectorStoreChunkingStrategy,
|
|
24
30
|
VectorStoreFileObject,
|
|
31
|
+
VectorStoreNotFoundError,
|
|
32
|
+
VectorStoresProtocolPrivate,
|
|
25
33
|
)
|
|
26
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
27
|
-
from llama_stack.log import get_logger
|
|
28
|
-
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
29
|
-
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
|
|
30
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
31
|
-
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
32
|
-
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
|
|
33
34
|
|
|
34
35
|
from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
|
|
35
36
|
|
|
@@ -65,7 +66,7 @@ class QdrantIndex(EmbeddingIndex):
|
|
|
65
66
|
# If the collection does not exist, it will be created in add_chunks.
|
|
66
67
|
pass
|
|
67
68
|
|
|
68
|
-
async def add_chunks(self, chunks: list[
|
|
69
|
+
async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
|
|
69
70
|
assert len(chunks) == len(embeddings), (
|
|
70
71
|
f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
|
|
71
72
|
)
|
|
@@ -117,7 +118,7 @@ class QdrantIndex(EmbeddingIndex):
|
|
|
117
118
|
assert point.payload is not None
|
|
118
119
|
|
|
119
120
|
try:
|
|
120
|
-
chunk =
|
|
121
|
+
chunk = EmbeddedChunk(**point.payload["chunk_content"])
|
|
121
122
|
except Exception:
|
|
122
123
|
log.exception("Failed to parse chunk")
|
|
123
124
|
continue
|
|
@@ -128,7 +129,63 @@ class QdrantIndex(EmbeddingIndex):
|
|
|
128
129
|
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
129
130
|
|
|
130
131
|
async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
|
|
131
|
-
|
|
132
|
+
"""
|
|
133
|
+
Performs keyword-based search using Qdrant's MatchText filter.
|
|
134
|
+
|
|
135
|
+
Uses Qdrant's query_filter with MatchText to search for chunks containing
|
|
136
|
+
the specified text query string in the chunk content.
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
query_string: The text query for keyword search
|
|
140
|
+
k: Number of results to return
|
|
141
|
+
score_threshold: Minimum similarity score threshold
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
QueryChunksResponse with chunks and scores matching the keyword query
|
|
145
|
+
"""
|
|
146
|
+
try:
|
|
147
|
+
results = (
|
|
148
|
+
await self.client.query_points(
|
|
149
|
+
collection_name=self.collection_name,
|
|
150
|
+
query_filter=models.Filter(
|
|
151
|
+
must=[
|
|
152
|
+
models.FieldCondition(
|
|
153
|
+
key="chunk_content.content", match=models.MatchText(text=query_string)
|
|
154
|
+
)
|
|
155
|
+
]
|
|
156
|
+
),
|
|
157
|
+
limit=k,
|
|
158
|
+
with_payload=True,
|
|
159
|
+
with_vectors=False,
|
|
160
|
+
score_threshold=score_threshold,
|
|
161
|
+
)
|
|
162
|
+
).points
|
|
163
|
+
except Exception as e:
|
|
164
|
+
log.error(f"Error querying keyword search in Qdrant collection {self.collection_name}: {e}")
|
|
165
|
+
raise
|
|
166
|
+
|
|
167
|
+
chunks, scores = [], []
|
|
168
|
+
for point in results:
|
|
169
|
+
if not isinstance(point, models.ScoredPoint):
|
|
170
|
+
raise RuntimeError(f"Expected ScoredPoint from Qdrant query, got {type(point).__name__}")
|
|
171
|
+
if point.payload is None:
|
|
172
|
+
raise RuntimeError("Qdrant query returned point with no payload")
|
|
173
|
+
|
|
174
|
+
try:
|
|
175
|
+
chunk = EmbeddedChunk(**point.payload["chunk_content"])
|
|
176
|
+
except Exception:
|
|
177
|
+
chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
|
|
178
|
+
point_id = getattr(point, "id", "unknown")
|
|
179
|
+
log.exception(
|
|
180
|
+
f"Failed to parse chunk in collection {self.collection_name}: "
|
|
181
|
+
f"chunk_id={chunk_id}, point_id={point_id}"
|
|
182
|
+
)
|
|
183
|
+
continue
|
|
184
|
+
|
|
185
|
+
chunks.append(chunk)
|
|
186
|
+
scores.append(point.score)
|
|
187
|
+
|
|
188
|
+
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
132
189
|
|
|
133
190
|
async def query_hybrid(
|
|
134
191
|
self,
|
|
@@ -139,7 +196,66 @@ class QdrantIndex(EmbeddingIndex):
|
|
|
139
196
|
reranker_type: str,
|
|
140
197
|
reranker_params: dict[str, Any] | None = None,
|
|
141
198
|
) -> QueryChunksResponse:
|
|
142
|
-
|
|
199
|
+
"""
|
|
200
|
+
Hybrid search combining vector similarity and keyword filtering in a single query.
|
|
201
|
+
|
|
202
|
+
Uses Qdrant's native capability to combine a vector query with a query_filter,
|
|
203
|
+
allowing vector similarity search to be filtered by keyword matches in one call.
|
|
204
|
+
|
|
205
|
+
Args:
|
|
206
|
+
embedding: The query embedding vector
|
|
207
|
+
query_string: The text query for keyword filtering
|
|
208
|
+
k: Number of results to return
|
|
209
|
+
score_threshold: Minimum similarity score threshold
|
|
210
|
+
reranker_type: Not used with this approach, but kept for API compatibility
|
|
211
|
+
reranker_params: Not used with this approach, but kept for API compatibility
|
|
212
|
+
|
|
213
|
+
Returns:
|
|
214
|
+
QueryChunksResponse with filtered vector search results
|
|
215
|
+
"""
|
|
216
|
+
try:
|
|
217
|
+
results = (
|
|
218
|
+
await self.client.query_points(
|
|
219
|
+
collection_name=self.collection_name,
|
|
220
|
+
query=embedding.tolist(),
|
|
221
|
+
query_filter=models.Filter(
|
|
222
|
+
must=[
|
|
223
|
+
models.FieldCondition(
|
|
224
|
+
key="chunk_content.content", match=models.MatchText(text=query_string)
|
|
225
|
+
)
|
|
226
|
+
]
|
|
227
|
+
),
|
|
228
|
+
limit=k,
|
|
229
|
+
with_payload=True,
|
|
230
|
+
score_threshold=score_threshold,
|
|
231
|
+
)
|
|
232
|
+
).points
|
|
233
|
+
except Exception as e:
|
|
234
|
+
log.error(f"Error querying hybrid search in Qdrant collection {self.collection_name}: {e}")
|
|
235
|
+
raise
|
|
236
|
+
|
|
237
|
+
chunks, scores = [], []
|
|
238
|
+
for point in results:
|
|
239
|
+
if not isinstance(point, models.ScoredPoint):
|
|
240
|
+
raise RuntimeError(f"Expected ScoredPoint from Qdrant query, got {type(point).__name__}")
|
|
241
|
+
if point.payload is None:
|
|
242
|
+
raise RuntimeError("Qdrant query returned point with no payload")
|
|
243
|
+
|
|
244
|
+
try:
|
|
245
|
+
chunk = EmbeddedChunk(**point.payload["chunk_content"])
|
|
246
|
+
except Exception:
|
|
247
|
+
chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
|
|
248
|
+
point_id = getattr(point, "id", "unknown")
|
|
249
|
+
log.exception(
|
|
250
|
+
f"Failed to parse chunk in collection {self.collection_name}: "
|
|
251
|
+
f"chunk_id={chunk_id}, point_id={point_id}"
|
|
252
|
+
)
|
|
253
|
+
continue
|
|
254
|
+
|
|
255
|
+
chunks.append(chunk)
|
|
256
|
+
scores.append(point.score)
|
|
257
|
+
|
|
258
|
+
return QueryChunksResponse(chunks=chunks, scores=scores)
|
|
143
259
|
|
|
144
260
|
async def delete(self):
|
|
145
261
|
await self.client.delete_collection(collection_name=self.collection_name)
|
|
@@ -152,11 +268,10 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
152
268
|
inference_api: Inference,
|
|
153
269
|
files_api: Files | None = None,
|
|
154
270
|
) -> None:
|
|
155
|
-
super().__init__(files_api=files_api, kvstore=None)
|
|
271
|
+
super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
|
|
156
272
|
self.config = config
|
|
157
273
|
self.client: AsyncQdrantClient = None
|
|
158
274
|
self.cache = {}
|
|
159
|
-
self.inference_api = inference_api
|
|
160
275
|
self.vector_store_table = None
|
|
161
276
|
self._qdrant_lock = asyncio.Lock()
|
|
162
277
|
|
|
@@ -227,19 +342,21 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
227
342
|
self.cache[vector_store_id] = index
|
|
228
343
|
return index
|
|
229
344
|
|
|
230
|
-
async def insert_chunks(
|
|
231
|
-
|
|
345
|
+
async def insert_chunks(
|
|
346
|
+
self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
|
|
347
|
+
) -> None:
|
|
348
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
232
349
|
if not index:
|
|
233
|
-
raise VectorStoreNotFoundError(
|
|
350
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
234
351
|
|
|
235
352
|
await index.insert_chunks(chunks)
|
|
236
353
|
|
|
237
354
|
async def query_chunks(
|
|
238
|
-
self,
|
|
355
|
+
self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
|
239
356
|
) -> QueryChunksResponse:
|
|
240
|
-
index = await self._get_and_cache_vector_store_index(
|
|
357
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
241
358
|
if not index:
|
|
242
|
-
raise VectorStoreNotFoundError(
|
|
359
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
243
360
|
|
|
244
361
|
return await index.query_chunks(query, params)
|
|
245
362
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from
|
|
7
|
+
from llama_stack_api import Api, ProviderSpec
|
|
8
8
|
|
|
9
9
|
from .config import WeaviateVectorIOConfig
|
|
10
10
|
|
|
@@ -9,7 +9,7 @@ from typing import Any
|
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
11
|
from llama_stack.core.storage.datatypes import KVStoreReference
|
|
12
|
-
from
|
|
12
|
+
from llama_stack_api import json_schema_type
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@json_schema_type
|
|
@@ -12,17 +12,9 @@ from numpy.typing import NDArray
|
|
|
12
12
|
from weaviate.classes.init import Auth
|
|
13
13
|
from weaviate.classes.query import Filter, HybridFusion
|
|
14
14
|
|
|
15
|
-
from llama_stack.apis.common.content_types import InterleavedContent
|
|
16
|
-
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
|
17
|
-
from llama_stack.apis.files import Files
|
|
18
|
-
from llama_stack.apis.inference import Inference
|
|
19
|
-
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
|
20
|
-
from llama_stack.apis.vector_stores import VectorStore
|
|
21
15
|
from llama_stack.core.request_headers import NeedsRequestProviderData
|
|
16
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
22
17
|
from llama_stack.log import get_logger
|
|
23
|
-
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
|
|
24
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
25
|
-
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
26
18
|
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
|
|
27
19
|
from llama_stack.providers.utils.memory.vector_store import (
|
|
28
20
|
RERANKER_TYPE_RRF,
|
|
@@ -31,6 +23,18 @@ from llama_stack.providers.utils.memory.vector_store import (
|
|
|
31
23
|
VectorStoreWithIndex,
|
|
32
24
|
)
|
|
33
25
|
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
|
|
26
|
+
from llama_stack_api import (
|
|
27
|
+
EmbeddedChunk,
|
|
28
|
+
Files,
|
|
29
|
+
Inference,
|
|
30
|
+
InterleavedContent,
|
|
31
|
+
QueryChunksResponse,
|
|
32
|
+
VectorIO,
|
|
33
|
+
VectorStore,
|
|
34
|
+
VectorStoreNotFoundError,
|
|
35
|
+
VectorStoresProtocolPrivate,
|
|
36
|
+
)
|
|
37
|
+
from llama_stack_api.internal.kvstore import KVStore
|
|
34
38
|
|
|
35
39
|
from .config import WeaviateVectorIOConfig
|
|
36
40
|
|
|
@@ -53,7 +57,7 @@ class WeaviateIndex(EmbeddingIndex):
|
|
|
53
57
|
async def initialize(self):
|
|
54
58
|
pass
|
|
55
59
|
|
|
56
|
-
async def add_chunks(self, chunks: list[
|
|
60
|
+
async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
|
|
57
61
|
assert len(chunks) == len(embeddings), (
|
|
58
62
|
f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
|
|
59
63
|
)
|
|
@@ -112,7 +116,7 @@ class WeaviateIndex(EmbeddingIndex):
|
|
|
112
116
|
chunk_json = doc.properties["chunk_content"]
|
|
113
117
|
try:
|
|
114
118
|
chunk_dict = json.loads(chunk_json)
|
|
115
|
-
chunk =
|
|
119
|
+
chunk = EmbeddedChunk(**chunk_dict)
|
|
116
120
|
except Exception:
|
|
117
121
|
log.exception(f"Failed to parse document: {chunk_json}")
|
|
118
122
|
continue
|
|
@@ -172,7 +176,7 @@ class WeaviateIndex(EmbeddingIndex):
|
|
|
172
176
|
chunk_json = doc.properties["chunk_content"]
|
|
173
177
|
try:
|
|
174
178
|
chunk_dict = json.loads(chunk_json)
|
|
175
|
-
chunk =
|
|
179
|
+
chunk = EmbeddedChunk(**chunk_dict)
|
|
176
180
|
except Exception:
|
|
177
181
|
log.exception(f"Failed to parse document: {chunk_json}")
|
|
178
182
|
continue
|
|
@@ -241,7 +245,7 @@ class WeaviateIndex(EmbeddingIndex):
|
|
|
241
245
|
chunk_json = doc.properties["chunk_content"]
|
|
242
246
|
try:
|
|
243
247
|
chunk_dict = json.loads(chunk_json)
|
|
244
|
-
chunk =
|
|
248
|
+
chunk = EmbeddedChunk(**chunk_dict)
|
|
245
249
|
except Exception:
|
|
246
250
|
log.exception(f"Failed to parse document: {chunk_json}")
|
|
247
251
|
continue
|
|
@@ -259,9 +263,8 @@ class WeaviateIndex(EmbeddingIndex):
|
|
|
259
263
|
|
|
260
264
|
class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate):
|
|
261
265
|
def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
|
|
262
|
-
super().__init__(files_api=files_api, kvstore=None)
|
|
266
|
+
super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
|
|
263
267
|
self.config = config
|
|
264
|
-
self.inference_api = inference_api
|
|
265
268
|
self.client_cache = {}
|
|
266
269
|
self.cache = {}
|
|
267
270
|
self.vector_store_table = None
|
|
@@ -369,19 +372,21 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
|
|
|
369
372
|
self.cache[vector_store_id] = index
|
|
370
373
|
return index
|
|
371
374
|
|
|
372
|
-
async def insert_chunks(
|
|
373
|
-
|
|
375
|
+
async def insert_chunks(
|
|
376
|
+
self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
|
|
377
|
+
) -> None:
|
|
378
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
374
379
|
if not index:
|
|
375
|
-
raise VectorStoreNotFoundError(
|
|
380
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
376
381
|
|
|
377
382
|
await index.insert_chunks(chunks)
|
|
378
383
|
|
|
379
384
|
async def query_chunks(
|
|
380
|
-
self,
|
|
385
|
+
self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
|
|
381
386
|
) -> QueryChunksResponse:
|
|
382
|
-
index = await self._get_and_cache_vector_store_index(
|
|
387
|
+
index = await self._get_and_cache_vector_store_index(vector_store_id)
|
|
383
388
|
if not index:
|
|
384
|
-
raise VectorStoreNotFoundError(
|
|
389
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
385
390
|
|
|
386
391
|
return await index.query_chunks(query, params)
|
|
387
392
|
|
|
@@ -7,12 +7,8 @@
|
|
|
7
7
|
from enum import Enum
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
|
-
from llama_stack.apis.common.type_system import (
|
|
11
|
-
ChatCompletionInputType,
|
|
12
|
-
CompletionInputType,
|
|
13
|
-
StringType,
|
|
14
|
-
)
|
|
15
10
|
from llama_stack.core.datatypes import Api
|
|
11
|
+
from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType
|
|
16
12
|
|
|
17
13
|
|
|
18
14
|
class ColumnName(Enum):
|
|
@@ -9,7 +9,7 @@ import json
|
|
|
9
9
|
from fastapi import Request
|
|
10
10
|
from pydantic import BaseModel, ValidationError
|
|
11
11
|
|
|
12
|
-
from
|
|
12
|
+
from llama_stack_api import ExpiresAfter
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None:
|
|
@@ -17,7 +17,7 @@ from llama_stack.log import get_logger
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
18
18
|
from sentence_transformers import SentenceTransformer
|
|
19
19
|
|
|
20
|
-
from
|
|
20
|
+
from llama_stack_api import (
|
|
21
21
|
ModelStore,
|
|
22
22
|
OpenAIEmbeddingData,
|
|
23
23
|
OpenAIEmbeddingsRequestWithExtraBody,
|