llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -8,19 +8,15 @@
|
|
|
8
8
|
#
|
|
9
9
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
10
10
|
# the root directory of this source tree.
|
|
11
|
-
import uuid
|
|
12
11
|
from typing import Annotated, Any, Literal, Protocol, runtime_checkable
|
|
13
12
|
|
|
14
|
-
from fastapi import Body
|
|
15
|
-
from pydantic import BaseModel, Field
|
|
13
|
+
from fastapi import Body, Query
|
|
14
|
+
from pydantic import BaseModel, Field, field_validator
|
|
16
15
|
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
|
|
22
|
-
from llama_stack.schema_utils import json_schema_type, webmethod
|
|
23
|
-
from llama_stack.strong_typing.schema import register_schema
|
|
16
|
+
from llama_stack_api.inference import InterleavedContent
|
|
17
|
+
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
|
|
18
|
+
from llama_stack_api.vector_stores import VectorStore
|
|
19
|
+
from llama_stack_api.version import LLAMA_STACK_API_V1
|
|
24
20
|
|
|
25
21
|
|
|
26
22
|
@json_schema_type
|
|
@@ -37,8 +33,6 @@ class ChunkMetadata(BaseModel):
|
|
|
37
33
|
:param updated_timestamp: An optional timestamp indicating when the chunk was last updated.
|
|
38
34
|
:param chunk_window: The window of the chunk, which can be used to group related chunks together.
|
|
39
35
|
:param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken.
|
|
40
|
-
:param chunk_embedding_model: The embedding model used to create the chunk's embedding.
|
|
41
|
-
:param chunk_embedding_dimension: The dimension of the embedding vector for the chunk.
|
|
42
36
|
:param content_token_count: The number of tokens in the content of the chunk.
|
|
43
37
|
:param metadata_token_count: The number of tokens in the metadata of the chunk.
|
|
44
38
|
"""
|
|
@@ -50,8 +44,6 @@ class ChunkMetadata(BaseModel):
|
|
|
50
44
|
updated_timestamp: int | None = None
|
|
51
45
|
chunk_window: str | None = None
|
|
52
46
|
chunk_tokenizer: str | None = None
|
|
53
|
-
chunk_embedding_model: str | None = None
|
|
54
|
-
chunk_embedding_dimension: int | None = None
|
|
55
47
|
content_token_count: int | None = None
|
|
56
48
|
metadata_token_count: int | None = None
|
|
57
49
|
|
|
@@ -59,39 +51,18 @@ class ChunkMetadata(BaseModel):
|
|
|
59
51
|
@json_schema_type
|
|
60
52
|
class Chunk(BaseModel):
|
|
61
53
|
"""
|
|
62
|
-
A chunk of content
|
|
54
|
+
A chunk of content from file processing.
|
|
63
55
|
:param content: The content of the chunk, which can be interleaved text, images, or other types.
|
|
64
|
-
:param
|
|
56
|
+
:param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
|
|
65
57
|
:param metadata: Metadata associated with the chunk that will be used in the model context during inference.
|
|
66
|
-
:param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality.
|
|
67
58
|
:param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
|
|
68
59
|
The `chunk_metadata` is required backend functionality.
|
|
69
60
|
"""
|
|
70
61
|
|
|
71
62
|
content: InterleavedContent
|
|
63
|
+
chunk_id: str
|
|
72
64
|
metadata: dict[str, Any] = Field(default_factory=dict)
|
|
73
|
-
|
|
74
|
-
# The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
|
|
75
|
-
stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
|
|
76
|
-
chunk_metadata: ChunkMetadata | None = None
|
|
77
|
-
|
|
78
|
-
model_config = {"populate_by_name": True}
|
|
79
|
-
|
|
80
|
-
def model_post_init(self, __context):
|
|
81
|
-
# Extract chunk_id from metadata if present
|
|
82
|
-
if self.metadata and "chunk_id" in self.metadata:
|
|
83
|
-
self.stored_chunk_id = self.metadata.pop("chunk_id")
|
|
84
|
-
|
|
85
|
-
@property
|
|
86
|
-
def chunk_id(self) -> str:
|
|
87
|
-
"""Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
|
|
88
|
-
if self.stored_chunk_id:
|
|
89
|
-
return self.stored_chunk_id
|
|
90
|
-
|
|
91
|
-
if "document_id" in self.metadata:
|
|
92
|
-
return generate_chunk_id(self.metadata["document_id"], str(self.content))
|
|
93
|
-
|
|
94
|
-
return generate_chunk_id(str(uuid.uuid4()), str(self.content))
|
|
65
|
+
chunk_metadata: ChunkMetadata
|
|
95
66
|
|
|
96
67
|
@property
|
|
97
68
|
def document_id(self) -> str | None:
|
|
@@ -110,15 +81,30 @@ class Chunk(BaseModel):
|
|
|
110
81
|
return None
|
|
111
82
|
|
|
112
83
|
|
|
84
|
+
@json_schema_type
|
|
85
|
+
class EmbeddedChunk(Chunk):
|
|
86
|
+
"""
|
|
87
|
+
A chunk of content with its embedding vector for vector database operations.
|
|
88
|
+
Inherits all fields from Chunk and adds embedding-related fields.
|
|
89
|
+
:param embedding: The embedding vector for the chunk content.
|
|
90
|
+
:param embedding_model: The model used to generate the embedding (e.g., 'openai/text-embedding-3-small').
|
|
91
|
+
:param embedding_dimension: The dimension of the embedding vector.
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
embedding: list[float]
|
|
95
|
+
embedding_model: str
|
|
96
|
+
embedding_dimension: int
|
|
97
|
+
|
|
98
|
+
|
|
113
99
|
@json_schema_type
|
|
114
100
|
class QueryChunksResponse(BaseModel):
|
|
115
101
|
"""Response from querying chunks in a vector database.
|
|
116
102
|
|
|
117
|
-
:param chunks: List of
|
|
103
|
+
:param chunks: List of embedded chunks returned from the query
|
|
118
104
|
:param scores: Relevance scores corresponding to each returned chunk
|
|
119
105
|
"""
|
|
120
106
|
|
|
121
|
-
chunks: list[
|
|
107
|
+
chunks: list[EmbeddedChunk]
|
|
122
108
|
scores: list[float]
|
|
123
109
|
|
|
124
110
|
|
|
@@ -245,10 +231,16 @@ class VectorStoreContent(BaseModel):
|
|
|
245
231
|
|
|
246
232
|
:param type: Content type, currently only "text" is supported
|
|
247
233
|
:param text: The actual text content
|
|
234
|
+
:param embedding: Optional embedding vector for this content chunk
|
|
235
|
+
:param chunk_metadata: Optional chunk metadata
|
|
236
|
+
:param metadata: Optional user-defined metadata
|
|
248
237
|
"""
|
|
249
238
|
|
|
250
239
|
type: Literal["text"]
|
|
251
240
|
text: str
|
|
241
|
+
embedding: list[float] | None = None
|
|
242
|
+
chunk_metadata: ChunkMetadata | None = None
|
|
243
|
+
metadata: dict[str, Any] | None = None
|
|
252
244
|
|
|
253
245
|
|
|
254
246
|
@json_schema_type
|
|
@@ -281,7 +273,7 @@ class VectorStoreSearchResponsePage(BaseModel):
|
|
|
281
273
|
"""
|
|
282
274
|
|
|
283
275
|
object: str = "vector_store.search_results.page"
|
|
284
|
-
search_query: str
|
|
276
|
+
search_query: list[str]
|
|
285
277
|
data: list[VectorStoreSearchResponse]
|
|
286
278
|
has_more: bool = False
|
|
287
279
|
next_page: str | None = None
|
|
@@ -301,6 +293,22 @@ class VectorStoreDeleteResponse(BaseModel):
|
|
|
301
293
|
deleted: bool = True
|
|
302
294
|
|
|
303
295
|
|
|
296
|
+
@json_schema_type
|
|
297
|
+
class VectorStoreFileContentResponse(BaseModel):
|
|
298
|
+
"""Represents the parsed content of a vector store file.
|
|
299
|
+
|
|
300
|
+
:param object: The object type, which is always `vector_store.file_content.page`
|
|
301
|
+
:param data: Parsed content of the file
|
|
302
|
+
:param has_more: Indicates if there are more content pages to fetch
|
|
303
|
+
:param next_page: The token for the next page, if any
|
|
304
|
+
"""
|
|
305
|
+
|
|
306
|
+
object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
|
|
307
|
+
data: list[VectorStoreContent]
|
|
308
|
+
has_more: bool = False
|
|
309
|
+
next_page: str | None = None
|
|
310
|
+
|
|
311
|
+
|
|
304
312
|
@json_schema_type
|
|
305
313
|
class VectorStoreChunkingStrategyAuto(BaseModel):
|
|
306
314
|
"""Automatic chunking strategy for vector store files.
|
|
@@ -372,6 +380,65 @@ VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal[
|
|
|
372
380
|
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
|
|
373
381
|
|
|
374
382
|
|
|
383
|
+
# VectorStoreFileAttributes type with OpenAPI constraints
|
|
384
|
+
VectorStoreFileAttributes = Annotated[
|
|
385
|
+
dict[str, Annotated[str, Field(max_length=512)] | float | bool],
|
|
386
|
+
Field(
|
|
387
|
+
max_length=16,
|
|
388
|
+
json_schema_extra={
|
|
389
|
+
"propertyNames": {"type": "string", "maxLength": 64},
|
|
390
|
+
"x-oaiTypeLabel": "map",
|
|
391
|
+
},
|
|
392
|
+
description=(
|
|
393
|
+
"Set of 16 key-value pairs that can be attached to an object. This can be "
|
|
394
|
+
"useful for storing additional information about the object in a structured "
|
|
395
|
+
"format, and querying for objects via API or the dashboard. Keys are strings "
|
|
396
|
+
"with a maximum length of 64 characters. Values are strings with a maximum "
|
|
397
|
+
"length of 512 characters, booleans, or numbers."
|
|
398
|
+
),
|
|
399
|
+
),
|
|
400
|
+
]
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def _sanitize_vector_store_attributes(metadata: dict[str, Any] | None) -> dict[str, str | float | bool]:
|
|
404
|
+
"""
|
|
405
|
+
Sanitize metadata to VectorStoreFileAttributes spec (max 16 properties, primitives only).
|
|
406
|
+
|
|
407
|
+
Converts dict[str, Any] to dict[str, str | float | bool]:
|
|
408
|
+
- Preserves: str (truncated to 512 chars), bool, int/float (as float)
|
|
409
|
+
- Converts: list -> comma-separated string
|
|
410
|
+
- Filters: dict, None, other types
|
|
411
|
+
- Enforces: max 16 properties, max 64 char keys, max 512 char string values
|
|
412
|
+
"""
|
|
413
|
+
if not metadata:
|
|
414
|
+
return {}
|
|
415
|
+
|
|
416
|
+
sanitized: dict[str, str | float | bool] = {}
|
|
417
|
+
for key, value in metadata.items():
|
|
418
|
+
# Enforce max 16 properties
|
|
419
|
+
if len(sanitized) >= 16:
|
|
420
|
+
break
|
|
421
|
+
|
|
422
|
+
# Enforce max 64 char keys
|
|
423
|
+
if len(key) > 64:
|
|
424
|
+
continue
|
|
425
|
+
|
|
426
|
+
# Convert to supported primitive types
|
|
427
|
+
if isinstance(value, bool):
|
|
428
|
+
sanitized[key] = value
|
|
429
|
+
elif isinstance(value, int | float):
|
|
430
|
+
sanitized[key] = float(value)
|
|
431
|
+
elif isinstance(value, str):
|
|
432
|
+
# Enforce max 512 char string values
|
|
433
|
+
sanitized[key] = value[:512] if len(value) > 512 else value
|
|
434
|
+
elif isinstance(value, list):
|
|
435
|
+
# Convert lists to comma-separated strings (max 512 chars)
|
|
436
|
+
list_str = ", ".join(str(item) for item in value)
|
|
437
|
+
sanitized[key] = list_str[:512] if len(list_str) > 512 else list_str
|
|
438
|
+
|
|
439
|
+
return sanitized
|
|
440
|
+
|
|
441
|
+
|
|
375
442
|
@json_schema_type
|
|
376
443
|
class VectorStoreFileObject(BaseModel):
|
|
377
444
|
"""OpenAI Vector Store File object.
|
|
@@ -389,7 +456,7 @@ class VectorStoreFileObject(BaseModel):
|
|
|
389
456
|
|
|
390
457
|
id: str
|
|
391
458
|
object: str = "vector_store.file"
|
|
392
|
-
attributes:
|
|
459
|
+
attributes: VectorStoreFileAttributes = Field(default_factory=dict)
|
|
393
460
|
chunking_strategy: VectorStoreChunkingStrategy
|
|
394
461
|
created_at: int
|
|
395
462
|
last_error: VectorStoreFileLastError | None = None
|
|
@@ -397,6 +464,12 @@ class VectorStoreFileObject(BaseModel):
|
|
|
397
464
|
usage_bytes: int = 0
|
|
398
465
|
vector_store_id: str
|
|
399
466
|
|
|
467
|
+
@field_validator("attributes", mode="before")
|
|
468
|
+
@classmethod
|
|
469
|
+
def _validate_attributes(cls, v: dict[str, Any] | None) -> dict[str, str | float | bool]:
|
|
470
|
+
"""Sanitize attributes to match VectorStoreFileAttributes OpenAPI spec."""
|
|
471
|
+
return _sanitize_vector_store_attributes(v)
|
|
472
|
+
|
|
400
473
|
|
|
401
474
|
@json_schema_type
|
|
402
475
|
class VectorStoreListFilesResponse(BaseModel):
|
|
@@ -416,22 +489,6 @@ class VectorStoreListFilesResponse(BaseModel):
|
|
|
416
489
|
has_more: bool = False
|
|
417
490
|
|
|
418
491
|
|
|
419
|
-
@json_schema_type
|
|
420
|
-
class VectorStoreFileContentsResponse(BaseModel):
|
|
421
|
-
"""Response from retrieving the contents of a vector store file.
|
|
422
|
-
|
|
423
|
-
:param file_id: Unique identifier for the file
|
|
424
|
-
:param filename: Name of the file
|
|
425
|
-
:param attributes: Key-value attributes associated with the file
|
|
426
|
-
:param content: List of content items from the file
|
|
427
|
-
"""
|
|
428
|
-
|
|
429
|
-
file_id: str
|
|
430
|
-
filename: str
|
|
431
|
-
attributes: dict[str, Any]
|
|
432
|
-
content: list[VectorStoreContent]
|
|
433
|
-
|
|
434
|
-
|
|
435
492
|
@json_schema_type
|
|
436
493
|
class VectorStoreFileDeleteResponse(BaseModel):
|
|
437
494
|
"""Response from deleting a vector store file.
|
|
@@ -499,7 +556,7 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
|
|
|
499
556
|
name: str | None = None
|
|
500
557
|
file_ids: list[str] | None = None
|
|
501
558
|
expires_after: dict[str, Any] | None = None
|
|
502
|
-
chunking_strategy:
|
|
559
|
+
chunking_strategy: VectorStoreChunkingStrategy | None = None
|
|
503
560
|
metadata: dict[str, Any] | None = None
|
|
504
561
|
|
|
505
562
|
|
|
@@ -523,42 +580,39 @@ class VectorStoreTable(Protocol):
|
|
|
523
580
|
|
|
524
581
|
|
|
525
582
|
@runtime_checkable
|
|
526
|
-
@trace_protocol
|
|
527
583
|
class VectorIO(Protocol):
|
|
528
584
|
vector_store_table: VectorStoreTable | None = None
|
|
529
585
|
|
|
530
586
|
# this will just block now until chunks are inserted, but it should
|
|
531
587
|
# probably return a Job instance which can be polled for completion
|
|
532
|
-
# TODO: rename
|
|
588
|
+
# TODO: rename vector_store_id to vector_store_id once Stainless is working
|
|
533
589
|
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
|
|
534
590
|
async def insert_chunks(
|
|
535
591
|
self,
|
|
536
|
-
|
|
537
|
-
chunks: list[
|
|
592
|
+
vector_store_id: str,
|
|
593
|
+
chunks: list[EmbeddedChunk],
|
|
538
594
|
ttl_seconds: int | None = None,
|
|
539
595
|
) -> None:
|
|
540
|
-
"""Insert chunks into a vector database.
|
|
596
|
+
"""Insert embedded chunks into a vector database.
|
|
541
597
|
|
|
542
|
-
:param
|
|
543
|
-
:param chunks: The chunks to insert. Each `
|
|
544
|
-
|
|
545
|
-
If `metadata` is provided, you configure how Llama Stack formats the chunk during generation.
|
|
546
|
-
If `embedding` is not provided, it will be computed later.
|
|
598
|
+
:param vector_store_id: The identifier of the vector database to insert the chunks into.
|
|
599
|
+
:param chunks: The embedded chunks to insert. Each `EmbeddedChunk` contains the content, metadata,
|
|
600
|
+
and embedding vector ready for storage.
|
|
547
601
|
:param ttl_seconds: The time to live of the chunks.
|
|
548
602
|
"""
|
|
549
603
|
...
|
|
550
604
|
|
|
551
|
-
# TODO: rename
|
|
605
|
+
# TODO: rename vector_store_id to vector_store_id once Stainless is working
|
|
552
606
|
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
|
|
553
607
|
async def query_chunks(
|
|
554
608
|
self,
|
|
555
|
-
|
|
609
|
+
vector_store_id: str,
|
|
556
610
|
query: InterleavedContent,
|
|
557
611
|
params: dict[str, Any] | None = None,
|
|
558
612
|
) -> QueryChunksResponse:
|
|
559
613
|
"""Query chunks from a vector database.
|
|
560
614
|
|
|
561
|
-
:param
|
|
615
|
+
:param vector_store_id: The identifier of the vector database to query.
|
|
562
616
|
:param query: The query to search for.
|
|
563
617
|
:param params: The parameters of the query.
|
|
564
618
|
:returns: A QueryChunksResponse.
|
|
@@ -566,7 +620,6 @@ class VectorIO(Protocol):
|
|
|
566
620
|
...
|
|
567
621
|
|
|
568
622
|
# OpenAI Vector Stores API endpoints
|
|
569
|
-
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
|
570
623
|
@webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
|
|
571
624
|
async def openai_create_vector_store(
|
|
572
625
|
self,
|
|
@@ -579,7 +632,6 @@ class VectorIO(Protocol):
|
|
|
579
632
|
"""
|
|
580
633
|
...
|
|
581
634
|
|
|
582
|
-
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
|
583
635
|
@webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
|
|
584
636
|
async def openai_list_vector_stores(
|
|
585
637
|
self,
|
|
@@ -598,9 +650,6 @@ class VectorIO(Protocol):
|
|
|
598
650
|
"""
|
|
599
651
|
...
|
|
600
652
|
|
|
601
|
-
@webmethod(
|
|
602
|
-
route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
|
|
603
|
-
)
|
|
604
653
|
@webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
|
|
605
654
|
async def openai_retrieve_vector_store(
|
|
606
655
|
self,
|
|
@@ -613,9 +662,6 @@ class VectorIO(Protocol):
|
|
|
613
662
|
"""
|
|
614
663
|
...
|
|
615
664
|
|
|
616
|
-
@webmethod(
|
|
617
|
-
route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
|
|
618
|
-
)
|
|
619
665
|
@webmethod(
|
|
620
666
|
route="/vector_stores/{vector_store_id}",
|
|
621
667
|
method="POST",
|
|
@@ -638,9 +684,6 @@ class VectorIO(Protocol):
|
|
|
638
684
|
"""
|
|
639
685
|
...
|
|
640
686
|
|
|
641
|
-
@webmethod(
|
|
642
|
-
route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
|
|
643
|
-
)
|
|
644
687
|
@webmethod(
|
|
645
688
|
route="/vector_stores/{vector_store_id}",
|
|
646
689
|
method="DELETE",
|
|
@@ -657,12 +700,6 @@ class VectorIO(Protocol):
|
|
|
657
700
|
"""
|
|
658
701
|
...
|
|
659
702
|
|
|
660
|
-
@webmethod(
|
|
661
|
-
route="/openai/v1/vector_stores/{vector_store_id}/search",
|
|
662
|
-
method="POST",
|
|
663
|
-
level=LLAMA_STACK_API_V1,
|
|
664
|
-
deprecated=True,
|
|
665
|
-
)
|
|
666
703
|
@webmethod(
|
|
667
704
|
route="/vector_stores/{vector_store_id}/search",
|
|
668
705
|
method="POST",
|
|
@@ -695,12 +732,6 @@ class VectorIO(Protocol):
|
|
|
695
732
|
"""
|
|
696
733
|
...
|
|
697
734
|
|
|
698
|
-
@webmethod(
|
|
699
|
-
route="/openai/v1/vector_stores/{vector_store_id}/files",
|
|
700
|
-
method="POST",
|
|
701
|
-
level=LLAMA_STACK_API_V1,
|
|
702
|
-
deprecated=True,
|
|
703
|
-
)
|
|
704
735
|
@webmethod(
|
|
705
736
|
route="/vector_stores/{vector_store_id}/files",
|
|
706
737
|
method="POST",
|
|
@@ -723,12 +754,6 @@ class VectorIO(Protocol):
|
|
|
723
754
|
"""
|
|
724
755
|
...
|
|
725
756
|
|
|
726
|
-
@webmethod(
|
|
727
|
-
route="/openai/v1/vector_stores/{vector_store_id}/files",
|
|
728
|
-
method="GET",
|
|
729
|
-
level=LLAMA_STACK_API_V1,
|
|
730
|
-
deprecated=True,
|
|
731
|
-
)
|
|
732
757
|
@webmethod(
|
|
733
758
|
route="/vector_stores/{vector_store_id}/files",
|
|
734
759
|
method="GET",
|
|
@@ -755,12 +780,6 @@ class VectorIO(Protocol):
|
|
|
755
780
|
"""
|
|
756
781
|
...
|
|
757
782
|
|
|
758
|
-
@webmethod(
|
|
759
|
-
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
|
|
760
|
-
method="GET",
|
|
761
|
-
level=LLAMA_STACK_API_V1,
|
|
762
|
-
deprecated=True,
|
|
763
|
-
)
|
|
764
783
|
@webmethod(
|
|
765
784
|
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
|
766
785
|
method="GET",
|
|
@@ -779,12 +798,6 @@ class VectorIO(Protocol):
|
|
|
779
798
|
"""
|
|
780
799
|
...
|
|
781
800
|
|
|
782
|
-
@webmethod(
|
|
783
|
-
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
|
|
784
|
-
method="GET",
|
|
785
|
-
level=LLAMA_STACK_API_V1,
|
|
786
|
-
deprecated=True,
|
|
787
|
-
)
|
|
788
801
|
@webmethod(
|
|
789
802
|
route="/vector_stores/{vector_store_id}/files/{file_id}/content",
|
|
790
803
|
method="GET",
|
|
@@ -794,21 +807,19 @@ class VectorIO(Protocol):
|
|
|
794
807
|
self,
|
|
795
808
|
vector_store_id: str,
|
|
796
809
|
file_id: str,
|
|
797
|
-
|
|
810
|
+
include_embeddings: Annotated[bool | None, Query()] = False,
|
|
811
|
+
include_metadata: Annotated[bool | None, Query()] = False,
|
|
812
|
+
) -> VectorStoreFileContentResponse:
|
|
798
813
|
"""Retrieves the contents of a vector store file.
|
|
799
814
|
|
|
800
815
|
:param vector_store_id: The ID of the vector store containing the file to retrieve.
|
|
801
816
|
:param file_id: The ID of the file to retrieve.
|
|
802
|
-
:
|
|
817
|
+
:param include_embeddings: Whether to include embedding vectors in the response.
|
|
818
|
+
:param include_metadata: Whether to include chunk metadata in the response.
|
|
819
|
+
:returns: File contents, optionally with embeddings and metadata based on query parameters.
|
|
803
820
|
"""
|
|
804
821
|
...
|
|
805
822
|
|
|
806
|
-
@webmethod(
|
|
807
|
-
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
|
|
808
|
-
method="POST",
|
|
809
|
-
level=LLAMA_STACK_API_V1,
|
|
810
|
-
deprecated=True,
|
|
811
|
-
)
|
|
812
823
|
@webmethod(
|
|
813
824
|
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
|
814
825
|
method="POST",
|
|
@@ -829,12 +840,6 @@ class VectorIO(Protocol):
|
|
|
829
840
|
"""
|
|
830
841
|
...
|
|
831
842
|
|
|
832
|
-
@webmethod(
|
|
833
|
-
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
|
|
834
|
-
method="DELETE",
|
|
835
|
-
level=LLAMA_STACK_API_V1,
|
|
836
|
-
deprecated=True,
|
|
837
|
-
)
|
|
838
843
|
@webmethod(
|
|
839
844
|
route="/vector_stores/{vector_store_id}/files/{file_id}",
|
|
840
845
|
method="DELETE",
|
|
@@ -858,12 +863,6 @@ class VectorIO(Protocol):
|
|
|
858
863
|
method="POST",
|
|
859
864
|
level=LLAMA_STACK_API_V1,
|
|
860
865
|
)
|
|
861
|
-
@webmethod(
|
|
862
|
-
route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
|
|
863
|
-
method="POST",
|
|
864
|
-
level=LLAMA_STACK_API_V1,
|
|
865
|
-
deprecated=True,
|
|
866
|
-
)
|
|
867
866
|
async def openai_create_vector_store_file_batch(
|
|
868
867
|
self,
|
|
869
868
|
vector_store_id: str,
|
|
@@ -882,12 +881,6 @@ class VectorIO(Protocol):
|
|
|
882
881
|
method="GET",
|
|
883
882
|
level=LLAMA_STACK_API_V1,
|
|
884
883
|
)
|
|
885
|
-
@webmethod(
|
|
886
|
-
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
|
|
887
|
-
method="GET",
|
|
888
|
-
level=LLAMA_STACK_API_V1,
|
|
889
|
-
deprecated=True,
|
|
890
|
-
)
|
|
891
884
|
async def openai_retrieve_vector_store_file_batch(
|
|
892
885
|
self,
|
|
893
886
|
batch_id: str,
|
|
@@ -901,12 +894,6 @@ class VectorIO(Protocol):
|
|
|
901
894
|
"""
|
|
902
895
|
...
|
|
903
896
|
|
|
904
|
-
@webmethod(
|
|
905
|
-
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
|
|
906
|
-
method="GET",
|
|
907
|
-
level=LLAMA_STACK_API_V1,
|
|
908
|
-
deprecated=True,
|
|
909
|
-
)
|
|
910
897
|
@webmethod(
|
|
911
898
|
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
|
|
912
899
|
method="GET",
|
|
@@ -935,12 +922,6 @@ class VectorIO(Protocol):
|
|
|
935
922
|
"""
|
|
936
923
|
...
|
|
937
924
|
|
|
938
|
-
@webmethod(
|
|
939
|
-
route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
|
|
940
|
-
method="POST",
|
|
941
|
-
level=LLAMA_STACK_API_V1,
|
|
942
|
-
deprecated=True,
|
|
943
|
-
)
|
|
944
925
|
@webmethod(
|
|
945
926
|
route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
|
|
946
927
|
method="POST",
|
|
@@ -8,7 +8,7 @@ from typing import Literal
|
|
|
8
8
|
|
|
9
9
|
from pydantic import BaseModel
|
|
10
10
|
|
|
11
|
-
from
|
|
11
|
+
from llama_stack_api.resource import Resource, ResourceType
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
# Internal resource type for storing the vector store routing and other information
|