llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
llama_stack/providers/utils/memory/openai_vector_store_mixin.py (new file)

@@ -0,0 +1,1304 @@

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio
import json
import mimetypes
import time
import uuid
from abc import ABC, abstractmethod
from typing import Annotated, Any

from fastapi import Body
from pydantic import TypeAdapter

from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files, OpenAIFileObject
from llama_stack.apis.vector_io import (
    Chunk,
    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
    OpenAICreateVectorStoreRequestWithExtraBody,
    QueryChunksResponse,
    SearchRankingOptions,
    VectorStoreChunkingStrategy,
    VectorStoreChunkingStrategyAuto,
    VectorStoreChunkingStrategyStatic,
    VectorStoreContent,
    VectorStoreDeleteResponse,
    VectorStoreFileBatchObject,
    VectorStoreFileContentsResponse,
    VectorStoreFileCounts,
    VectorStoreFileDeleteResponse,
    VectorStoreFileLastError,
    VectorStoreFileObject,
    VectorStoreFilesListInBatchResponse,
    VectorStoreFileStatus,
    VectorStoreListFilesResponse,
    VectorStoreListResponse,
    VectorStoreObject,
    VectorStoreSearchResponse,
    VectorStoreSearchResponsePage,
)
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.core.id_generation import generate_object_id
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.vector_store import (
    ChunkForDeletion,
    content_from_data_and_mime_type,
    make_overlapped_chunks,
)

EMBEDDING_DIMENSION = 768

logger = get_logger(name=__name__, category="providers::utils")

# Constants for OpenAI vector stores
CHUNK_MULTIPLIER = 5
FILE_BATCH_CLEANUP_INTERVAL_SECONDS = 24 * 60 * 60  # 1 day in seconds
MAX_CONCURRENT_FILES_PER_BATCH = 3  # Maximum concurrent file processing within a batch
FILE_BATCH_CHUNK_SIZE = 10  # Process files in chunks of this size

VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::"
OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX = f"openai_vector_stores_file_batches:{VERSION}::"


class OpenAIVectorStoreMixin(ABC):
    """
    Mixin class that provides common OpenAI Vector Store API implementation.
    Providers need to implement the abstract storage methods and maintain
    an openai_vector_stores in-memory cache.
    """

    # Implementing classes should call super().__init__() in their __init__ method
    # to properly initialize the mixin attributes.
    def __init__(
        self,
        files_api: Files | None = None,
        kvstore: KVStore | None = None,
    ):
        self.openai_vector_stores: dict[str, dict[str, Any]] = {}
        self.openai_file_batches: dict[str, dict[str, Any]] = {}
        self.files_api = files_api
        self.kvstore = kvstore
        self._last_file_batch_cleanup_time = 0
        self._file_batch_tasks: dict[str, asyncio.Task[None]] = {}

    async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
        """Save vector store metadata to persistent storage."""
        assert self.kvstore
        key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
        await self.kvstore.set(key=key, value=json.dumps(store_info))
        # update in-memory cache
        self.openai_vector_stores[store_id] = store_info

    async def _load_openai_vector_stores(self) -> dict[str, dict[str, Any]]:
        """Load all vector store metadata from persistent storage."""
        assert self.kvstore
        start_key = OPENAI_VECTOR_STORES_PREFIX
        end_key = f"{OPENAI_VECTOR_STORES_PREFIX}\xff"
        stored_data = await self.kvstore.values_in_range(start_key, end_key)

        stores: dict[str, dict[str, Any]] = {}
        for item in stored_data:
            info = json.loads(item)
            stores[info["id"]] = info
        return stores

    async def _update_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
        """Update vector store metadata in persistent storage."""
        assert self.kvstore
        key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
        await self.kvstore.set(key=key, value=json.dumps(store_info))
        # update in-memory cache
        self.openai_vector_stores[store_id] = store_info

    async def _delete_openai_vector_store_from_storage(self, store_id: str) -> None:
        """Delete vector store metadata from persistent storage."""
        assert self.kvstore
        key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
        await self.kvstore.delete(key)
        # remove from in-memory cache
        self.openai_vector_stores.pop(store_id, None)

    async def _save_openai_vector_store_file(
        self,
        store_id: str,
        file_id: str,
        file_info: dict[str, Any],
        file_contents: list[dict[str, Any]],
    ) -> None:
        """Save vector store file metadata to persistent storage."""
        assert self.kvstore
        meta_key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
        await self.kvstore.set(key=meta_key, value=json.dumps(file_info))
        contents_prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
        for idx, chunk in enumerate(file_contents):
            await self.kvstore.set(key=f"{contents_prefix}{idx}", value=json.dumps(chunk))

    async def _load_openai_vector_store_file(self, store_id: str, file_id: str) -> dict[str, Any]:
        """Load vector store file metadata from persistent storage."""
        assert self.kvstore
        key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
        stored_data = await self.kvstore.get(key)
        return json.loads(stored_data) if stored_data else {}

    async def _load_openai_vector_store_file_contents(self, store_id: str, file_id: str) -> list[dict[str, Any]]:
        """Load vector store file contents from persistent storage."""
        assert self.kvstore
        prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
        end_key = f"{prefix}\xff"
        raw_items = await self.kvstore.values_in_range(prefix, end_key)
        return [json.loads(item) for item in raw_items]

    async def _update_openai_vector_store_file(self, store_id: str, file_id: str, file_info: dict[str, Any]) -> None:
        """Update vector store file metadata in persistent storage."""
        assert self.kvstore
        key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
        await self.kvstore.set(key=key, value=json.dumps(file_info))

    async def _delete_openai_vector_store_file_from_storage(self, store_id: str, file_id: str) -> None:
        """Delete vector store file metadata from persistent storage."""
        assert self.kvstore

        meta_key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
        await self.kvstore.delete(meta_key)

        contents_prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
        end_key = f"{contents_prefix}\xff"
        # load all stored chunk values (values_in_range is implemented by all backends)
        raw_items = await self.kvstore.values_in_range(contents_prefix, end_key)
        # delete each chunk by its index suffix
        for idx in range(len(raw_items)):
            await self.kvstore.delete(f"{contents_prefix}{idx}")

    async def _save_openai_vector_store_file_batch(self, batch_id: str, batch_info: dict[str, Any]) -> None:
        """Save file batch metadata to persistent storage."""
        assert self.kvstore
        key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{batch_id}"
        await self.kvstore.set(key=key, value=json.dumps(batch_info))
        # update in-memory cache
        self.openai_file_batches[batch_id] = batch_info

    async def _load_openai_vector_store_file_batches(self) -> dict[str, dict[str, Any]]:
        """Load all file batch metadata from persistent storage."""
        assert self.kvstore
        start_key = OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX
        end_key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}\xff"
        stored_data = await self.kvstore.values_in_range(start_key, end_key)

        batches: dict[str, dict[str, Any]] = {}
        for item in stored_data:
            info = json.loads(item)
            batches[info["id"]] = info
        return batches

    async def _delete_openai_vector_store_file_batch(self, batch_id: str) -> None:
        """Delete file batch metadata from persistent storage and in-memory cache."""
        assert self.kvstore
        key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{batch_id}"
        await self.kvstore.delete(key)
        # remove from in-memory cache
        self.openai_file_batches.pop(batch_id, None)

    async def _cleanup_expired_file_batches(self) -> None:
        """Clean up expired file batches from persistent storage."""
        assert self.kvstore
        start_key = OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX
        end_key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}\xff"
        stored_data = await self.kvstore.values_in_range(start_key, end_key)

        current_time = int(time.time())
        expired_count = 0

        for item in stored_data:
            info = json.loads(item)
            expires_at = info.get("expires_at")
            if expires_at and current_time > expires_at:
                logger.info(f"Cleaning up expired file batch: {info['id']}")
                await self.kvstore.delete(f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{info['id']}")
                # Remove from in-memory cache if present
                self.openai_file_batches.pop(info["id"], None)
                expired_count += 1

        if expired_count > 0:
            logger.info(f"Cleaned up {expired_count} expired file batches")

    async def _get_completed_files_in_batch(self, vector_store_id: str, file_ids: list[str]) -> set[str]:
        """Determine which files in a batch are actually completed by checking vector store file_ids."""
        if vector_store_id not in self.openai_vector_stores:
            return set()

        store_info = self.openai_vector_stores[vector_store_id]
        completed_files = set(file_ids) & set(store_info["file_ids"])
        return completed_files

    async def _analyze_batch_completion_on_resume(self, batch_id: str, batch_info: dict[str, Any]) -> list[str]:
        """Analyze batch completion status and return remaining files to process.

        Returns:
            List of file IDs that still need processing. Empty list if batch is complete.
        """
        vector_store_id = batch_info["vector_store_id"]
        all_file_ids = batch_info["file_ids"]

        # Find files that are actually completed
        completed_files = await self._get_completed_files_in_batch(vector_store_id, all_file_ids)
        remaining_files = [file_id for file_id in all_file_ids if file_id not in completed_files]
```
|
|
255
|
+
|
|
256
|
+
completed_count = len(completed_files)
|
|
257
|
+
total_count = len(all_file_ids)
|
|
258
|
+
remaining_count = len(remaining_files)
|
|
259
|
+
|
|
260
|
+
# Update file counts to reflect actual state
|
|
261
|
+
batch_info["file_counts"] = {
|
|
262
|
+
"completed": completed_count,
|
|
263
|
+
"failed": 0, # We don't track failed files during resume - they'll be retried
|
|
264
|
+
"in_progress": remaining_count,
|
|
265
|
+
"cancelled": 0,
|
|
266
|
+
"total": total_count,
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
# If all files are already completed, mark batch as completed
|
|
270
|
+
if remaining_count == 0:
|
|
271
|
+
batch_info["status"] = "completed"
|
|
272
|
+
logger.info(f"Batch {batch_id} is already fully completed, updating status")
|
|
273
|
+
|
|
274
|
+
# Save updated batch info
|
|
275
|
+
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
|
|
276
|
+
|
|
277
|
+
return remaining_files
|
|
278
|
+
|
|
279
|
+
async def _resume_incomplete_batches(self) -> None:
|
|
280
|
+
"""Resume processing of incomplete file batches after server restart."""
|
|
281
|
+
for batch_id, batch_info in self.openai_file_batches.items():
|
|
282
|
+
if batch_info["status"] == "in_progress":
|
|
283
|
+
logger.info(f"Analyzing incomplete file batch: {batch_id}")
|
|
284
|
+
|
|
285
|
+
remaining_files = await self._analyze_batch_completion_on_resume(batch_id, batch_info)
|
|
286
|
+
|
|
287
|
+
# Check if batch is now completed after analysis
|
|
288
|
+
if batch_info["status"] == "completed":
|
|
289
|
+
continue
|
|
290
|
+
|
|
291
|
+
if remaining_files:
|
|
292
|
+
logger.info(f"Resuming batch {batch_id} with {len(remaining_files)} remaining files")
|
|
293
|
+
# Restart the background processing task with only remaining files
|
|
294
|
+
task = asyncio.create_task(self._process_file_batch_async(batch_id, batch_info, remaining_files))
|
|
295
|
+
self._file_batch_tasks[batch_id] = task
|
|
296
|
+
|
|
297
|
+
async def initialize_openai_vector_stores(self) -> None:
|
|
298
|
+
"""Load existing OpenAI vector stores and file batches into the in-memory cache."""
|
|
299
|
+
self.openai_vector_stores = await self._load_openai_vector_stores()
|
|
300
|
+
self.openai_file_batches = await self._load_openai_vector_store_file_batches()
|
|
301
|
+
self._file_batch_tasks = {}
|
|
302
|
+
# TODO: Resume only works for single worker deployment. Jobs with multiple workers will need to be handled differently.
|
|
303
|
+
await self._resume_incomplete_batches()
|
|
304
|
+
self._last_file_batch_cleanup_time = 0
|
|
305
|
+
|
|
306
|
+
async def shutdown(self) -> None:
|
|
307
|
+
"""Clean up mixin resources including background tasks."""
|
|
308
|
+
# Cancel any running file batch tasks gracefully
|
|
309
|
+
tasks_to_cancel = list(self._file_batch_tasks.items())
|
|
310
|
+
for _, task in tasks_to_cancel:
|
|
311
|
+
if not task.done():
|
|
312
|
+
task.cancel()
|
|
313
|
+
try:
|
|
314
|
+
await task
|
|
315
|
+
except asyncio.CancelledError:
|
|
316
|
+
pass
|
|
317
|
+
|
|
318
|
+
@abstractmethod
|
|
319
|
+
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
|
|
320
|
+
"""Delete chunks from a vector store."""
|
|
321
|
+
pass
|
|
322
|
+
|
|
323
|
+
@abstractmethod
|
|
324
|
+
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
|
325
|
+
"""Register a vector database (provider-specific implementation)."""
|
|
326
|
+
pass
|
|
327
|
+
|
|
328
|
+
@abstractmethod
|
|
329
|
+
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
|
330
|
+
"""Unregister a vector database (provider-specific implementation)."""
|
|
331
|
+
pass
|
|
332
|
+
|
|
333
|
+
@abstractmethod
|
|
334
|
+
async def insert_chunks(
|
|
335
|
+
self,
|
|
336
|
+
vector_db_id: str,
|
|
337
|
+
chunks: list[Chunk],
|
|
338
|
+
ttl_seconds: int | None = None,
|
|
339
|
+
) -> None:
|
|
340
|
+
"""Insert chunks into a vector database (provider-specific implementation)."""
|
|
341
|
+
pass
|
|
342
|
+
|
|
343
|
+
@abstractmethod
|
|
344
|
+
async def query_chunks(
|
|
345
|
+
self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
|
|
346
|
+
) -> QueryChunksResponse:
|
|
347
|
+
"""Query chunks from a vector database (provider-specific implementation)."""
|
|
348
|
+
pass
|
|
349
|
+
|
|
350
|
+
async def openai_create_vector_store(
|
|
351
|
+
self,
|
|
352
|
+
params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
|
|
353
|
+
) -> VectorStoreObject:
|
|
354
|
+
"""Creates a vector store."""
|
|
355
|
+
created_at = int(time.time())
|
|
356
|
+
|
|
357
|
+
# Extract llama-stack-specific parameters from extra_body
|
|
358
|
+
extra_body = params.model_extra or {}
|
|
359
|
+
metadata = params.metadata or {}
|
|
360
|
+
|
|
361
|
+
provider_vector_store_id = extra_body.get("provider_vector_store_id")
|
|
362
|
+
|
|
363
|
+
# Use embedding info from metadata if available, otherwise from extra_body
|
|
364
|
+
if metadata.get("embedding_model"):
|
|
365
|
+
# If either is in metadata, use metadata as source
|
|
366
|
+
embedding_model = metadata.get("embedding_model")
|
|
367
|
+
embedding_dimension = (
|
|
368
|
+
int(metadata["embedding_dimension"]) if metadata.get("embedding_dimension") else EMBEDDING_DIMENSION
|
|
369
|
+
)
|
|
370
|
+
logger.debug(
|
|
371
|
+
f"Using embedding config from metadata (takes precedence over extra_body): model='{embedding_model}', dimension={embedding_dimension}"
|
|
372
|
+
)
|
|
373
|
+
else:
|
|
374
|
+
embedding_model = extra_body.get("embedding_model")
|
|
375
|
+
embedding_dimension = extra_body.get("embedding_dimension", EMBEDDING_DIMENSION)
|
|
376
|
+
logger.debug(
|
|
377
|
+
f"Using embedding config from extra_body: model='{embedding_model}', dimension={embedding_dimension}"
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
# use provider_id set by router; fallback to provider's own ID when used directly via --stack-config
|
|
381
|
+
provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None)
|
|
382
|
+
# Derive the canonical vector_store_id (allow override, else generate)
|
|
383
|
+
vector_store_id = provider_vector_store_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}")
|
|
384
|
+
|
|
385
|
+
if embedding_model is None:
|
|
386
|
+
raise ValueError("embedding_model is required")
|
|
387
|
+
|
|
388
|
+
if embedding_dimension is None:
|
|
389
|
+
raise ValueError("Embedding dimension is required")
|
|
390
|
+
|
|
391
|
+
# Register the VectorStore backing this vector store
|
|
392
|
+
if provider_id is None:
|
|
393
|
+
raise ValueError("Provider ID is required but was not provided")
|
|
394
|
+
|
|
395
|
+
# call to the provider to create any index, etc.
|
|
396
|
+
vector_store = VectorStore(
|
|
397
|
+
identifier=vector_store_id,
|
|
398
|
+
embedding_dimension=embedding_dimension,
|
|
399
|
+
embedding_model=embedding_model,
|
|
400
|
+
provider_id=provider_id,
|
|
401
|
+
provider_resource_id=vector_store_id,
|
|
402
|
+
vector_store_name=params.name,
|
|
403
|
+
)
|
|
404
|
+
await self.register_vector_store(vector_store)
|
|
405
|
+
|
|
406
|
+
# Create OpenAI vector store metadata
|
|
407
|
+
status = "completed"
|
|
408
|
+
|
|
409
|
+
# Start with no files attached and update later
|
|
410
|
+
file_counts = VectorStoreFileCounts(
|
|
411
|
+
cancelled=0,
|
|
412
|
+
completed=0,
|
|
413
|
+
failed=0,
|
|
414
|
+
in_progress=0,
|
|
415
|
+
total=0,
|
|
416
|
+
)
|
|
417
|
+
store_info: dict[str, Any] = {
|
|
418
|
+
"id": vector_store_id,
|
|
419
|
+
"object": "vector_store",
|
|
420
|
+
"created_at": created_at,
|
|
421
|
+
"name": params.name,
|
|
422
|
+
"usage_bytes": 0,
|
|
423
|
+
"file_counts": file_counts.model_dump(),
|
|
424
|
+
"status": status,
|
|
425
|
+
"expires_after": params.expires_after,
|
|
426
|
+
"expires_at": None,
|
|
427
|
+
"last_active_at": created_at,
|
|
428
|
+
"file_ids": [],
|
|
429
|
+
"chunking_strategy": params.chunking_strategy,
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
# Add provider information to metadata if provided
|
|
433
|
+
if provider_id:
|
|
434
|
+
metadata["provider_id"] = provider_id
|
|
435
|
+
if provider_vector_store_id:
|
|
436
|
+
metadata["provider_vector_store_id"] = provider_vector_store_id
|
|
437
|
+
store_info["metadata"] = metadata
|
|
438
|
+
|
|
439
|
+
# Save to persistent storage (provider-specific)
|
|
440
|
+
await self._save_openai_vector_store(vector_store_id, store_info)
|
|
441
|
+
|
|
442
|
+
# Store in memory cache
|
|
443
|
+
self.openai_vector_stores[vector_store_id] = store_info
|
|
444
|
+
|
|
445
|
+
# Now that our vector store is created, attach any files that were provided
|
|
446
|
+
file_ids = params.file_ids or []
|
|
447
|
+
tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids]
|
|
448
|
+
await asyncio.gather(*tasks)
|
|
449
|
+
|
|
450
|
+
# Get the updated store info and return it
|
|
451
|
+
store_info = self.openai_vector_stores[vector_store_id]
|
|
452
|
+
return VectorStoreObject.model_validate(store_info)
|
|
453
|
+
|
|
454
|
+
async def openai_list_vector_stores(
|
|
455
|
+
self,
|
|
456
|
+
limit: int | None = 20,
|
|
457
|
+
order: str | None = "desc",
|
|
458
|
+
after: str | None = None,
|
|
459
|
+
before: str | None = None,
|
|
460
|
+
) -> VectorStoreListResponse:
|
|
461
|
+
"""Returns a list of vector stores."""
|
|
462
|
+
limit = limit or 20
|
|
463
|
+
order = order or "desc"
|
|
464
|
+
|
|
465
|
+
# Get all vector stores
|
|
466
|
+
all_stores = list(self.openai_vector_stores.values())
|
|
467
|
+
|
|
468
|
+
# Sort by created_at
|
|
469
|
+
reverse_order = order == "desc"
|
|
470
|
+
all_stores.sort(key=lambda x: x["created_at"], reverse=reverse_order)
|
|
471
|
+
|
|
472
|
+
# Apply cursor-based pagination
|
|
473
|
+
if after:
|
|
474
|
+
after_index = next((i for i, store in enumerate(all_stores) if store["id"] == after), -1)
|
|
475
|
+
if after_index >= 0:
|
|
476
|
+
all_stores = all_stores[after_index + 1 :]
|
|
477
|
+
|
|
478
|
+
if before:
|
|
479
|
+
before_index = next(
|
|
480
|
+
(i for i, store in enumerate(all_stores) if store["id"] == before),
|
|
481
|
+
len(all_stores),
|
|
482
|
+
)
|
|
483
|
+
all_stores = all_stores[:before_index]
|
|
484
|
+
|
|
485
|
+
# Apply limit
|
|
486
|
+
limited_stores = all_stores[:limit]
|
|
487
|
+
# Convert to VectorStoreObject instances
|
|
488
|
+
data = [VectorStoreObject(**store) for store in limited_stores]
|
|
489
|
+
|
|
490
|
+
# Determine pagination info
|
|
491
|
+
has_more = len(all_stores) > limit
|
|
492
|
+
first_id = data[0].id if data else None
|
|
493
|
+
last_id = data[-1].id if data else None
|
|
494
|
+
|
|
495
|
+
return VectorStoreListResponse(
|
|
496
|
+
data=data,
|
|
497
|
+
has_more=has_more,
|
|
498
|
+
first_id=first_id,
|
|
499
|
+
last_id=last_id,
|
|
500
|
+
)
|
|
501
|
+
|
|
502
|
+
async def openai_retrieve_vector_store(
|
|
503
|
+
self,
|
|
504
|
+
vector_store_id: str,
|
|
505
|
+
) -> VectorStoreObject:
|
|
506
|
+
"""Retrieves a vector store."""
|
|
507
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
508
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
509
|
+
|
|
510
|
+
store_info = self.openai_vector_stores[vector_store_id]
|
|
511
|
+
return VectorStoreObject(**store_info)
|
|
512
|
+
|
|
513
|
+
async def openai_update_vector_store(
|
|
514
|
+
self,
|
|
515
|
+
vector_store_id: str,
|
|
516
|
+
name: str | None = None,
|
|
517
|
+
expires_after: dict[str, Any] | None = None,
|
|
518
|
+
metadata: dict[str, Any] | None = None,
|
|
519
|
+
) -> VectorStoreObject:
|
|
520
|
+
"""Modifies a vector store."""
|
|
521
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
522
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
523
|
+
|
|
524
|
+
store_info = self.openai_vector_stores[vector_store_id].copy()
|
|
525
|
+
|
|
526
|
+
# Update fields if provided
|
|
527
|
+
if name is not None:
|
|
528
|
+
store_info["name"] = name
|
|
529
|
+
if expires_after is not None:
|
|
530
|
+
store_info["expires_after"] = expires_after
|
|
531
|
+
if metadata is not None:
|
|
532
|
+
store_info["metadata"] = metadata
|
|
533
|
+
|
|
534
|
+
# Update last_active_at
|
|
535
|
+
store_info["last_active_at"] = int(time.time())
|
|
536
|
+
|
|
537
|
+
# Save to persistent storage (provider-specific)
|
|
538
|
+
await self._update_openai_vector_store(vector_store_id, store_info)
|
|
539
|
+
|
|
540
|
+
# Update in-memory cache
|
|
541
|
+
self.openai_vector_stores[vector_store_id] = store_info
|
|
542
|
+
|
|
543
|
+
return VectorStoreObject(**store_info)
|
|
544
|
+
|
|
545
|
+
async def openai_delete_vector_store(
|
|
546
|
+
self,
|
|
547
|
+
vector_store_id: str,
|
|
548
|
+
) -> VectorStoreDeleteResponse:
|
|
549
|
+
"""Delete a vector store."""
|
|
550
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
551
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
552
|
+
|
|
553
|
+
# Delete from persistent storage (provider-specific)
|
|
554
|
+
await self._delete_openai_vector_store_from_storage(vector_store_id)
|
|
555
|
+
|
|
556
|
+
# Delete from in-memory cache
|
|
557
|
+
self.openai_vector_stores.pop(vector_store_id, None)
|
|
558
|
+
|
|
559
|
+
# Also delete the underlying vector DB
|
|
560
|
+
try:
|
|
561
|
+
await self.unregister_vector_store(vector_store_id)
|
|
562
|
+
except Exception as e:
|
|
563
|
+
logger.warning(f"Failed to delete underlying vector DB {vector_store_id}: {e}")
|
|
564
|
+
|
|
565
|
+
return VectorStoreDeleteResponse(
|
|
566
|
+
id=vector_store_id,
|
|
567
|
+
deleted=True,
|
|
568
|
+
)
|
|
569
|
+
|
|
570
|
+
async def openai_search_vector_store(
|
|
571
|
+
self,
|
|
572
|
+
vector_store_id: str,
|
|
573
|
+
query: str | list[str],
|
|
574
|
+
filters: dict[str, Any] | None = None,
|
|
575
|
+
max_num_results: int | None = 10,
|
|
576
|
+
ranking_options: SearchRankingOptions | None = None,
|
|
577
|
+
rewrite_query: bool | None = False,
|
|
578
|
+
search_mode: (
|
|
579
|
+
str | None
|
|
580
|
+
) = "vector", # Using str instead of Literal due to OpenAPI schema generator limitations
|
|
581
|
+
) -> VectorStoreSearchResponsePage:
|
|
582
|
+
"""Search for chunks in a vector store."""
|
|
583
|
+
max_num_results = max_num_results or 10
|
|
584
|
+
|
|
585
|
+
# Validate search_mode
|
|
586
|
+
valid_modes = {"keyword", "vector", "hybrid"}
|
|
587
|
+
if search_mode not in valid_modes:
|
|
588
|
+
raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
|
|
589
|
+
|
|
590
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
591
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
592
|
+
|
|
593
|
+
if isinstance(query, list):
|
|
594
|
+
search_query = " ".join(query)
|
|
595
|
+
else:
|
|
596
|
+
search_query = query
|
|
597
|
+
|
|
598
|
+
try:
|
|
599
|
+
score_threshold = (
|
|
600
|
+
ranking_options.score_threshold
|
|
601
|
+
if ranking_options and ranking_options.score_threshold is not None
|
|
602
|
+
else 0.0
|
|
603
|
+
)
|
|
604
|
+
params = {
|
|
605
|
+
"max_chunks": max_num_results * CHUNK_MULTIPLIER,
|
|
606
|
+
"score_threshold": score_threshold,
|
|
607
|
+
"mode": search_mode,
|
|
608
|
+
}
|
|
609
|
+
# TODO: Add support for ranking_options.ranker
|
|
610
|
+
|
|
611
|
+
response = await self.query_chunks(
|
|
612
|
+
vector_db_id=vector_store_id,
|
|
613
|
+
query=search_query,
|
|
614
|
+
params=params,
|
|
615
|
+
)
|
|
616
|
+
|
|
617
|
+
# Convert response to OpenAI format
|
|
618
|
+
data = []
|
|
619
|
+
for chunk, score in zip(response.chunks, response.scores, strict=False):
|
|
620
|
+
# Apply filters if provided
|
|
621
|
+
if filters:
|
|
622
|
+
# Simple metadata filtering
|
|
623
|
+
if not self._matches_filters(chunk.metadata, filters):
|
|
624
|
+
continue
|
|
625
|
+
|
|
626
|
+
content = self._chunk_to_vector_store_content(chunk)
|
|
627
|
+
|
|
628
|
+
response_data_item = VectorStoreSearchResponse(
|
|
629
|
+
file_id=chunk.metadata.get("document_id", ""),
|
|
630
|
+
filename=chunk.metadata.get("filename", ""),
|
|
631
|
+
score=score,
|
|
632
|
+
attributes=chunk.metadata,
|
|
633
|
+
content=content,
|
|
634
|
+
)
|
|
635
|
+
data.append(response_data_item)
|
|
636
|
+
if len(data) >= max_num_results:
|
|
637
|
+
break
|
|
638
|
+
|
|
639
|
+
return VectorStoreSearchResponsePage(
|
|
640
|
+
search_query=search_query,
|
|
641
|
+
data=data,
|
|
642
|
+
has_more=False, # For simplicity, we don't implement pagination here
|
|
643
|
+
next_page=None,
|
|
644
|
+
)
|
|
645
|
+
|
|
646
|
+
except Exception as e:
|
|
647
|
+
logger.error(f"Error searching vector store {vector_store_id}: {e}")
|
|
648
|
+
# Return empty results on error
|
|
649
|
+
return VectorStoreSearchResponsePage(
|
|
650
|
+
search_query=search_query,
|
|
651
|
+
data=[],
|
|
652
|
+
has_more=False,
|
|
653
|
+
next_page=None,
|
|
654
|
+
)
|
|
655
|
+
|
|
656
|
+
def _matches_filters(self, metadata: dict[str, Any], filters: dict[str, Any]) -> bool:
|
|
657
|
+
"""Check if metadata matches the provided filters."""
|
|
658
|
+
if not filters:
|
|
659
|
+
return True
|
|
660
|
+
|
|
661
|
+
filter_type = filters.get("type")
|
|
662
|
+
|
|
663
|
+
if filter_type in ["eq", "ne", "gt", "gte", "lt", "lte"]:
|
|
664
|
+
# Comparison filter
|
|
665
|
+
key = filters.get("key")
|
|
666
|
+
value = filters.get("value")
|
|
667
|
+
|
|
668
|
+
if key not in metadata:
|
|
669
|
+
return False
|
|
670
|
+
|
|
671
|
+
metadata_value = metadata[key]
|
|
672
|
+
|
|
673
|
+
if filter_type == "eq":
|
|
674
|
+
return bool(metadata_value == value)
|
|
675
|
+
elif filter_type == "ne":
|
|
676
|
+
return bool(metadata_value != value)
|
|
677
|
+
elif filter_type == "gt":
|
|
678
|
+
return bool(metadata_value > value)
|
|
679
|
+
elif filter_type == "gte":
|
|
680
|
+
return bool(metadata_value >= value)
|
|
681
|
+
elif filter_type == "lt":
|
|
682
|
+
return bool(metadata_value < value)
|
|
683
|
+
elif filter_type == "lte":
|
|
684
|
+
return bool(metadata_value <= value)
|
|
685
|
+
else:
|
|
686
|
+
raise ValueError(f"Unsupported filter type: {filter_type}")
|
|
687
|
+
|
|
688
|
+
elif filter_type == "and":
|
|
689
|
+
# All filters must match
|
|
690
|
+
sub_filters = filters.get("filters", [])
|
|
691
|
+
return all(self._matches_filters(metadata, f) for f in sub_filters)
|
|
692
|
+
|
|
693
|
+
elif filter_type == "or":
|
|
694
|
+
# At least one filter must match
|
|
695
|
+
sub_filters = filters.get("filters", [])
|
|
696
|
+
return any(self._matches_filters(metadata, f) for f in sub_filters)
|
|
697
|
+
|
|
698
|
+
else:
|
|
699
|
+
# Unknown filter type, default to no match
|
|
700
|
+
raise ValueError(f"Unsupported filter type: {filter_type}")
|
|
701
|
+
|
|
702
|
+
def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]:
|
|
703
|
+
# content is InterleavedContent
|
|
704
|
+
if isinstance(chunk.content, str):
|
|
705
|
+
content = [
|
|
706
|
+
VectorStoreContent(
|
|
707
|
+
type="text",
|
|
708
|
+
text=chunk.content,
|
|
709
|
+
)
|
|
710
|
+
]
|
|
711
|
+
elif isinstance(chunk.content, list):
|
|
712
|
+
# TODO: Add support for other types of content
|
|
713
|
+
content = [
|
|
714
|
+
VectorStoreContent(
|
|
715
|
+
type="text",
|
|
716
|
+
text=item.text,
|
|
717
|
+
)
|
|
718
|
+
for item in chunk.content
|
|
719
|
+
if item.type == "text"
|
|
720
|
+
]
|
|
721
|
+
else:
|
|
722
|
+
if chunk.content.type != "text":
|
|
723
|
+
raise ValueError(f"Unsupported content type: {chunk.content.type}")
|
|
724
|
+
content = [
|
|
725
|
+
VectorStoreContent(
|
|
726
|
+
type="text",
|
|
727
|
+
text=chunk.content.text,
|
|
728
|
+
)
|
|
729
|
+
]
|
|
730
|
+
return content
|
|
731
|
+
|
|
732
|
+
async def openai_attach_file_to_vector_store(
|
|
733
|
+
self,
|
|
734
|
+
vector_store_id: str,
|
|
735
|
+
file_id: str,
|
|
736
|
+
attributes: dict[str, Any] | None = None,
|
|
737
|
+
chunking_strategy: VectorStoreChunkingStrategy | None = None,
|
|
738
|
+
) -> VectorStoreFileObject:
|
|
739
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
740
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
741
|
+
|
|
742
|
+
# Check if file is already attached to this vector store
|
|
743
|
+
store_info = self.openai_vector_stores[vector_store_id]
|
|
744
|
+
if file_id in store_info["file_ids"]:
|
|
745
|
+
logger.warning(f"File {file_id} is already attached to vector store {vector_store_id}, skipping")
|
|
746
|
+
# Return existing file object
|
|
747
|
+
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
|
|
748
|
+
return VectorStoreFileObject(**file_info)
|
|
749
|
+
|
|
750
|
+
attributes = attributes or {}
|
|
751
|
+
chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto()
|
|
752
|
+
created_at = int(time.time())
|
|
753
|
+
chunks: list[Chunk] = []
|
|
754
|
+
file_response: OpenAIFileObject | None = None
|
|
755
|
+
|
|
756
|
+
vector_store_file_object = VectorStoreFileObject(
|
|
757
|
+
id=file_id,
|
|
758
|
+
attributes=attributes,
|
|
759
|
+
chunking_strategy=chunking_strategy,
|
|
760
|
+
created_at=created_at,
|
|
761
|
+
status="in_progress",
|
|
762
|
+
vector_store_id=vector_store_id,
|
|
763
|
+
)
|
|
764
|
+
|
|
765
|
+
if not hasattr(self, "files_api") or not self.files_api:
|
|
766
|
+
vector_store_file_object.status = "failed"
|
|
767
|
+
vector_store_file_object.last_error = VectorStoreFileLastError(
|
|
768
|
+
code="server_error",
|
|
769
|
+
message="Files API is not available",
|
|
770
|
+
)
|
|
771
|
+
return vector_store_file_object
|
|
772
|
+
|
|
773
|
+
if isinstance(chunking_strategy, VectorStoreChunkingStrategyStatic):
|
|
774
|
+
max_chunk_size_tokens = chunking_strategy.static.max_chunk_size_tokens
|
|
775
|
+
chunk_overlap_tokens = chunking_strategy.static.chunk_overlap_tokens
|
|
776
|
+
else:
|
|
777
|
+
# Default values from OpenAI API spec
|
|
778
|
+
max_chunk_size_tokens = 800
|
|
779
|
+
chunk_overlap_tokens = 400
|
|
780
|
+
|
|
781
|
+
try:
|
|
782
|
+
file_response = await self.files_api.openai_retrieve_file(file_id)
|
|
783
|
+
mime_type, _ = mimetypes.guess_type(file_response.filename)
|
|
784
|
+
content_response = await self.files_api.openai_retrieve_file_content(file_id)
|
|
785
|
+
|
|
786
|
+
content = content_from_data_and_mime_type(content_response.body, mime_type)
|
|
787
|
+
|
|
788
|
+
chunk_attributes = attributes.copy()
|
|
789
|
+
chunk_attributes["filename"] = file_response.filename
|
|
790
|
+
|
|
791
|
+
chunks = make_overlapped_chunks(
|
|
792
|
+
file_id,
|
|
793
|
+
content,
|
|
794
|
+
max_chunk_size_tokens,
|
|
795
|
+
chunk_overlap_tokens,
|
|
796
|
+
chunk_attributes,
|
|
797
|
+
)
|
|
798
|
+
if not chunks:
|
|
799
|
+
vector_store_file_object.status = "failed"
|
|
800
|
+
vector_store_file_object.last_error = VectorStoreFileLastError(
|
|
801
|
+
code="server_error",
|
|
802
|
+
message="No chunks were generated from the file",
|
|
803
|
+
)
|
|
804
|
+
else:
|
|
805
|
+
await self.insert_chunks(
|
|
806
|
+
vector_db_id=vector_store_id,
|
|
807
|
+
chunks=chunks,
|
|
808
|
+
)
|
|
809
|
+
vector_store_file_object.status = "completed"
|
|
810
|
+
except Exception as e:
|
|
811
|
+
logger.exception("Error attaching file to vector store")
|
|
812
|
+
vector_store_file_object.status = "failed"
|
|
813
|
+
vector_store_file_object.last_error = VectorStoreFileLastError(
|
|
814
|
+
code="server_error",
|
|
815
|
+
message=str(e),
|
|
816
|
+
)
|
|
817
|
+
|
|
818
|
+
# Create OpenAI vector store file metadata
|
|
819
|
+
file_info = vector_store_file_object.model_dump(exclude={"last_error"})
|
|
820
|
+
file_info["filename"] = file_response.filename if file_response else ""
|
|
821
|
+
|
|
822
|
+
# Save vector store file to persistent storage (provider-specific)
|
|
823
|
+
dict_chunks = [c.model_dump() for c in chunks]
|
|
824
|
+
# This should be updated to include chunk_id
|
|
825
|
+
await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks)
|
|
826
|
+
|
|
827
|
+
# Update file_ids and file_counts in vector store metadata
|
|
828
|
+
store_info = self.openai_vector_stores[vector_store_id].copy()
|
|
829
|
+
store_info["file_ids"].append(file_id)
|
|
830
|
+
store_info["file_counts"]["total"] += 1
|
|
831
|
+
store_info["file_counts"][vector_store_file_object.status] += 1
|
|
832
|
+
|
|
833
|
+
# Save updated vector store to persistent storage
|
|
834
|
+
await self._save_openai_vector_store(vector_store_id, store_info)
|
|
835
|
+
|
|
836
|
+
# Update vector store in-memory cache
|
|
837
|
+
self.openai_vector_stores[vector_store_id] = store_info
|
|
838
|
+
|
|
839
|
+
return vector_store_file_object
|
|
840
|
+
|
|
841
|
+
async def openai_list_files_in_vector_store(
|
|
842
|
+
self,
|
|
843
|
+
vector_store_id: str,
|
|
844
|
+
limit: int | None = 20,
|
|
845
|
+
order: str | None = "desc",
|
|
846
|
+
after: str | None = None,
|
|
847
|
+
before: str | None = None,
|
|
848
|
+
filter: VectorStoreFileStatus | None = None,
|
|
849
|
+
) -> VectorStoreListFilesResponse:
|
|
850
|
+
"""List files in a vector store."""
|
|
851
|
+
limit = limit or 20
|
|
852
|
+
order = order or "desc"
|
|
853
|
+
|
|
854
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
855
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
856
|
+
|
|
857
|
+
store_info = self.openai_vector_stores[vector_store_id]
|
|
858
|
+
|
|
859
|
+
file_objects: list[VectorStoreFileObject] = []
|
|
860
|
+
for file_id in store_info["file_ids"]:
|
|
861
|
+
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
|
|
862
|
+
file_object = VectorStoreFileObject(**file_info)
|
|
863
|
+
if filter and file_object.status != filter:
|
|
864
|
+
continue
|
|
865
|
+
file_objects.append(file_object)
|
|
866
|
+
|
|
867
|
+
# Sort by created_at
|
|
868
|
+
reverse_order = order == "desc"
|
|
869
|
+
file_objects.sort(key=lambda x: x.created_at, reverse=reverse_order)
|
|
870
|
+
|
|
871
|
+
# Apply cursor-based pagination
|
|
872
|
+
if after:
|
|
873
|
+
after_index = next((i for i, file in enumerate(file_objects) if file.id == after), -1)
|
|
874
|
+
if after_index >= 0:
|
|
875
|
+
file_objects = file_objects[after_index + 1 :]
|
|
876
|
+
|
|
877
|
+
if before:
|
|
878
|
+
before_index = next(
|
|
879
|
+
(i for i, file in enumerate(file_objects) if file.id == before),
|
|
880
|
+
len(file_objects),
|
|
881
|
+
)
|
|
882
|
+
file_objects = file_objects[:before_index]
|
|
883
|
+
|
|
884
|
+
# Apply limit
|
|
885
|
+
limited_files = file_objects[:limit]
|
|
886
|
+
|
|
887
|
+
# Determine pagination info
|
|
888
|
+
has_more = len(file_objects) > limit
|
|
889
|
+
first_id = file_objects[0].id if file_objects else None
|
|
890
|
+
last_id = file_objects[-1].id if file_objects else None
|
|
891
|
+
|
|
892
|
+
return VectorStoreListFilesResponse(
|
|
893
|
+
data=limited_files,
|
|
894
|
+
has_more=has_more,
|
|
895
|
+
first_id=first_id,
|
|
896
|
+
last_id=last_id,
|
|
897
|
+
)
|
|
898
|
+
|
|
899
|
+
async def openai_retrieve_vector_store_file(
|
|
900
|
+
self,
|
|
901
|
+
vector_store_id: str,
|
|
902
|
+
file_id: str,
|
|
903
|
+
) -> VectorStoreFileObject:
|
|
904
|
+
"""Retrieves a vector store file."""
|
|
905
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
906
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
907
|
+
|
|
908
|
+
store_info = self.openai_vector_stores[vector_store_id]
|
|
909
|
+
if file_id not in store_info["file_ids"]:
|
|
910
|
+
raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")
|
|
911
|
+
|
|
912
|
+
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
|
|
913
|
+
return VectorStoreFileObject(**file_info)
|
|
914
|
+
|
|
915
|
+
async def openai_retrieve_vector_store_file_contents(
|
|
916
|
+
self,
|
|
917
|
+
vector_store_id: str,
|
|
918
|
+
file_id: str,
|
|
919
|
+
) -> VectorStoreFileContentsResponse:
|
|
920
|
+
"""Retrieves the contents of a vector store file."""
|
|
921
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
922
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
923
|
+
|
|
924
|
+
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
|
|
925
|
+
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
|
|
926
|
+
chunks = [Chunk.model_validate(c) for c in dict_chunks]
|
|
927
|
+
content = []
|
|
928
|
+
for chunk in chunks:
|
|
929
|
+
content.extend(self._chunk_to_vector_store_content(chunk))
|
|
930
|
+
return VectorStoreFileContentsResponse(
|
|
931
|
+
file_id=file_id,
|
|
932
|
+
filename=file_info.get("filename", ""),
|
|
933
|
+
attributes=file_info.get("attributes", {}),
|
|
934
|
+
content=content,
|
|
935
|
+
)
|
|
936
|
+
|
|
937
|
+
async def openai_update_vector_store_file(
|
|
938
|
+
self,
|
|
939
|
+
vector_store_id: str,
|
|
940
|
+
file_id: str,
|
|
941
|
+
attributes: dict[str, Any],
|
|
942
|
+
) -> VectorStoreFileObject:
|
|
943
|
+
"""Updates a vector store file."""
|
|
944
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
945
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
946
|
+
|
|
947
|
+
store_info = self.openai_vector_stores[vector_store_id]
|
|
948
|
+
if file_id not in store_info["file_ids"]:
|
|
949
|
+
raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")
|
|
950
|
+
|
|
951
|
+
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
|
|
952
|
+
file_info["attributes"] = attributes
|
|
953
|
+
await self._update_openai_vector_store_file(vector_store_id, file_id, file_info)
|
|
954
|
+
return VectorStoreFileObject(**file_info)
|
|
955
|
+
|
|
956
|
+
async def openai_delete_vector_store_file(
|
|
957
|
+
self,
|
|
958
|
+
vector_store_id: str,
|
|
959
|
+
file_id: str,
|
|
960
|
+
) -> VectorStoreFileDeleteResponse:
|
|
961
|
+
"""Deletes a vector store file."""
|
|
962
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
963
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
964
|
+
|
|
965
|
+
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
|
|
966
|
+
chunks = [Chunk.model_validate(c) for c in dict_chunks]
|
|
967
|
+
|
|
968
|
+
# Create ChunkForDeletion objects with both chunk_id and document_id
|
|
969
|
+
chunks_for_deletion = []
|
|
970
|
+
for c in chunks:
|
|
971
|
+
if c.chunk_id:
|
|
972
|
+
document_id = c.metadata.get("document_id") or (
|
|
973
|
+
c.chunk_metadata.document_id if c.chunk_metadata else None
|
|
974
|
+
)
|
|
975
|
+
if document_id:
|
|
976
|
+
chunks_for_deletion.append(ChunkForDeletion(chunk_id=str(c.chunk_id), document_id=document_id))
|
|
977
|
+
else:
|
|
978
|
+
logger.warning(f"Chunk {c.chunk_id} has no document_id, skipping deletion")
|
|
979
|
+
|
|
980
|
+
if chunks_for_deletion:
|
|
981
|
+
await self.delete_chunks(vector_store_id, chunks_for_deletion)
|
|
982
|
+
|
|
983
|
+
store_info = self.openai_vector_stores[vector_store_id].copy()
|
|
984
|
+
|
|
985
|
+
file = await self.openai_retrieve_vector_store_file(vector_store_id, file_id)
|
|
986
|
+
await self._delete_openai_vector_store_file_from_storage(vector_store_id, file_id)
|
|
987
|
+
|
|
988
|
+
# Update in-memory cache
|
|
989
|
+
store_info["file_ids"].remove(file_id)
|
|
990
|
+
store_info["file_counts"][file.status] -= 1
|
|
991
|
+
store_info["file_counts"]["total"] -= 1
|
|
992
|
+
self.openai_vector_stores[vector_store_id] = store_info
|
|
993
|
+
|
|
994
|
+
# Save updated vector store to persistent storage
|
|
995
|
+
await self._save_openai_vector_store(vector_store_id, store_info)
|
|
996
|
+
|
|
997
|
+
return VectorStoreFileDeleteResponse(
|
|
998
|
+
id=file_id,
|
|
999
|
+
deleted=True,
|
|
1000
|
+
)
|
|
1001
|
+
|
|
1002
|
+
async def openai_create_vector_store_file_batch(
|
|
1003
|
+
self,
|
|
1004
|
+
vector_store_id: str,
|
|
1005
|
+
params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
|
|
1006
|
+
) -> VectorStoreFileBatchObject:
|
|
1007
|
+
"""Create a vector store file batch."""
|
|
1008
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
1009
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
1010
|
+
|
|
1011
|
+
chunking_strategy = params.chunking_strategy or VectorStoreChunkingStrategyAuto()
|
|
1012
|
+
|
|
1013
|
+
created_at = int(time.time())
|
|
1014
|
+
batch_id = generate_object_id("vector_store_file_batch", lambda: f"batch_{uuid.uuid4()}")
|
|
1015
|
+
# File batches expire after 7 days
|
|
1016
|
+
expires_at = created_at + (7 * 24 * 60 * 60)
|
|
1017
|
+
|
|
1018
|
+
# Initialize batch file counts - all files start as in_progress
|
|
1019
|
+
file_counts = VectorStoreFileCounts(
|
|
1020
|
+
completed=0,
|
|
1021
|
+
cancelled=0,
|
|
1022
|
+
failed=0,
|
|
1023
|
+
in_progress=len(params.file_ids),
|
|
1024
|
+
total=len(params.file_ids),
|
|
1025
|
+
)
|
|
1026
|
+
|
|
1027
|
+
# Create batch object immediately with in_progress status
|
|
1028
|
+
batch_object = VectorStoreFileBatchObject(
|
|
1029
|
+
id=batch_id,
|
|
1030
|
+
created_at=created_at,
|
|
1031
|
+
vector_store_id=vector_store_id,
|
|
1032
|
+
status="in_progress",
|
|
1033
|
+
file_counts=file_counts,
|
|
1034
|
+
)
|
|
1035
|
+
|
|
1036
|
+
batch_info = {
|
|
1037
|
+
**batch_object.model_dump(),
|
|
1038
|
+
"file_ids": params.file_ids,
|
|
1039
|
+
"attributes": params.attributes,
|
|
1040
|
+
"chunking_strategy": chunking_strategy.model_dump(),
|
|
1041
|
+
"expires_at": expires_at,
|
|
1042
|
+
}
|
|
1043
|
+
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
|
|
1044
|
+
|
|
1045
|
+
# Start background processing of files
|
|
1046
|
+
task = asyncio.create_task(self._process_file_batch_async(batch_id, batch_info))
|
|
1047
|
+
self._file_batch_tasks[batch_id] = task
|
|
1048
|
+
|
|
1049
|
+
# Run cleanup if needed (throttled to once every 1 day)
|
|
1050
|
+
current_time = int(time.time())
|
|
1051
|
+
if current_time - self._last_file_batch_cleanup_time >= FILE_BATCH_CLEANUP_INTERVAL_SECONDS:
|
|
1052
|
+
logger.info("Running throttled cleanup of expired file batches")
|
|
1053
|
+
asyncio.create_task(self._cleanup_expired_file_batches())
|
|
1054
|
+
self._last_file_batch_cleanup_time = current_time
|
|
1055
|
+
|
|
1056
|
+
return batch_object
|
|
1057
|
+
|
|
1058
|
+
async def _process_files_with_concurrency(
|
|
1059
|
+
self,
|
|
1060
|
+
file_ids: list[str],
|
|
1061
|
+
vector_store_id: str,
|
|
1062
|
+
attributes: dict[str, Any],
|
|
1063
|
+
chunking_strategy_obj: Any,
|
|
1064
|
+
batch_id: str,
|
|
1065
|
+
batch_info: dict[str, Any],
|
|
1066
|
+
) -> None:
|
|
1067
|
+
"""Process files with controlled concurrency and chunking."""
|
|
1068
|
+
semaphore = asyncio.Semaphore(MAX_CONCURRENT_FILES_PER_BATCH)
|
|
1069
|
+
|
|
1070
|
+
async def process_single_file(file_id: str) -> tuple[str, bool]:
|
|
1071
|
+
"""Process a single file with concurrency control."""
|
|
1072
|
+
async with semaphore:
|
|
1073
|
+
try:
|
|
1074
|
+
vector_store_file_object = await self.openai_attach_file_to_vector_store(
|
|
1075
|
+
vector_store_id=vector_store_id,
|
|
1076
|
+
file_id=file_id,
|
|
1077
|
+
attributes=attributes,
|
|
1078
|
+
chunking_strategy=chunking_strategy_obj,
|
|
1079
|
+
)
|
|
1080
|
+
return file_id, vector_store_file_object.status == "completed"
|
|
1081
|
+
except Exception as e:
|
|
1082
|
+
logger.error(f"Failed to process file {file_id} in batch {batch_id}: {e}")
|
|
1083
|
+
return file_id, False
|
|
1084
|
+
|
|
1085
|
+
# Process files in chunks to avoid creating too many tasks at once
|
|
1086
|
+
total_files = len(file_ids)
|
|
1087
|
+
for chunk_start in range(0, total_files, FILE_BATCH_CHUNK_SIZE):
|
|
1088
|
+
chunk_end = min(chunk_start + FILE_BATCH_CHUNK_SIZE, total_files)
|
|
1089
|
+
chunk = file_ids[chunk_start:chunk_end]
|
|
1090
|
+
|
|
1091
|
+
chunk_num = chunk_start // FILE_BATCH_CHUNK_SIZE + 1
|
|
1092
|
+
total_chunks = (total_files + FILE_BATCH_CHUNK_SIZE - 1) // FILE_BATCH_CHUNK_SIZE
|
|
1093
|
+
logger.info(
|
|
1094
|
+
f"Processing chunk {chunk_num} of {total_chunks} ({len(chunk)} files, {chunk_start + 1}-{chunk_end} of {total_files} total files)"
|
|
1095
|
+
)
|
|
1096
|
+
|
|
1097
|
+
async with asyncio.TaskGroup() as tg:
|
|
1098
|
+
chunk_tasks = [tg.create_task(process_single_file(file_id)) for file_id in chunk]
|
|
1099
|
+
|
|
1100
|
+
chunk_results = [task.result() for task in chunk_tasks]
|
|
1101
|
+
|
|
1102
|
+
# Update counts after each chunk for progressive feedback
|
|
1103
|
+
for _, success in chunk_results:
|
|
1104
|
+
self._update_file_counts(batch_info, success=success)
|
|
1105
|
+
|
|
1106
|
+
# Save progress after each chunk
|
|
1107
|
+
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
|
|
1108
|
+
|
|
1109
|
+
def _update_file_counts(self, batch_info: dict[str, Any], success: bool) -> None:
|
|
1110
|
+
"""Update file counts based on processing result."""
|
|
1111
|
+
if success:
|
|
1112
|
+
batch_info["file_counts"]["completed"] += 1
|
|
1113
|
+
else:
|
|
1114
|
+
batch_info["file_counts"]["failed"] += 1
|
|
1115
|
+
batch_info["file_counts"]["in_progress"] -= 1
|
|
1116
|
+
|
|
1117
|
+
def _update_batch_status(self, batch_info: dict[str, Any]) -> None:
|
|
1118
|
+
"""Update final batch status based on file processing results."""
|
|
1119
|
+
if batch_info["file_counts"]["failed"] == 0:
|
|
1120
|
+
batch_info["status"] = "completed"
|
|
1121
|
+
elif batch_info["file_counts"]["completed"] == 0:
|
|
1122
|
+
batch_info["status"] = "failed"
|
|
1123
|
+
else:
|
|
1124
|
+
batch_info["status"] = "completed" # Partial success counts as completed
|
|
1125
|
+
|
|
1126
|
+
async def _process_file_batch_async(
|
|
1127
|
+
self,
|
|
1128
|
+
batch_id: str,
|
|
1129
|
+
batch_info: dict[str, Any],
|
|
1130
|
+
override_file_ids: list[str] | None = None,
|
|
1131
|
+
) -> None:
|
|
1132
|
+
"""Process files in a batch asynchronously in the background."""
|
|
1133
|
+
file_ids = override_file_ids if override_file_ids is not None else batch_info["file_ids"]
|
|
1134
|
+
attributes = batch_info["attributes"]
|
|
1135
|
+
chunking_strategy = batch_info["chunking_strategy"]
|
|
1136
|
+
vector_store_id = batch_info["vector_store_id"]
|
|
1137
|
+
chunking_strategy_adapter: TypeAdapter[VectorStoreChunkingStrategy] = TypeAdapter(VectorStoreChunkingStrategy)
|
|
1138
|
+
chunking_strategy_obj = chunking_strategy_adapter.validate_python(chunking_strategy)
|
|
1139
|
+
|
|
1140
|
+
try:
|
|
1141
|
+
# Process all files with controlled concurrency
|
|
1142
|
+
await self._process_files_with_concurrency(
|
|
1143
|
+
file_ids=file_ids,
|
|
1144
|
+
vector_store_id=vector_store_id,
|
|
1145
|
+
attributes=attributes,
|
|
1146
|
+
chunking_strategy_obj=chunking_strategy_obj,
|
|
1147
|
+
batch_id=batch_id,
|
|
1148
|
+
batch_info=batch_info,
|
|
1149
|
+
)
|
|
1150
|
+
|
|
1151
|
+
# Update final batch status
|
|
1152
|
+
self._update_batch_status(batch_info)
|
|
1153
|
+
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
|
|
1154
|
+
|
|
1155
|
+
logger.info(f"File batch {batch_id} processing completed with status: {batch_info['status']}")
|
|
1156
|
+
|
|
1157
|
+
except asyncio.CancelledError:
|
|
1158
|
+
logger.info(f"File batch {batch_id} processing was cancelled")
|
|
1159
|
+
# Clean up task reference if it still exists
|
|
1160
|
+
self._file_batch_tasks.pop(batch_id, None)
|
|
1161
|
+
raise # Re-raise to ensure proper cancellation propagation
|
|
1162
|
+
finally:
|
|
1163
|
+
# Always clean up task reference when processing ends
|
|
1164
|
+
self._file_batch_tasks.pop(batch_id, None)
|
|
1165
|
+
|
|
1166
|
+
def _get_and_validate_batch(self, batch_id: str, vector_store_id: str) -> dict[str, Any]:
|
|
1167
|
+
"""Get and validate batch exists and belongs to vector store."""
|
|
1168
|
+
if vector_store_id not in self.openai_vector_stores:
|
|
1169
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
1170
|
+
|
|
1171
|
+
if batch_id not in self.openai_file_batches:
|
|
1172
|
+
raise ValueError(f"File batch {batch_id} not found")
|
|
1173
|
+
|
|
1174
|
+
batch_info = self.openai_file_batches[batch_id]
|
|
1175
|
+
|
|
1176
|
+
# Check if batch has expired (read-only check)
|
|
1177
|
+
expires_at = batch_info.get("expires_at")
|
|
1178
|
+
if expires_at:
|
|
1179
|
+
current_time = int(time.time())
|
|
1180
|
+
if current_time > expires_at:
|
|
1181
|
+
raise ValueError(f"File batch {batch_id} has expired after 7 days from creation")
|
|
1182
|
+
|
|
1183
|
+
if batch_info["vector_store_id"] != vector_store_id:
|
|
1184
|
+
raise ValueError(f"File batch {batch_id} does not belong to vector store {vector_store_id}")
|
|
1185
|
+
|
|
1186
|
+
return batch_info
|
|
1187
|
+
|
|
1188
|
+
def _paginate_objects(
|
|
1189
|
+
self,
|
|
1190
|
+
objects: list[Any],
|
|
1191
|
+
limit: int | None = 20,
|
|
1192
|
+
after: str | None = None,
|
|
1193
|
+
before: str | None = None,
|
|
1194
|
+
) -> tuple[list[Any], bool, str | None, str | None]:
|
|
1195
|
+
"""Apply pagination to a list of objects with id fields."""
|
|
1196
|
+
limit = min(limit or 20, 100) # Cap at 100 as per OpenAI
|
|
1197
|
+
|
|
1198
|
+
# Find start index
|
|
1199
|
+
start_idx = 0
|
|
1200
|
+
if after:
|
|
1201
|
+
for i, obj in enumerate(objects):
|
|
1202
|
+
if obj.id == after:
|
|
1203
|
+
start_idx = i + 1
|
|
1204
|
+
break
|
|
1205
|
+
|
|
1206
|
+
# Find end index
|
|
1207
|
+
end_idx = start_idx + limit
|
|
1208
|
+
if before:
|
|
1209
|
+
for i, obj in enumerate(objects[start_idx:], start_idx):
|
|
1210
|
+
if obj.id == before:
|
|
1211
|
+
end_idx = i
|
|
1212
|
+
break
|
|
1213
|
+
|
|
1214
|
+
# Apply pagination
|
|
1215
|
+
paginated_objects = objects[start_idx:end_idx]
|
|
1216
|
+
|
|
1217
|
+
# Determine pagination info
|
|
1218
|
+
has_more = end_idx < len(objects)
|
|
1219
|
+
first_id = paginated_objects[0].id if paginated_objects else None
|
|
1220
|
+
last_id = paginated_objects[-1].id if paginated_objects else None
|
|
1221
|
+
|
|
1222
|
+
return paginated_objects, has_more, first_id, last_id
|
|
1223
|
+
|
|
1224
|
+
async def openai_retrieve_vector_store_file_batch(
|
|
1225
|
+
self,
|
|
1226
|
+
batch_id: str,
|
|
1227
|
+
vector_store_id: str,
|
|
1228
|
+
) -> VectorStoreFileBatchObject:
|
|
1229
|
+
"""Retrieve a vector store file batch."""
|
|
1230
|
+
batch_info = self._get_and_validate_batch(batch_id, vector_store_id)
|
|
1231
|
+
return VectorStoreFileBatchObject(**batch_info)
|
|
1232
|
+
|
|
1233
|
+
async def openai_list_files_in_vector_store_file_batch(
|
|
1234
|
+
self,
|
|
1235
|
+
batch_id: str,
|
|
1236
|
+
vector_store_id: str,
|
|
1237
|
+
after: str | None = None,
|
|
1238
|
+
before: str | None = None,
|
|
1239
|
+
filter: str | None = None,
|
|
1240
|
+
limit: int | None = 20,
|
|
1241
|
+
order: str | None = "desc",
|
|
1242
|
+
) -> VectorStoreFilesListInBatchResponse:
|
|
1243
|
+
"""Returns a list of vector store files in a batch."""
|
|
1244
|
+
batch_info = self._get_and_validate_batch(batch_id, vector_store_id)
|
|
1245
|
+
batch_file_ids = batch_info["file_ids"]
|
|
1246
|
+
|
|
1247
|
+
# Load file objects for files in this batch
|
|
1248
|
+
batch_file_objects = []
|
|
1249
|
+
|
|
1250
|
+
for file_id in batch_file_ids:
|
|
1251
|
+
try:
|
|
1252
|
+
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
|
|
1253
|
+
file_object = VectorStoreFileObject(**file_info)
|
|
1254
|
+
|
|
1255
|
+
# Apply status filter if provided
|
|
1256
|
+
if filter and file_object.status != filter:
|
|
1257
|
+
continue
|
|
1258
|
+
|
|
1259
|
+
batch_file_objects.append(file_object)
|
|
1260
|
+
except Exception as e:
|
|
1261
|
+
logger.warning(f"Could not load file {file_id} from batch {batch_id}: {e}")
|
|
1262
|
+
continue
|
|
1263
|
+
|
|
1264
|
+
# Sort by created_at
|
|
1265
|
+
reverse_order = order == "desc"
|
|
1266
|
+
batch_file_objects.sort(key=lambda x: x.created_at, reverse=reverse_order)
|
|
1267
|
+
|
|
1268
|
+
# Apply pagination using helper
|
|
1269
|
+
paginated_files, has_more, first_id, last_id = self._paginate_objects(batch_file_objects, limit, after, before)
|
|
1270
|
+
|
|
1271
|
+
return VectorStoreFilesListInBatchResponse(
|
|
1272
|
+
data=paginated_files,
|
|
1273
|
+
first_id=first_id,
|
|
1274
|
+
last_id=last_id,
|
|
1275
|
+
has_more=has_more,
|
|
1276
|
+
)
|
|
1277
|
+
|
|
1278
|
+
async def openai_cancel_vector_store_file_batch(
|
|
1279
|
+
self,
|
|
1280
|
+
batch_id: str,
|
|
1281
|
+
vector_store_id: str,
|
|
1282
|
+
) -> VectorStoreFileBatchObject:
|
|
1283
|
+
"""Cancel a vector store file batch."""
|
|
1284
|
+
batch_info = self._get_and_validate_batch(batch_id, vector_store_id)
|
|
1285
|
+
|
|
1286
|
+
if batch_info["status"] not in ["in_progress"]:
|
|
1287
|
+
raise ValueError(f"Cannot cancel batch {batch_id} with status {batch_info['status']}")
|
|
1288
|
+
|
|
1289
|
+
# Cancel the actual processing task if it exists
|
|
1290
|
+
if batch_id in self._file_batch_tasks:
|
|
1291
|
+
task = self._file_batch_tasks[batch_id]
|
|
1292
|
+
if not task.done():
|
|
1293
|
+
task.cancel()
|
|
1294
|
+
logger.info(f"Cancelled processing task for file batch: {batch_id}")
|
|
1295
|
+
# Remove from task tracking
|
|
1296
|
+
del self._file_batch_tasks[batch_id]
|
|
1297
|
+
|
|
1298
|
+
batch_info["status"] = "cancelled"
|
|
1299
|
+
|
|
1300
|
+
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
|
|
1301
|
+
|
|
1302
|
+
updated_batch = VectorStoreFileBatchObject(**batch_info)
|
|
1303
|
+
|
|
1304
|
+
return updated_batch
|