llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -0,0 +1,1304 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ import asyncio
8
+ import json
9
+ import mimetypes
10
+ import time
11
+ import uuid
12
+ from abc import ABC, abstractmethod
13
+ from typing import Annotated, Any
14
+
15
+ from fastapi import Body
16
+ from pydantic import TypeAdapter
17
+
18
+ from llama_stack.apis.common.errors import VectorStoreNotFoundError
19
+ from llama_stack.apis.files import Files, OpenAIFileObject
20
+ from llama_stack.apis.vector_io import (
21
+ Chunk,
22
+ OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
23
+ OpenAICreateVectorStoreRequestWithExtraBody,
24
+ QueryChunksResponse,
25
+ SearchRankingOptions,
26
+ VectorStoreChunkingStrategy,
27
+ VectorStoreChunkingStrategyAuto,
28
+ VectorStoreChunkingStrategyStatic,
29
+ VectorStoreContent,
30
+ VectorStoreDeleteResponse,
31
+ VectorStoreFileBatchObject,
32
+ VectorStoreFileContentsResponse,
33
+ VectorStoreFileCounts,
34
+ VectorStoreFileDeleteResponse,
35
+ VectorStoreFileLastError,
36
+ VectorStoreFileObject,
37
+ VectorStoreFilesListInBatchResponse,
38
+ VectorStoreFileStatus,
39
+ VectorStoreListFilesResponse,
40
+ VectorStoreListResponse,
41
+ VectorStoreObject,
42
+ VectorStoreSearchResponse,
43
+ VectorStoreSearchResponsePage,
44
+ )
45
+ from llama_stack.apis.vector_stores import VectorStore
46
+ from llama_stack.core.id_generation import generate_object_id
47
+ from llama_stack.log import get_logger
48
+ from llama_stack.providers.utils.kvstore.api import KVStore
49
+ from llama_stack.providers.utils.memory.vector_store import (
50
+ ChunkForDeletion,
51
+ content_from_data_and_mime_type,
52
+ make_overlapped_chunks,
53
+ )
54
+
55
+ EMBEDDING_DIMENSION = 768
56
+
57
+ logger = get_logger(name=__name__, category="providers::utils")
58
+
59
+ # Constants for OpenAI vector stores
60
+ CHUNK_MULTIPLIER = 5
61
+ FILE_BATCH_CLEANUP_INTERVAL_SECONDS = 24 * 60 * 60 # 1 day in seconds
62
+ MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within a batch
63
+ FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size
64
+
65
+ VERSION = "v3"
66
+ VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
67
+ OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
68
+ OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
69
+ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::"
70
+ OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX = f"openai_vector_stores_file_batches:{VERSION}::"
71
+
72
+
73
+ class OpenAIVectorStoreMixin(ABC):
74
+ """
75
+ Mixin class that provides common OpenAI Vector Store API implementation.
76
+ Providers need to implement the abstract storage methods and maintain
77
+ an openai_vector_stores in-memory cache.
78
+ """
79
+
80
+ # Implementing classes should call super().__init__() in their __init__ method
81
+ # to properly initialize the mixin attributes.
82
+ def __init__(
83
+ self,
84
+ files_api: Files | None = None,
85
+ kvstore: KVStore | None = None,
86
+ ):
87
+ self.openai_vector_stores: dict[str, dict[str, Any]] = {}
88
+ self.openai_file_batches: dict[str, dict[str, Any]] = {}
89
+ self.files_api = files_api
90
+ self.kvstore = kvstore
91
+ self._last_file_batch_cleanup_time = 0
92
+ self._file_batch_tasks: dict[str, asyncio.Task[None]] = {}
93
+
+     async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
+         """Save vector store metadata to persistent storage."""
+         assert self.kvstore
+         key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
+         await self.kvstore.set(key=key, value=json.dumps(store_info))
+         # update in-memory cache
+         self.openai_vector_stores[store_id] = store_info
+
+     async def _load_openai_vector_stores(self) -> dict[str, dict[str, Any]]:
+         """Load all vector store metadata from persistent storage."""
+         assert self.kvstore
+         start_key = OPENAI_VECTOR_STORES_PREFIX
+         end_key = f"{OPENAI_VECTOR_STORES_PREFIX}\xff"
+         stored_data = await self.kvstore.values_in_range(start_key, end_key)
+
+         stores: dict[str, dict[str, Any]] = {}
+         for item in stored_data:
+             info = json.loads(item)
+             stores[info["id"]] = info
+         return stores
+
+     async def _update_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
+         """Update vector store metadata in persistent storage."""
+         assert self.kvstore
+         key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
+         await self.kvstore.set(key=key, value=json.dumps(store_info))
+         # update in-memory cache
+         self.openai_vector_stores[store_id] = store_info
+
+     async def _delete_openai_vector_store_from_storage(self, store_id: str) -> None:
+         """Delete vector store metadata from persistent storage."""
+         assert self.kvstore
+         key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
+         await self.kvstore.delete(key)
+         # remove from in-memory cache
+         self.openai_vector_stores.pop(store_id, None)
+
+     async def _save_openai_vector_store_file(
+         self,
+         store_id: str,
+         file_id: str,
+         file_info: dict[str, Any],
+         file_contents: list[dict[str, Any]],
+     ) -> None:
+         """Save vector store file metadata to persistent storage."""
+         assert self.kvstore
+         meta_key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
+         await self.kvstore.set(key=meta_key, value=json.dumps(file_info))
+         contents_prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
+         for idx, chunk in enumerate(file_contents):
+             await self.kvstore.set(key=f"{contents_prefix}{idx}", value=json.dumps(chunk))
+
+     async def _load_openai_vector_store_file(self, store_id: str, file_id: str) -> dict[str, Any]:
+         """Load vector store file metadata from persistent storage."""
+         assert self.kvstore
+         key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
+         stored_data = await self.kvstore.get(key)
+         return json.loads(stored_data) if stored_data else {}
+
+     async def _load_openai_vector_store_file_contents(self, store_id: str, file_id: str) -> list[dict[str, Any]]:
+         """Load vector store file contents from persistent storage."""
+         assert self.kvstore
+         prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
+         end_key = f"{prefix}\xff"
+         raw_items = await self.kvstore.values_in_range(prefix, end_key)
+         return [json.loads(item) for item in raw_items]
+
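All of these helpers share one flat key-value namespace: a versioned prefix plus the object ids, with "\xff" as the upper bound for range scans. The layout, illustrated with made-up ids:

    # Hypothetical ids, shown only to illustrate the key scheme.
    #   openai_vector_stores:v3::vs_123                            -> store metadata (JSON)
    #   openai_vector_stores_files:v3::vs_123:file_abc             -> file metadata (JSON)
    #   openai_vector_stores_files_contents:v3::vs_123:file_abc:0  -> chunk 0 (JSON)
    #   openai_vector_stores_files_contents:v3::vs_123:file_abc:1  -> chunk 1 (JSON)
    #   openai_vector_stores_file_batches:v3::batch_xyz            -> batch metadata (JSON)
    start_key = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}vs_123:file_abc:"
    end_key = f"{start_key}\xff"  # everything stored for that one file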
+     async def _update_openai_vector_store_file(self, store_id: str, file_id: str, file_info: dict[str, Any]) -> None:
+         """Update vector store file metadata in persistent storage."""
+         assert self.kvstore
+         key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
+         await self.kvstore.set(key=key, value=json.dumps(file_info))
+
+     async def _delete_openai_vector_store_file_from_storage(self, store_id: str, file_id: str) -> None:
+         """Delete vector store file metadata from persistent storage."""
+         assert self.kvstore
+
+         meta_key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
+         await self.kvstore.delete(meta_key)
+
+         contents_prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
+         end_key = f"{contents_prefix}\xff"
+         # load all stored chunk values (values_in_range is implemented by all backends)
+         raw_items = await self.kvstore.values_in_range(contents_prefix, end_key)
+         # delete each chunk by its index suffix
+         for idx in range(len(raw_items)):
+             await self.kvstore.delete(f"{contents_prefix}{idx}")
+
+     async def _save_openai_vector_store_file_batch(self, batch_id: str, batch_info: dict[str, Any]) -> None:
+         """Save file batch metadata to persistent storage."""
+         assert self.kvstore
+         key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{batch_id}"
+         await self.kvstore.set(key=key, value=json.dumps(batch_info))
+         # update in-memory cache
+         self.openai_file_batches[batch_id] = batch_info
+
+     async def _load_openai_vector_store_file_batches(self) -> dict[str, dict[str, Any]]:
+         """Load all file batch metadata from persistent storage."""
+         assert self.kvstore
+         start_key = OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX
+         end_key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}\xff"
+         stored_data = await self.kvstore.values_in_range(start_key, end_key)
+
+         batches: dict[str, dict[str, Any]] = {}
+         for item in stored_data:
+             info = json.loads(item)
+             batches[info["id"]] = info
+         return batches
+
+     async def _delete_openai_vector_store_file_batch(self, batch_id: str) -> None:
+         """Delete file batch metadata from persistent storage and in-memory cache."""
+         assert self.kvstore
+         key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{batch_id}"
+         await self.kvstore.delete(key)
+         # remove from in-memory cache
+         self.openai_file_batches.pop(batch_id, None)
+
+     async def _cleanup_expired_file_batches(self) -> None:
+         """Clean up expired file batches from persistent storage."""
+         assert self.kvstore
+         start_key = OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX
+         end_key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}\xff"
+         stored_data = await self.kvstore.values_in_range(start_key, end_key)
+
+         current_time = int(time.time())
+         expired_count = 0
+
+         for item in stored_data:
+             info = json.loads(item)
+             expires_at = info.get("expires_at")
+             if expires_at and current_time > expires_at:
+                 logger.info(f"Cleaning up expired file batch: {info['id']}")
+                 await self.kvstore.delete(f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{info['id']}")
+                 # Remove from in-memory cache if present
+                 self.openai_file_batches.pop(info["id"], None)
+                 expired_count += 1
+
+         if expired_count > 0:
+             logger.info(f"Cleaned up {expired_count} expired file batches")
+
+     async def _get_completed_files_in_batch(self, vector_store_id: str, file_ids: list[str]) -> set[str]:
+         """Determine which files in a batch are actually completed by checking vector store file_ids."""
+         if vector_store_id not in self.openai_vector_stores:
+             return set()
+
+         store_info = self.openai_vector_stores[vector_store_id]
+         completed_files = set(file_ids) & set(store_info["file_ids"])
+         return completed_files
+
+     async def _analyze_batch_completion_on_resume(self, batch_id: str, batch_info: dict[str, Any]) -> list[str]:
+         """Analyze batch completion status and return remaining files to process.
+
+         Returns:
+             List of file IDs that still need processing. Empty list if batch is complete.
+         """
+         vector_store_id = batch_info["vector_store_id"]
+         all_file_ids = batch_info["file_ids"]
+
+         # Find files that are actually completed
+         completed_files = await self._get_completed_files_in_batch(vector_store_id, all_file_ids)
+         remaining_files = [file_id for file_id in all_file_ids if file_id not in completed_files]
+
+         completed_count = len(completed_files)
+         total_count = len(all_file_ids)
+         remaining_count = len(remaining_files)
+
+         # Update file counts to reflect actual state
+         batch_info["file_counts"] = {
+             "completed": completed_count,
+             "failed": 0,  # We don't track failed files during resume - they'll be retried
+             "in_progress": remaining_count,
+             "cancelled": 0,
+             "total": total_count,
+         }
+
+         # If all files are already completed, mark batch as completed
+         if remaining_count == 0:
+             batch_info["status"] = "completed"
+             logger.info(f"Batch {batch_id} is already fully completed, updating status")
+
+         # Save updated batch info
+         await self._save_openai_vector_store_file_batch(batch_id, batch_info)
+
+         return remaining_files
+
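The resume bookkeeping is plain set arithmetic over ids: anything already present in the store's file_ids counts as done, everything else goes back in progress. A worked example with hypothetical ids:

    all_file_ids = ["file_a", "file_b", "file_c"]   # batch as submitted
    store_file_ids = {"file_a"}                      # attached before the restart

    completed = set(all_file_ids) & store_file_ids               # {"file_a"}
    remaining = [f for f in all_file_ids if f not in completed]  # ["file_b", "file_c"]
    # file_counts becomes completed=1, in_progress=2, failed=0, cancelled=0, total=3,
    # and only file_b / file_c are handed to the restarted background task.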
+     async def _resume_incomplete_batches(self) -> None:
+         """Resume processing of incomplete file batches after server restart."""
+         for batch_id, batch_info in self.openai_file_batches.items():
+             if batch_info["status"] == "in_progress":
+                 logger.info(f"Analyzing incomplete file batch: {batch_id}")
+
+                 remaining_files = await self._analyze_batch_completion_on_resume(batch_id, batch_info)
+
+                 # Check if batch is now completed after analysis
+                 if batch_info["status"] == "completed":
+                     continue
+
+                 if remaining_files:
+                     logger.info(f"Resuming batch {batch_id} with {len(remaining_files)} remaining files")
+                     # Restart the background processing task with only remaining files
+                     task = asyncio.create_task(self._process_file_batch_async(batch_id, batch_info, remaining_files))
+                     self._file_batch_tasks[batch_id] = task
+
+     async def initialize_openai_vector_stores(self) -> None:
+         """Load existing OpenAI vector stores and file batches into the in-memory cache."""
+         self.openai_vector_stores = await self._load_openai_vector_stores()
+         self.openai_file_batches = await self._load_openai_vector_store_file_batches()
+         self._file_batch_tasks = {}
+         # TODO: Resume only works for single-worker deployments. Jobs with multiple workers will need to be handled differently.
+         await self._resume_incomplete_batches()
+         self._last_file_batch_cleanup_time = 0
+
+     async def shutdown(self) -> None:
+         """Clean up mixin resources including background tasks."""
+         # Cancel any running file batch tasks gracefully
+         tasks_to_cancel = list(self._file_batch_tasks.items())
+         for _, task in tasks_to_cancel:
+             if not task.done():
+                 task.cancel()
+                 try:
+                     await task
+                 except asyncio.CancelledError:
+                     pass
+
+     @abstractmethod
+     async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
+         """Delete chunks from a vector store."""
+         pass
+
+     @abstractmethod
+     async def register_vector_store(self, vector_store: VectorStore) -> None:
+         """Register a vector store (provider-specific implementation)."""
+         pass
+
+     @abstractmethod
+     async def unregister_vector_store(self, vector_store_id: str) -> None:
+         """Unregister a vector store (provider-specific implementation)."""
+         pass
+
+     @abstractmethod
+     async def insert_chunks(
+         self,
+         vector_db_id: str,
+         chunks: list[Chunk],
+         ttl_seconds: int | None = None,
+     ) -> None:
+         """Insert chunks into a vector database (provider-specific implementation)."""
+         pass
+
+     @abstractmethod
+     async def query_chunks(
+         self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
+     ) -> QueryChunksResponse:
+         """Query chunks from a vector database (provider-specific implementation)."""
+         pass
+
+     async def openai_create_vector_store(
+         self,
+         params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
+     ) -> VectorStoreObject:
+         """Creates a vector store."""
+         created_at = int(time.time())
+
+         # Extract llama-stack-specific parameters from extra_body
+         extra_body = params.model_extra or {}
+         metadata = params.metadata or {}
+
+         provider_vector_store_id = extra_body.get("provider_vector_store_id")
+
+         # Use embedding info from metadata if available, otherwise from extra_body
+         if metadata.get("embedding_model"):
+             # If embedding_model is present in metadata, metadata is the source of the embedding config
+             embedding_model = metadata.get("embedding_model")
+             embedding_dimension = (
+                 int(metadata["embedding_dimension"]) if metadata.get("embedding_dimension") else EMBEDDING_DIMENSION
+             )
+             logger.debug(
+                 f"Using embedding config from metadata (takes precedence over extra_body): model='{embedding_model}', dimension={embedding_dimension}"
+             )
+         else:
+             embedding_model = extra_body.get("embedding_model")
+             embedding_dimension = extra_body.get("embedding_dimension", EMBEDDING_DIMENSION)
+             logger.debug(
+                 f"Using embedding config from extra_body: model='{embedding_model}', dimension={embedding_dimension}"
+             )
+
+         # use provider_id set by the router; fall back to the provider's own ID when used directly via --stack-config
+         provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None)
+         # Derive the canonical vector_store_id (allow override, else generate)
+         vector_store_id = provider_vector_store_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}")
+
+         if embedding_model is None:
+             raise ValueError("embedding_model is required")
+
+         if embedding_dimension is None:
+             raise ValueError("embedding_dimension is required")
+
+         # Register the VectorStore backing this vector store
+         if provider_id is None:
+             raise ValueError("Provider ID is required but was not provided")
+
+         # call to the provider to create any index, etc.
+         vector_store = VectorStore(
+             identifier=vector_store_id,
+             embedding_dimension=embedding_dimension,
+             embedding_model=embedding_model,
+             provider_id=provider_id,
+             provider_resource_id=vector_store_id,
+             vector_store_name=params.name,
+         )
+         await self.register_vector_store(vector_store)
+
+         # Create OpenAI vector store metadata
+         status = "completed"
+
+         # Start with no files attached and update later
+         file_counts = VectorStoreFileCounts(
+             cancelled=0,
+             completed=0,
+             failed=0,
+             in_progress=0,
+             total=0,
+         )
+         store_info: dict[str, Any] = {
+             "id": vector_store_id,
+             "object": "vector_store",
+             "created_at": created_at,
+             "name": params.name,
+             "usage_bytes": 0,
+             "file_counts": file_counts.model_dump(),
+             "status": status,
+             "expires_after": params.expires_after,
+             "expires_at": None,
+             "last_active_at": created_at,
+             "file_ids": [],
+             "chunking_strategy": params.chunking_strategy,
+         }
+
+         # Add provider information to metadata if provided
+         if provider_id:
+             metadata["provider_id"] = provider_id
+         if provider_vector_store_id:
+             metadata["provider_vector_store_id"] = provider_vector_store_id
+         store_info["metadata"] = metadata
+
+         # Save to persistent storage (provider-specific)
+         await self._save_openai_vector_store(vector_store_id, store_info)
+
+         # Store in memory cache
+         self.openai_vector_stores[vector_store_id] = store_info
+
+         # Now that our vector store is created, attach any files that were provided
+         file_ids = params.file_ids or []
+         tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids]
+         await asyncio.gather(*tasks)
+
+         # Get the updated store info and return it
+         store_info = self.openai_vector_stores[vector_store_id]
+         return VectorStoreObject.model_validate(store_info)
+
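From the client side, the llama-stack-specific parameters travel in the extra_body of the standard OpenAI call, which is where params.model_extra picks them up. A hedged sketch, assuming an OpenAI Python client pointed at a local Llama Stack server and placeholder model/provider ids:

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed local endpoint

    vs = client.vector_stores.create(  # older SDK versions expose this as client.beta.vector_stores
        name="docs",
        extra_body={
            "embedding_model": "all-MiniLM-L6-v2",  # placeholder embedding model id
            "embedding_dimension": 384,
            "provider_id": "faiss",                 # placeholder provider id
        },
    )
    print(vs.id)  # e.g. "vs_..." as generated above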
+     async def openai_list_vector_stores(
+         self,
+         limit: int | None = 20,
+         order: str | None = "desc",
+         after: str | None = None,
+         before: str | None = None,
+     ) -> VectorStoreListResponse:
+         """Returns a list of vector stores."""
+         limit = limit or 20
+         order = order or "desc"
+
+         # Get all vector stores
+         all_stores = list(self.openai_vector_stores.values())
+
+         # Sort by created_at
+         reverse_order = order == "desc"
+         all_stores.sort(key=lambda x: x["created_at"], reverse=reverse_order)
+
+         # Apply cursor-based pagination
+         if after:
+             after_index = next((i for i, store in enumerate(all_stores) if store["id"] == after), -1)
+             if after_index >= 0:
+                 all_stores = all_stores[after_index + 1 :]
+
+         if before:
+             before_index = next(
+                 (i for i, store in enumerate(all_stores) if store["id"] == before),
+                 len(all_stores),
+             )
+             all_stores = all_stores[:before_index]
+
+         # Apply limit
+         limited_stores = all_stores[:limit]
+         # Convert to VectorStoreObject instances
+         data = [VectorStoreObject(**store) for store in limited_stores]
+
+         # Determine pagination info
+         has_more = len(all_stores) > limit
+         first_id = data[0].id if data else None
+         last_id = data[-1].id if data else None
+
+         return VectorStoreListResponse(
+             data=data,
+             has_more=has_more,
+             first_id=first_id,
+             last_id=last_id,
+         )
+
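Cursor pagination composes in the usual way: feed each page's last_id back in as after until has_more is false. A small sketch, where impl stands for any provider instance built on this mixin:

    async def list_all_stores(impl: OpenAIVectorStoreMixin) -> list[VectorStoreObject]:
        stores: list[VectorStoreObject] = []
        after: str | None = None
        while True:
            page = await impl.openai_list_vector_stores(limit=20, after=after)
            stores.extend(page.data)
            if not page.has_more:
                return stores
            after = page.last_id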
+     async def openai_retrieve_vector_store(
+         self,
+         vector_store_id: str,
+     ) -> VectorStoreObject:
+         """Retrieves a vector store."""
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         store_info = self.openai_vector_stores[vector_store_id]
+         return VectorStoreObject(**store_info)
+
+     async def openai_update_vector_store(
+         self,
+         vector_store_id: str,
+         name: str | None = None,
+         expires_after: dict[str, Any] | None = None,
+         metadata: dict[str, Any] | None = None,
+     ) -> VectorStoreObject:
+         """Modifies a vector store."""
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         store_info = self.openai_vector_stores[vector_store_id].copy()
+
+         # Update fields if provided
+         if name is not None:
+             store_info["name"] = name
+         if expires_after is not None:
+             store_info["expires_after"] = expires_after
+         if metadata is not None:
+             store_info["metadata"] = metadata
+
+         # Update last_active_at
+         store_info["last_active_at"] = int(time.time())
+
+         # Save to persistent storage (provider-specific)
+         await self._update_openai_vector_store(vector_store_id, store_info)
+
+         # Update in-memory cache
+         self.openai_vector_stores[vector_store_id] = store_info
+
+         return VectorStoreObject(**store_info)
+
+     async def openai_delete_vector_store(
+         self,
+         vector_store_id: str,
+     ) -> VectorStoreDeleteResponse:
+         """Delete a vector store."""
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         # Delete from persistent storage (provider-specific)
+         await self._delete_openai_vector_store_from_storage(vector_store_id)
+
+         # Delete from in-memory cache
+         self.openai_vector_stores.pop(vector_store_id, None)
+
+         # Also delete the underlying vector DB
+         try:
+             await self.unregister_vector_store(vector_store_id)
+         except Exception as e:
+             logger.warning(f"Failed to delete underlying vector DB {vector_store_id}: {e}")
+
+         return VectorStoreDeleteResponse(
+             id=vector_store_id,
+             deleted=True,
+         )
+
+     async def openai_search_vector_store(
+         self,
+         vector_store_id: str,
+         query: str | list[str],
+         filters: dict[str, Any] | None = None,
+         max_num_results: int | None = 10,
+         ranking_options: SearchRankingOptions | None = None,
+         rewrite_query: bool | None = False,
+         search_mode: (
+             str | None
+         ) = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
+     ) -> VectorStoreSearchResponsePage:
+         """Search for chunks in a vector store."""
+         max_num_results = max_num_results or 10
+
+         # Validate search_mode
+         valid_modes = {"keyword", "vector", "hybrid"}
+         if search_mode not in valid_modes:
+             raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
+
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         if isinstance(query, list):
+             search_query = " ".join(query)
+         else:
+             search_query = query
+
+         try:
+             score_threshold = (
+                 ranking_options.score_threshold
+                 if ranking_options and ranking_options.score_threshold is not None
+                 else 0.0
+             )
+             params = {
+                 "max_chunks": max_num_results * CHUNK_MULTIPLIER,
+                 "score_threshold": score_threshold,
+                 "mode": search_mode,
+             }
+             # TODO: Add support for ranking_options.ranker
+
+             response = await self.query_chunks(
+                 vector_db_id=vector_store_id,
+                 query=search_query,
+                 params=params,
+             )
+
+             # Convert response to OpenAI format
+             data = []
+             for chunk, score in zip(response.chunks, response.scores, strict=False):
+                 # Apply filters if provided
+                 if filters:
+                     # Simple metadata filtering
+                     if not self._matches_filters(chunk.metadata, filters):
+                         continue
+
+                 content = self._chunk_to_vector_store_content(chunk)
+
+                 response_data_item = VectorStoreSearchResponse(
+                     file_id=chunk.metadata.get("document_id", ""),
+                     filename=chunk.metadata.get("filename", ""),
+                     score=score,
+                     attributes=chunk.metadata,
+                     content=content,
+                 )
+                 data.append(response_data_item)
+                 if len(data) >= max_num_results:
+                     break
+
+             return VectorStoreSearchResponsePage(
+                 search_query=search_query,
+                 data=data,
+                 has_more=False,  # For simplicity, we don't implement pagination here
+                 next_page=None,
+             )
+
+         except Exception as e:
+             logger.error(f"Error searching vector store {vector_store_id}: {e}")
+             # Return empty results on error
+             return VectorStoreSearchResponsePage(
+                 search_query=search_query,
+                 data=[],
+                 has_more=False,
+                 next_page=None,
+             )
+
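Note that the provider is asked for max_num_results * CHUNK_MULTIPLIER candidates so the post-hoc attribute filter can discard chunks and still fill the page. A usage sketch with hypothetical ids (impl as in the earlier sketches):

    page = await impl.openai_search_vector_store(
        vector_store_id="vs_123",          # hypothetical id
        query="how do I rotate credentials?",
        filters={"type": "eq", "key": "team", "value": "platform"},
        max_num_results=5,                 # provider is queried for 5 * CHUNK_MULTIPLIER chunks
        ranking_options=SearchRankingOptions(score_threshold=0.25),
        search_mode="hybrid",
    )
    for hit in page.data:
        print(hit.score, hit.filename)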
+     def _matches_filters(self, metadata: dict[str, Any], filters: dict[str, Any]) -> bool:
+         """Check if metadata matches the provided filters."""
+         if not filters:
+             return True
+
+         filter_type = filters.get("type")
+
+         if filter_type in ["eq", "ne", "gt", "gte", "lt", "lte"]:
+             # Comparison filter
+             key = filters.get("key")
+             value = filters.get("value")
+
+             if key not in metadata:
+                 return False
+
+             metadata_value = metadata[key]
+
+             if filter_type == "eq":
+                 return bool(metadata_value == value)
+             elif filter_type == "ne":
+                 return bool(metadata_value != value)
+             elif filter_type == "gt":
+                 return bool(metadata_value > value)
+             elif filter_type == "gte":
+                 return bool(metadata_value >= value)
+             elif filter_type == "lt":
+                 return bool(metadata_value < value)
+             elif filter_type == "lte":
+                 return bool(metadata_value <= value)
+             else:
+                 raise ValueError(f"Unsupported filter type: {filter_type}")
+
+         elif filter_type == "and":
+             # All filters must match
+             sub_filters = filters.get("filters", [])
+             return all(self._matches_filters(metadata, f) for f in sub_filters)
+
+         elif filter_type == "or":
+             # At least one filter must match
+             sub_filters = filters.get("filters", [])
+             return any(self._matches_filters(metadata, f) for f in sub_filters)
+
+         else:
+             # Unknown filter types are rejected
+             raise ValueError(f"Unsupported filter type: {filter_type}")
+
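The filter grammar mirrors OpenAI's: comparison leaves (eq, ne, gt, gte, lt, lte) carry a key and value, and "and"/"or" nodes carry a filters list that is evaluated recursively. For example:

    example_filter = {
        "type": "and",
        "filters": [
            {"type": "eq", "key": "team", "value": "platform"},
            {"type": "gte", "key": "uploaded_at", "value": 1700000000},
        ],
    }
    # _matches_filters({"team": "platform", "uploaded_at": 1700000500}, example_filter) -> True
    # _matches_filters({"team": "ml", "uploaded_at": 1700000500}, example_filter)       -> False
    # Missing keys also fail the comparison leaf, so {"team": "platform"} -> False.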
+     def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]:
+         # content is InterleavedContent
+         if isinstance(chunk.content, str):
+             content = [
+                 VectorStoreContent(
+                     type="text",
+                     text=chunk.content,
+                 )
+             ]
+         elif isinstance(chunk.content, list):
+             # TODO: Add support for other types of content
+             content = [
+                 VectorStoreContent(
+                     type="text",
+                     text=item.text,
+                 )
+                 for item in chunk.content
+                 if item.type == "text"
+             ]
+         else:
+             if chunk.content.type != "text":
+                 raise ValueError(f"Unsupported content type: {chunk.content.type}")
+             content = [
+                 VectorStoreContent(
+                     type="text",
+                     text=chunk.content.text,
+                 )
+             ]
+         return content
+
+     async def openai_attach_file_to_vector_store(
+         self,
+         vector_store_id: str,
+         file_id: str,
+         attributes: dict[str, Any] | None = None,
+         chunking_strategy: VectorStoreChunkingStrategy | None = None,
+     ) -> VectorStoreFileObject:
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         # Check if file is already attached to this vector store
+         store_info = self.openai_vector_stores[vector_store_id]
+         if file_id in store_info["file_ids"]:
+             logger.warning(f"File {file_id} is already attached to vector store {vector_store_id}, skipping")
+             # Return existing file object
+             file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+             return VectorStoreFileObject(**file_info)
+
+         attributes = attributes or {}
+         chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto()
+         created_at = int(time.time())
+         chunks: list[Chunk] = []
+         file_response: OpenAIFileObject | None = None
+
+         vector_store_file_object = VectorStoreFileObject(
+             id=file_id,
+             attributes=attributes,
+             chunking_strategy=chunking_strategy,
+             created_at=created_at,
+             status="in_progress",
+             vector_store_id=vector_store_id,
+         )
+
+         if not hasattr(self, "files_api") or not self.files_api:
+             vector_store_file_object.status = "failed"
+             vector_store_file_object.last_error = VectorStoreFileLastError(
+                 code="server_error",
+                 message="Files API is not available",
+             )
+             return vector_store_file_object
+
+         if isinstance(chunking_strategy, VectorStoreChunkingStrategyStatic):
+             max_chunk_size_tokens = chunking_strategy.static.max_chunk_size_tokens
+             chunk_overlap_tokens = chunking_strategy.static.chunk_overlap_tokens
+         else:
+             # Default values from the OpenAI API spec
+             max_chunk_size_tokens = 800
+             chunk_overlap_tokens = 400
+
+         try:
+             file_response = await self.files_api.openai_retrieve_file(file_id)
+             mime_type, _ = mimetypes.guess_type(file_response.filename)
+             content_response = await self.files_api.openai_retrieve_file_content(file_id)
+
+             content = content_from_data_and_mime_type(content_response.body, mime_type)
+
+             chunk_attributes = attributes.copy()
+             chunk_attributes["filename"] = file_response.filename
+
+             chunks = make_overlapped_chunks(
+                 file_id,
+                 content,
+                 max_chunk_size_tokens,
+                 chunk_overlap_tokens,
+                 chunk_attributes,
+             )
+             if not chunks:
+                 vector_store_file_object.status = "failed"
+                 vector_store_file_object.last_error = VectorStoreFileLastError(
+                     code="server_error",
+                     message="No chunks were generated from the file",
+                 )
+             else:
+                 await self.insert_chunks(
+                     vector_db_id=vector_store_id,
+                     chunks=chunks,
+                 )
+                 vector_store_file_object.status = "completed"
+         except Exception as e:
+             logger.exception("Error attaching file to vector store")
+             vector_store_file_object.status = "failed"
+             vector_store_file_object.last_error = VectorStoreFileLastError(
+                 code="server_error",
+                 message=str(e),
+             )
+
+         # Create OpenAI vector store file metadata
+         file_info = vector_store_file_object.model_dump(exclude={"last_error"})
+         file_info["filename"] = file_response.filename if file_response else ""
+
+         # Save vector store file to persistent storage (provider-specific)
+         dict_chunks = [c.model_dump() for c in chunks]
+         # TODO: This should be updated to include chunk_id
+         await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks)
+
+         # Update file_ids and file_counts in vector store metadata
+         store_info = self.openai_vector_stores[vector_store_id].copy()
+         store_info["file_ids"].append(file_id)
+         store_info["file_counts"]["total"] += 1
+         store_info["file_counts"][vector_store_file_object.status] += 1
+
+         # Save updated vector store to persistent storage
+         await self._save_openai_vector_store(vector_store_id, store_info)
+
+         # Update vector store in-memory cache
+         self.openai_vector_stores[vector_store_id] = store_info
+
+         return vector_store_file_object
+
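The defaults above (800-token chunks with 400-token overlap) follow the OpenAI API spec; a static strategy overrides both. A sketch with hypothetical ids; the static-config class name is assumed from the .static accessor used above:

    file_obj = await impl.openai_attach_file_to_vector_store(
        vector_store_id="vs_123",   # hypothetical id
        file_id="file_abc",         # must already exist in the Files API
        attributes={"team": "platform"},
        chunking_strategy=VectorStoreChunkingStrategyStatic(
            static=VectorStoreChunkingStrategyStaticConfig(  # assumed config class
                max_chunk_size_tokens=512,
                chunk_overlap_tokens=64,
            )
        ),
    )
    assert file_obj.status in ("completed", "failed")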
+     async def openai_list_files_in_vector_store(
+         self,
+         vector_store_id: str,
+         limit: int | None = 20,
+         order: str | None = "desc",
+         after: str | None = None,
+         before: str | None = None,
+         filter: VectorStoreFileStatus | None = None,
+     ) -> VectorStoreListFilesResponse:
+         """List files in a vector store."""
+         limit = limit or 20
+         order = order or "desc"
+
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         store_info = self.openai_vector_stores[vector_store_id]
+
+         file_objects: list[VectorStoreFileObject] = []
+         for file_id in store_info["file_ids"]:
+             file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+             file_object = VectorStoreFileObject(**file_info)
+             if filter and file_object.status != filter:
+                 continue
+             file_objects.append(file_object)
+
+         # Sort by created_at
+         reverse_order = order == "desc"
+         file_objects.sort(key=lambda x: x.created_at, reverse=reverse_order)
+
+         # Apply cursor-based pagination
+         if after:
+             after_index = next((i for i, file in enumerate(file_objects) if file.id == after), -1)
+             if after_index >= 0:
+                 file_objects = file_objects[after_index + 1 :]
+
+         if before:
+             before_index = next(
+                 (i for i, file in enumerate(file_objects) if file.id == before),
+                 len(file_objects),
+             )
+             file_objects = file_objects[:before_index]
+
+         # Apply limit
+         limited_files = file_objects[:limit]
+
+         # Determine pagination info from the returned page
+         has_more = len(file_objects) > limit
+         first_id = limited_files[0].id if limited_files else None
+         last_id = limited_files[-1].id if limited_files else None
+
+         return VectorStoreListFilesResponse(
+             data=limited_files,
+             has_more=has_more,
+             first_id=first_id,
+             last_id=last_id,
+         )
+
+     async def openai_retrieve_vector_store_file(
+         self,
+         vector_store_id: str,
+         file_id: str,
+     ) -> VectorStoreFileObject:
+         """Retrieves a vector store file."""
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         store_info = self.openai_vector_stores[vector_store_id]
+         if file_id not in store_info["file_ids"]:
+             raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")
+
+         file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+         return VectorStoreFileObject(**file_info)
+
+     async def openai_retrieve_vector_store_file_contents(
+         self,
+         vector_store_id: str,
+         file_id: str,
+     ) -> VectorStoreFileContentsResponse:
+         """Retrieves the contents of a vector store file."""
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+         dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
+         chunks = [Chunk.model_validate(c) for c in dict_chunks]
+         content = []
+         for chunk in chunks:
+             content.extend(self._chunk_to_vector_store_content(chunk))
+         return VectorStoreFileContentsResponse(
+             file_id=file_id,
+             filename=file_info.get("filename", ""),
+             attributes=file_info.get("attributes", {}),
+             content=content,
+         )
+
+     async def openai_update_vector_store_file(
+         self,
+         vector_store_id: str,
+         file_id: str,
+         attributes: dict[str, Any],
+     ) -> VectorStoreFileObject:
+         """Updates a vector store file."""
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         store_info = self.openai_vector_stores[vector_store_id]
+         if file_id not in store_info["file_ids"]:
+             raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")
+
+         file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+         file_info["attributes"] = attributes
+         await self._update_openai_vector_store_file(vector_store_id, file_id, file_info)
+         return VectorStoreFileObject(**file_info)
+
+     async def openai_delete_vector_store_file(
+         self,
+         vector_store_id: str,
+         file_id: str,
+     ) -> VectorStoreFileDeleteResponse:
+         """Deletes a vector store file."""
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
+         chunks = [Chunk.model_validate(c) for c in dict_chunks]
+
+         # Create ChunkForDeletion objects with both chunk_id and document_id
+         chunks_for_deletion = []
+         for c in chunks:
+             if c.chunk_id:
+                 document_id = c.metadata.get("document_id") or (
+                     c.chunk_metadata.document_id if c.chunk_metadata else None
+                 )
+                 if document_id:
+                     chunks_for_deletion.append(ChunkForDeletion(chunk_id=str(c.chunk_id), document_id=document_id))
+                 else:
+                     logger.warning(f"Chunk {c.chunk_id} has no document_id, skipping deletion")
+
+         if chunks_for_deletion:
+             await self.delete_chunks(vector_store_id, chunks_for_deletion)
+
+         store_info = self.openai_vector_stores[vector_store_id].copy()
+
+         file = await self.openai_retrieve_vector_store_file(vector_store_id, file_id)
+         await self._delete_openai_vector_store_file_from_storage(vector_store_id, file_id)
+
+         # Update in-memory cache
+         store_info["file_ids"].remove(file_id)
+         store_info["file_counts"][file.status] -= 1
+         store_info["file_counts"]["total"] -= 1
+         self.openai_vector_stores[vector_store_id] = store_info
+
+         # Save updated vector store to persistent storage
+         await self._save_openai_vector_store(vector_store_id, store_info)
+
+         return VectorStoreFileDeleteResponse(
+             id=file_id,
+             deleted=True,
+         )
+
+     async def openai_create_vector_store_file_batch(
+         self,
+         vector_store_id: str,
+         params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
+     ) -> VectorStoreFileBatchObject:
+         """Create a vector store file batch."""
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         chunking_strategy = params.chunking_strategy or VectorStoreChunkingStrategyAuto()
+
+         created_at = int(time.time())
+         batch_id = generate_object_id("vector_store_file_batch", lambda: f"batch_{uuid.uuid4()}")
+         # File batches expire after 7 days
+         expires_at = created_at + (7 * 24 * 60 * 60)
+
+         # Initialize batch file counts - all files start as in_progress
+         file_counts = VectorStoreFileCounts(
+             completed=0,
+             cancelled=0,
+             failed=0,
+             in_progress=len(params.file_ids),
+             total=len(params.file_ids),
+         )
+
+         # Create batch object immediately with in_progress status
+         batch_object = VectorStoreFileBatchObject(
+             id=batch_id,
+             created_at=created_at,
+             vector_store_id=vector_store_id,
+             status="in_progress",
+             file_counts=file_counts,
+         )
+
+         batch_info = {
+             **batch_object.model_dump(),
+             "file_ids": params.file_ids,
+             "attributes": params.attributes,
+             "chunking_strategy": chunking_strategy.model_dump(),
+             "expires_at": expires_at,
+         }
+         await self._save_openai_vector_store_file_batch(batch_id, batch_info)
+
+         # Start background processing of files
+         task = asyncio.create_task(self._process_file_batch_async(batch_id, batch_info))
+         self._file_batch_tasks[batch_id] = task
+
+         # Run cleanup if needed (throttled to once per day)
+         current_time = int(time.time())
+         if current_time - self._last_file_batch_cleanup_time >= FILE_BATCH_CLEANUP_INTERVAL_SECONDS:
+             logger.info("Running throttled cleanup of expired file batches")
+             asyncio.create_task(self._cleanup_expired_file_batches())
+             self._last_file_batch_cleanup_time = current_time
+
+         return batch_object
+
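Creation returns immediately with the batch in_progress while a background task does the attaching, so callers poll the retrieve endpoint. A sketch with hypothetical ids:

    params = OpenAICreateVectorStoreFileBatchRequestWithExtraBody(
        file_ids=["file_a", "file_b", "file_c"],  # hypothetical ids
    )
    batch = await impl.openai_create_vector_store_file_batch("vs_123", params)

    while batch.status == "in_progress":
        await asyncio.sleep(1.0)
        batch = await impl.openai_retrieve_vector_store_file_batch(batch.id, "vs_123")

    print(batch.file_counts)  # completed / failed / cancelled / in_progress / total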
+     async def _process_files_with_concurrency(
+         self,
+         file_ids: list[str],
+         vector_store_id: str,
+         attributes: dict[str, Any],
+         chunking_strategy_obj: Any,
+         batch_id: str,
+         batch_info: dict[str, Any],
+     ) -> None:
+         """Process files with controlled concurrency and chunking."""
+         semaphore = asyncio.Semaphore(MAX_CONCURRENT_FILES_PER_BATCH)
+
+         async def process_single_file(file_id: str) -> tuple[str, bool]:
+             """Process a single file with concurrency control."""
+             async with semaphore:
+                 try:
+                     vector_store_file_object = await self.openai_attach_file_to_vector_store(
+                         vector_store_id=vector_store_id,
+                         file_id=file_id,
+                         attributes=attributes,
+                         chunking_strategy=chunking_strategy_obj,
+                     )
+                     return file_id, vector_store_file_object.status == "completed"
+                 except Exception as e:
+                     logger.error(f"Failed to process file {file_id} in batch {batch_id}: {e}")
+                     return file_id, False
+
+         # Process files in chunks to avoid creating too many tasks at once
+         total_files = len(file_ids)
+         for chunk_start in range(0, total_files, FILE_BATCH_CHUNK_SIZE):
+             chunk_end = min(chunk_start + FILE_BATCH_CHUNK_SIZE, total_files)
+             chunk = file_ids[chunk_start:chunk_end]
+
+             chunk_num = chunk_start // FILE_BATCH_CHUNK_SIZE + 1
+             total_chunks = (total_files + FILE_BATCH_CHUNK_SIZE - 1) // FILE_BATCH_CHUNK_SIZE
+             logger.info(
+                 f"Processing chunk {chunk_num} of {total_chunks} ({len(chunk)} files, {chunk_start + 1}-{chunk_end} of {total_files} total files)"
+             )
+
+             async with asyncio.TaskGroup() as tg:
+                 chunk_tasks = [tg.create_task(process_single_file(file_id)) for file_id in chunk]
+
+             chunk_results = [task.result() for task in chunk_tasks]
+
+             # Update counts after each chunk for progressive feedback
+             for _, success in chunk_results:
+                 self._update_file_counts(batch_info, success=success)
+
+             # Save progress after each chunk
+             await self._save_openai_vector_store_file_batch(batch_id, batch_info)
+
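The pattern is bounded concurrency inside fixed-size chunks: with FILE_BATCH_CHUNK_SIZE = 10 and MAX_CONCURRENT_FILES_PER_BATCH = 3, a 25-file batch runs as chunks of 10/10/5 with at most three attachments in flight, checkpointing after each chunk. A standalone sketch of the same idea:

    import asyncio

    async def run_chunked(items: list[str], chunk_size: int = 10, max_concurrency: int = 3) -> None:
        sem = asyncio.Semaphore(max_concurrency)

        async def work(item: str) -> str:
            async with sem:               # at most max_concurrency workers at once
                await asyncio.sleep(0.1)  # stand-in for attaching one file
                return item

        for start in range(0, len(items), chunk_size):
            async with asyncio.TaskGroup() as tg:  # requires Python 3.11+
                tasks = [tg.create_task(work(i)) for i in items[start : start + chunk_size]]
            print([t.result() for t in tasks])     # checkpoint progress here, per chunk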
+     def _update_file_counts(self, batch_info: dict[str, Any], success: bool) -> None:
+         """Update file counts based on processing result."""
+         if success:
+             batch_info["file_counts"]["completed"] += 1
+         else:
+             batch_info["file_counts"]["failed"] += 1
+         batch_info["file_counts"]["in_progress"] -= 1
+
+     def _update_batch_status(self, batch_info: dict[str, Any]) -> None:
+         """Update final batch status based on file processing results."""
+         if batch_info["file_counts"]["failed"] == 0:
+             batch_info["status"] = "completed"
+         elif batch_info["file_counts"]["completed"] == 0:
+             batch_info["status"] = "failed"
+         else:
+             batch_info["status"] = "completed"  # Partial success counts as completed
+
+     async def _process_file_batch_async(
+         self,
+         batch_id: str,
+         batch_info: dict[str, Any],
+         override_file_ids: list[str] | None = None,
+     ) -> None:
+         """Process files in a batch asynchronously in the background."""
+         file_ids = override_file_ids if override_file_ids is not None else batch_info["file_ids"]
+         attributes = batch_info["attributes"]
+         chunking_strategy = batch_info["chunking_strategy"]
+         vector_store_id = batch_info["vector_store_id"]
+         chunking_strategy_adapter: TypeAdapter[VectorStoreChunkingStrategy] = TypeAdapter(VectorStoreChunkingStrategy)
+         chunking_strategy_obj = chunking_strategy_adapter.validate_python(chunking_strategy)
+
+         try:
+             # Process all files with controlled concurrency
+             await self._process_files_with_concurrency(
+                 file_ids=file_ids,
+                 vector_store_id=vector_store_id,
+                 attributes=attributes,
+                 chunking_strategy_obj=chunking_strategy_obj,
+                 batch_id=batch_id,
+                 batch_info=batch_info,
+             )
+
+             # Update final batch status
+             self._update_batch_status(batch_info)
+             await self._save_openai_vector_store_file_batch(batch_id, batch_info)
+
+             logger.info(f"File batch {batch_id} processing completed with status: {batch_info['status']}")
+
+         except asyncio.CancelledError:
+             logger.info(f"File batch {batch_id} processing was cancelled")
+             # Clean up task reference if it still exists
+             self._file_batch_tasks.pop(batch_id, None)
+             raise  # Re-raise to ensure proper cancellation propagation
+         finally:
+             # Always clean up task reference when processing ends
+             self._file_batch_tasks.pop(batch_id, None)
+
+     def _get_and_validate_batch(self, batch_id: str, vector_store_id: str) -> dict[str, Any]:
+         """Get a batch and validate that it exists and belongs to the given vector store."""
+         if vector_store_id not in self.openai_vector_stores:
+             raise VectorStoreNotFoundError(vector_store_id)
+
+         if batch_id not in self.openai_file_batches:
+             raise ValueError(f"File batch {batch_id} not found")
+
+         batch_info = self.openai_file_batches[batch_id]
+
+         # Check if batch has expired (read-only check)
+         expires_at = batch_info.get("expires_at")
+         if expires_at:
+             current_time = int(time.time())
+             if current_time > expires_at:
+                 raise ValueError(f"File batch {batch_id} has expired after 7 days from creation")
+
+         if batch_info["vector_store_id"] != vector_store_id:
+             raise ValueError(f"File batch {batch_id} does not belong to vector store {vector_store_id}")
+
+         return batch_info
+
+     def _paginate_objects(
+         self,
+         objects: list[Any],
+         limit: int | None = 20,
+         after: str | None = None,
+         before: str | None = None,
+     ) -> tuple[list[Any], bool, str | None, str | None]:
+         """Apply pagination to a list of objects with id fields."""
+         limit = min(limit or 20, 100)  # Cap at 100 as per OpenAI
+
+         # Find start index
+         start_idx = 0
+         if after:
+             for i, obj in enumerate(objects):
+                 if obj.id == after:
+                     start_idx = i + 1
+                     break
+
+         # Find end index
+         end_idx = start_idx + limit
+         if before:
+             for i, obj in enumerate(objects[start_idx:], start_idx):
+                 if obj.id == before:
+                     end_idx = i
+                     break
+
+         # Apply pagination
+         paginated_objects = objects[start_idx:end_idx]
+
+         # Determine pagination info
+         has_more = end_idx < len(objects)
+         first_id = paginated_objects[0].id if paginated_objects else None
+         last_id = paginated_objects[-1].id if paginated_objects else None
+
+         return paginated_objects, has_more, first_id, last_id
+
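A quick worked example of the windowing, using stand-in objects that only need an id attribute:

    from types import SimpleNamespace

    objs = [SimpleNamespace(id=i) for i in ["a", "b", "c", "d", "e"]]
    page, has_more, first_id, last_id = impl._paginate_objects(objs, limit=2, after="b")
    # page -> [c, d]; has_more -> True; first_id -> "c"; last_id -> "d"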
+     async def openai_retrieve_vector_store_file_batch(
+         self,
+         batch_id: str,
+         vector_store_id: str,
+     ) -> VectorStoreFileBatchObject:
+         """Retrieve a vector store file batch."""
+         batch_info = self._get_and_validate_batch(batch_id, vector_store_id)
+         return VectorStoreFileBatchObject(**batch_info)
+
+     async def openai_list_files_in_vector_store_file_batch(
+         self,
+         batch_id: str,
+         vector_store_id: str,
+         after: str | None = None,
+         before: str | None = None,
+         filter: str | None = None,
+         limit: int | None = 20,
+         order: str | None = "desc",
+     ) -> VectorStoreFilesListInBatchResponse:
+         """Returns a list of vector store files in a batch."""
+         batch_info = self._get_and_validate_batch(batch_id, vector_store_id)
+         batch_file_ids = batch_info["file_ids"]
+
+         # Load file objects for files in this batch
+         batch_file_objects = []
+
+         for file_id in batch_file_ids:
+             try:
+                 file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+                 file_object = VectorStoreFileObject(**file_info)
+
+                 # Apply status filter if provided
+                 if filter and file_object.status != filter:
+                     continue
+
+                 batch_file_objects.append(file_object)
+             except Exception as e:
+                 logger.warning(f"Could not load file {file_id} from batch {batch_id}: {e}")
+                 continue
+
+         # Sort by created_at
+         reverse_order = order == "desc"
+         batch_file_objects.sort(key=lambda x: x.created_at, reverse=reverse_order)
+
+         # Apply pagination using helper
+         paginated_files, has_more, first_id, last_id = self._paginate_objects(batch_file_objects, limit, after, before)
+
+         return VectorStoreFilesListInBatchResponse(
+             data=paginated_files,
+             first_id=first_id,
+             last_id=last_id,
+             has_more=has_more,
+         )
+
+     async def openai_cancel_vector_store_file_batch(
+         self,
+         batch_id: str,
+         vector_store_id: str,
+     ) -> VectorStoreFileBatchObject:
+         """Cancel a vector store file batch."""
+         batch_info = self._get_and_validate_batch(batch_id, vector_store_id)
+
+         if batch_info["status"] != "in_progress":
+             raise ValueError(f"Cannot cancel batch {batch_id} with status {batch_info['status']}")
+
+         # Cancel the actual processing task if it exists
+         if batch_id in self._file_batch_tasks:
+             task = self._file_batch_tasks[batch_id]
+             if not task.done():
+                 task.cancel()
+                 logger.info(f"Cancelled processing task for file batch: {batch_id}")
+             # Remove from task tracking
+             del self._file_batch_tasks[batch_id]
+
+         batch_info["status"] = "cancelled"
+
+         await self._save_openai_vector_store_file_batch(batch_id, batch_info)
+
+         updated_batch = VectorStoreFileBatchObject(**batch_info)
+
+         return updated_batch