llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed: 738 (first 489 shown below)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,960 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
7
+ # All rights reserved.
8
+ #
9
+ # This source code is licensed under the terms described in the LICENSE file in
10
+ # the root directory of this source tree.
11
+ import uuid
12
+ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
13
+
14
+ from fastapi import Body
15
+ from pydantic import BaseModel, Field
16
+
17
+ from llama_stack.apis.inference import InterleavedContent
18
+ from llama_stack.apis.vector_stores import VectorStore
19
+ from llama_stack.apis.version import LLAMA_STACK_API_V1
20
+ from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
21
+ from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
22
+ from llama_stack.schema_utils import json_schema_type, webmethod
23
+ from llama_stack.strong_typing.schema import register_schema
24
+
25
+
26
@json_schema_type
class ChunkMetadata(BaseModel):
    """
    `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
    will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
    is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after.
    Use `Chunk.metadata` for metadata that will be used in the context during inference.

    :param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content.
    :param document_id: The ID of the document this chunk belongs to.
    :param source: The source of the content, such as a URL, file path, or other identifier.
    :param created_timestamp: An optional timestamp indicating when the chunk was created.
    :param updated_timestamp: An optional timestamp indicating when the chunk was last updated.
    :param chunk_window: The window of the chunk, which can be used to group related chunks together.
    :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken.
    :param chunk_embedding_model: The embedding model used to create the chunk's embedding.
    :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk.
    :param content_token_count: The number of tokens in the content of the chunk.
    :param metadata_token_count: The number of tokens in the metadata of the chunk.
    """

    # Every field is optional: providers populate whichever subset they track.
    chunk_id: str | None = None
    document_id: str | None = None
    source: str | None = None
    created_timestamp: int | None = None
    updated_timestamp: int | None = None
    chunk_window: str | None = None
    chunk_tokenizer: str | None = None
    chunk_embedding_model: str | None = None
    chunk_embedding_dimension: int | None = None
    content_token_count: int | None = None
    metadata_token_count: int | None = None
57
+
58
+
59
@json_schema_type
class Chunk(BaseModel):
    """
    A single piece of content, plus its metadata, ready for insertion into a vector database.

    :param content: The content of the chunk, which can be interleaved text, images, or other types.
    :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
    :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
    :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality.
    :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
        The `chunk_metadata` is required backend functionality.
    """

    content: InterleavedContent
    metadata: dict[str, Any] = Field(default_factory=dict)
    embedding: list[float] | None = None
    # Serialized as "chunk_id" in JSON while the attribute keeps the internal name "stored_chunk_id".
    stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
    chunk_metadata: ChunkMetadata | None = None

    model_config = {"populate_by_name": True}

    def model_post_init(self, __context):
        # A "chunk_id" entry supplied inside `metadata` is promoted to the stored id
        # (and removed from `metadata` so it is not duplicated there).
        metadata = self.metadata
        if metadata and "chunk_id" in metadata:
            self.stored_chunk_id = metadata.pop("chunk_id")

    @property
    def chunk_id(self) -> str:
        """Return the stored chunk ID, or derive one from the document ID and content."""
        if self.stored_chunk_id:
            return self.stored_chunk_id
        try:
            seed = self.metadata["document_id"]
        except KeyError:
            # NOTE(review): with no stored id and no document_id the seed is a fresh UUID,
            # so repeated accesses yield *different* IDs — confirm this is intended.
            seed = str(uuid.uuid4())
        return generate_chunk_id(seed, str(self.content))

    @property
    def document_id(self) -> str | None:
        """Return the document ID, preferring `metadata` over `chunk_metadata`."""
        doc_id = self.metadata.get("document_id")
        if doc_id is None:
            # Fall back to the backend chunk metadata when available.
            return self.chunk_metadata.document_id if self.chunk_metadata is not None else None
        if isinstance(doc_id, str):
            return doc_id
        raise TypeError(f"metadata['document_id'] must be a string, got {type(doc_id).__name__}: {doc_id!r}")
111
+
112
+
113
@json_schema_type
class QueryChunksResponse(BaseModel):
    """Response from querying chunks in a vector database.

    :param chunks: List of content chunks returned from the query
    :param scores: Relevance scores corresponding to each returned chunk
    """

    chunks: list[Chunk]
    # Index-aligned with `chunks`: scores[i] is the relevance score of chunks[i].
    scores: list[float]
123
+
124
+
125
@json_schema_type
class VectorStoreFileCounts(BaseModel):
    """File processing status counts for a vector store.

    :param completed: Number of files that have been successfully processed
    :param cancelled: Number of files that had their processing cancelled
    :param failed: Number of files that failed to process
    :param in_progress: Number of files currently being processed
    :param total: Total number of files in the vector store
    """

    completed: int
    cancelled: int
    failed: int
    in_progress: int
    # NOTE(review): presumably total == completed + cancelled + failed + in_progress;
    # this invariant is not enforced here — confirm against the providers that fill it.
    total: int
141
+
142
+
143
# TODO: rename this as OpenAIVectorStore
@json_schema_type
class VectorStoreObject(BaseModel):
    """OpenAI Vector Store object.

    :param id: Unique identifier for the vector store
    :param object: Object type identifier, always "vector_store"
    :param created_at: Timestamp when the vector store was created
    :param name: (Optional) Name of the vector store
    :param usage_bytes: Storage space used by the vector store in bytes
    :param file_counts: File processing status counts for the vector store
    :param status: Current status of the vector store
    :param expires_after: (Optional) Expiration policy for the vector store
    :param expires_at: (Optional) Timestamp when the vector store will expire
    :param last_active_at: (Optional) Timestamp of last activity on the vector store
    :param metadata: Set of key-value pairs that can be attached to the vector store
    """

    id: str
    object: str = "vector_store"
    # NOTE(review): timestamps here look like Unix epoch seconds (OpenAI convention) — confirm.
    created_at: int
    name: str | None = None
    usage_bytes: int = 0
    file_counts: VectorStoreFileCounts
    status: str = "completed"
    expires_after: dict[str, Any] | None = None
    expires_at: int | None = None
    last_active_at: int | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
172
+
173
+
174
@json_schema_type
class VectorStoreCreateRequest(BaseModel):
    """Request to create a vector store.

    :param name: (Optional) Name for the vector store
    :param file_ids: List of file IDs to include in the vector store
    :param expires_after: (Optional) Expiration policy for the vector store
    :param chunking_strategy: (Optional) Strategy for splitting files into chunks
    :param metadata: Set of key-value pairs that can be attached to the vector store
    """

    name: str | None = None
    file_ids: list[str] = Field(default_factory=list)
    expires_after: dict[str, Any] | None = None
    chunking_strategy: dict[str, Any] | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
190
+
191
+
192
@json_schema_type
class VectorStoreModifyRequest(BaseModel):
    """Request to modify a vector store.

    All fields are optional; a `None` value means "leave this attribute unchanged".

    :param name: (Optional) Updated name for the vector store
    :param expires_after: (Optional) Updated expiration policy for the vector store
    :param metadata: (Optional) Updated set of key-value pairs for the vector store
    """

    name: str | None = None
    expires_after: dict[str, Any] | None = None
    metadata: dict[str, Any] | None = None
204
+
205
+
206
@json_schema_type
class VectorStoreListResponse(BaseModel):
    """Response from listing vector stores.

    :param object: Object type identifier, always "list"
    :param data: List of vector store objects
    :param first_id: (Optional) ID of the first vector store in the list for pagination
    :param last_id: (Optional) ID of the last vector store in the list for pagination
    :param has_more: Whether there are more vector stores available beyond this page
    """

    object: str = "list"
    data: list[VectorStoreObject]
    # first_id/last_id are cursor anchors for cursor-based pagination.
    first_id: str | None = None
    last_id: str | None = None
    has_more: bool = False
222
+
223
+
224
@json_schema_type
class VectorStoreSearchRequest(BaseModel):
    """Request to search a vector store.

    :param query: Search query as a string or list of strings
    :param filters: (Optional) Filters based on file attributes to narrow search results
    :param max_num_results: Maximum number of results to return, defaults to 10
    :param ranking_options: (Optional) Options for ranking and filtering search results
    :param rewrite_query: Whether to rewrite the query for better vector search performance
    """

    query: str | list[str]
    filters: dict[str, Any] | None = None
    max_num_results: int = 10
    ranking_options: dict[str, Any] | None = None
    rewrite_query: bool = False
240
+
241
+
242
@json_schema_type
class VectorStoreContent(BaseModel):
    """Content item from a vector store file or search result.

    :param type: Content type, currently only "text" is supported
    :param text: The actual text content
    """

    # Literal["text"] keeps the schema closed; new content types require a model change.
    type: Literal["text"]
    text: str
252
+
253
+
254
@json_schema_type
class VectorStoreSearchResponse(BaseModel):
    """Response from searching a vector store.

    :param file_id: Unique identifier of the file containing the result
    :param filename: Name of the file containing the result
    :param score: Relevance score for this search result
    :param attributes: (Optional) Key-value attributes associated with the file
    :param content: List of content items matching the search query
    """

    file_id: str
    filename: str
    score: float
    # Attribute values are restricted to scalar JSON types (no nesting).
    attributes: dict[str, str | float | bool] | None = None
    content: list[VectorStoreContent]
270
+
271
+
272
@json_schema_type
class VectorStoreSearchResponsePage(BaseModel):
    """Paginated response from searching a vector store.

    :param object: Object type identifier for the search results page
    :param search_query: The original search query that was executed
    :param data: List of search result objects
    :param has_more: Whether there are more results available beyond this page
    :param next_page: (Optional) Token for retrieving the next page of results
    """

    object: str = "vector_store.search_results.page"
    search_query: str
    data: list[VectorStoreSearchResponse]
    has_more: bool = False
    next_page: str | None = None
288
+
289
+
290
@json_schema_type
class VectorStoreDeleteResponse(BaseModel):
    """Response from deleting a vector store.

    :param id: Unique identifier of the deleted vector store
    :param object: Object type identifier for the deletion response
    :param deleted: Whether the deletion operation was successful
    """

    id: str
    object: str = "vector_store.deleted"
    deleted: bool = True
302
+
303
+
304
@json_schema_type
class VectorStoreChunkingStrategyAuto(BaseModel):
    """Automatic chunking strategy for vector store files.

    :param type: Strategy type, always "auto" for automatic chunking
    """

    # Discriminator value used by the VectorStoreChunkingStrategy tagged union.
    type: Literal["auto"] = "auto"
312
+
313
+
314
@json_schema_type
class VectorStoreChunkingStrategyStaticConfig(BaseModel):
    """Configuration for static chunking strategy.

    :param chunk_overlap_tokens: Number of tokens to overlap between adjacent chunks
    :param max_chunk_size_tokens: Maximum number of tokens per chunk, must be between 100 and 4096
    """

    chunk_overlap_tokens: int = 400
    # The 100..4096 bounds are enforced at validation time by the Field constraint.
    max_chunk_size_tokens: int = Field(800, ge=100, le=4096)
324
+
325
+
326
@json_schema_type
class VectorStoreChunkingStrategyStatic(BaseModel):
    """Static chunking strategy with configurable parameters.

    :param type: Strategy type, always "static" for static chunking
    :param static: Configuration parameters for the static chunking strategy
    """

    # Discriminator value used by the VectorStoreChunkingStrategy tagged union.
    type: Literal["static"] = "static"
    static: VectorStoreChunkingStrategyStaticConfig
336
+
337
+
338
# Tagged union over the supported chunking strategies; the "type" field
# selects the concrete variant during (de)serialization.
VectorStoreChunkingStrategy = Annotated[
    VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic,
    Field(discriminator="type"),
]
register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy")
343
+
344
+
345
class SearchRankingOptions(BaseModel):
    """Options for ranking and filtering search results.

    :param ranker: (Optional) Name of the ranking algorithm to use
    :param score_threshold: (Optional) Minimum relevance score threshold for results
    """

    # Provider-specific ranker name; interpretation is left to the provider.
    ranker: str | None = None
    # NOTE: OpenAI File Search Tool requires threshold to be between 0 and 1, however
    # we don't guarantee that the score is between 0 and 1, so will leave this unconstrained
    # and let the provider handle it
    score_threshold: float | None = Field(default=0.0)
357
+
358
+
359
@json_schema_type
class VectorStoreFileLastError(BaseModel):
    """Error information for failed vector store file processing.

    :param code: Error code indicating the type of failure
    :param message: Human-readable error message describing the failure
    """

    # Collapsed from `Literal["server_error"] | Literal["rate_limit_exceeded"]`:
    # a multi-value Literal accepts the same values but is the idiomatic form
    # and produces a single enum in the generated schema.
    code: Literal["server_error", "rate_limit_exceeded"]
    message: str
369
+
370
+
371
# Idiomatic multi-value Literal instead of a union of single-value Literals;
# the set of accepted status strings is unchanged.
VectorStoreFileStatus = Literal["completed", "in_progress", "cancelled", "failed"]
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
373
+
374
+
375
@json_schema_type
class VectorStoreFileObject(BaseModel):
    """OpenAI Vector Store File object.

    :param id: Unique identifier for the file
    :param object: Object type identifier, always "vector_store.file"
    :param attributes: Key-value attributes associated with the file
    :param chunking_strategy: Strategy used for splitting the file into chunks
    :param created_at: Timestamp when the file was added to the vector store
    :param last_error: (Optional) Error information if file processing failed
    :param status: Current processing status of the file
    :param usage_bytes: Storage space used by this file in bytes
    :param vector_store_id: ID of the vector store containing this file
    """

    id: str
    object: str = "vector_store.file"
    # default_factory gives every instance its own dict (mutable default pitfall).
    attributes: dict[str, Any] = Field(default_factory=dict)
    chunking_strategy: VectorStoreChunkingStrategy
    # NOTE(review): presumably a Unix epoch timestamp in seconds — confirm with producers.
    created_at: int
    last_error: VectorStoreFileLastError | None = None
    status: VectorStoreFileStatus
    usage_bytes: int = 0
    vector_store_id: str
399
+
400
+
401
@json_schema_type
class VectorStoreListFilesResponse(BaseModel):
    """Response from listing files in a vector store.

    Uses cursor-style pagination: `first_id`/`last_id` delimit this page and
    `has_more` signals whether another page can be fetched.

    :param object: Object type identifier, always "list"
    :param data: List of vector store file objects
    :param first_id: (Optional) ID of the first file in the list for pagination
    :param last_id: (Optional) ID of the last file in the list for pagination
    :param has_more: Whether there are more files available beyond this page
    """

    object: str = "list"
    data: list[VectorStoreFileObject]
    first_id: str | None = None
    last_id: str | None = None
    has_more: bool = False
417
+
418
+
419
@json_schema_type
class VectorStoreFileContentsResponse(BaseModel):
    """Response from retrieving the contents of a vector store file.

    :param file_id: Unique identifier for the file
    :param filename: Name of the file
    :param attributes: Key-value attributes associated with the file
    :param content: List of content items from the file
    """

    file_id: str
    filename: str
    # Unlike VectorStoreFileObject.attributes, this field has no default:
    # callers must supply it explicitly.
    attributes: dict[str, Any]
    content: list[VectorStoreContent]
433
+
434
+
435
@json_schema_type
class VectorStoreFileDeleteResponse(BaseModel):
    """Response from deleting a vector store file.

    :param id: Unique identifier of the deleted file
    :param object: Object type identifier for the deletion response
    :param deleted: Whether the deletion operation was successful
    """

    id: str
    object: str = "vector_store.file.deleted"
    # Deletion outcome flag; defaults to success.
    deleted: bool = True
447
+
448
+
449
@json_schema_type
class VectorStoreFileBatchObject(BaseModel):
    """OpenAI Vector Store File Batch object.

    :param id: Unique identifier for the file batch
    :param object: Object type identifier, always "vector_store.file_batch"
    :param created_at: Timestamp when the file batch was created
    :param vector_store_id: ID of the vector store containing the file batch
    :param status: Current processing status of the file batch
    :param file_counts: File processing status counts for the batch
    """

    id: str
    object: str = "vector_store.file_batch"
    # NOTE(review): presumably a Unix epoch timestamp in seconds — confirm with producers.
    created_at: int
    vector_store_id: str
    # Batch-level status reuses the per-file status vocabulary.
    status: VectorStoreFileStatus
    file_counts: VectorStoreFileCounts
467
+
468
+
469
@json_schema_type
class VectorStoreFilesListInBatchResponse(BaseModel):
    """Response from listing files in a vector store file batch.

    Same cursor-style pagination shape as VectorStoreListFilesResponse.

    :param object: Object type identifier, always "list"
    :param data: List of vector store file objects in the batch
    :param first_id: (Optional) ID of the first file in the list for pagination
    :param last_id: (Optional) ID of the last file in the list for pagination
    :param has_more: Whether there are more files available beyond this page
    """

    object: str = "list"
    data: list[VectorStoreFileObject]
    first_id: str | None = None
    last_id: str | None = None
    has_more: bool = False
485
+
486
+
487
# extra_body can be accessed via .model_extra
@json_schema_type
class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
    """Request to create a vector store with extra_body support.

    `extra="allow"` means unknown request fields are accepted rather than
    rejected; they are surfaced through Pydantic's ``model_extra`` so
    provider-specific parameters can pass through.

    :param name: (Optional) A name for the vector store
    :param file_ids: List of file IDs to include in the vector store
    :param expires_after: (Optional) Expiration policy for the vector store
    :param chunking_strategy: (Optional) Strategy for splitting files into chunks
    :param metadata: Set of key-value pairs that can be attached to the vector store
    """

    name: str | None = None
    file_ids: list[str] | None = None
    expires_after: dict[str, Any] | None = None
    # Kept as an untyped dict (not VectorStoreChunkingStrategy) at this boundary.
    chunking_strategy: dict[str, Any] | None = None
    metadata: dict[str, Any] | None = None
504
+
505
+
506
# extra_body can be accessed via .model_extra
@json_schema_type
class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"):
    """Request to create a vector store file batch with extra_body support.

    `extra="allow"` means unknown request fields are accepted and exposed
    via Pydantic's ``model_extra``.

    :param file_ids: A list of File IDs that the vector store should use
    :param attributes: (Optional) Key-value attributes to store with the files
    :param chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto
    """

    file_ids: list[str]
    attributes: dict[str, Any] | None = None
    chunking_strategy: VectorStoreChunkingStrategy | None = None
519
+
520
+
521
class VectorStoreTable(Protocol):
    """Structural interface for resolving a vector store by its identifier."""

    def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...
523
+
524
+
525
@runtime_checkable
@trace_protocol
class VectorIO(Protocol):
    """Vector I/O API surface: low-level chunk insert/query plus the
    OpenAI-compatible Vector Stores endpoints (stores, files, file batches).

    All methods are protocol stubs; providers supply the implementations.
    """

    # Optional hook for resolving vector stores by ID; providers may leave it unset.
    vector_store_table: VectorStoreTable | None = None

    # this will just block now until chunks are inserted, but it should
    # probably return a Job instance which can be polled for completion
    # TODO: rename vector_db_id to vector_store_id once Stainless is working
    @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
    async def insert_chunks(
        self,
        vector_db_id: str,
        chunks: list[Chunk],
        ttl_seconds: int | None = None,
    ) -> None:
        """Insert chunks into a vector database.

        :param vector_db_id: The identifier of the vector database to insert the chunks into.
        :param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types.
            `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional.
            If `metadata` is provided, you configure how Llama Stack formats the chunk during generation.
            If `embedding` is not provided, it will be computed later.
        :param ttl_seconds: The time to live of the chunks.
        """
        ...

    # TODO: rename vector_db_id to vector_store_id once Stainless is working
    @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
    async def query_chunks(
        self,
        vector_db_id: str,
        query: InterleavedContent,
        params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
        """Query chunks from a vector database.

        :param vector_db_id: The identifier of the vector database to query.
        :param query: The query to search for.
        :param params: The parameters of the query.
        :returns: A QueryChunksResponse.
        """
        ...

    # OpenAI Vector Stores API endpoints
    # NOTE: each endpoint below is registered twice — once under the deprecated
    # /openai/v1 prefix and once under the current unprefixed route.
    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_create_vector_store(
        self,
        params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
    ) -> VectorStoreObject:
        """Creates a vector store.

        Generate an OpenAI-compatible vector store with the given parameters.
        :param params: The vector store creation request body; unknown fields are accepted and exposed via ``model_extra``.
        :returns: A VectorStoreObject representing the created vector store.
        """
        ...

    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_vector_stores(
        self,
        limit: int | None = 20,
        order: str | None = "desc",
        after: str | None = None,
        before: str | None = None,
    ) -> VectorStoreListResponse:
        """Returns a list of vector stores.

        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
        :param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
        :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list.
        :param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list.
        :returns: A VectorStoreListResponse containing the list of vector stores.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
    )
    @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_vector_store(
        self,
        vector_store_id: str,
    ) -> VectorStoreObject:
        """Retrieves a vector store.

        :param vector_store_id: The ID of the vector store to retrieve.
        :returns: A VectorStoreObject representing the vector store.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}",
        method="POST",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_update_vector_store(
        self,
        vector_store_id: str,
        name: str | None = None,
        expires_after: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> VectorStoreObject:
        """Updates a vector store.

        :param vector_store_id: The ID of the vector store to update.
        :param name: The name of the vector store.
        :param expires_after: The expiration policy for a vector store.
        :param metadata: Set of 16 key-value pairs that can be attached to an object.
        :returns: A VectorStoreObject representing the updated vector store.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}",
        method="DELETE",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_delete_vector_store(
        self,
        vector_store_id: str,
    ) -> VectorStoreDeleteResponse:
        """Delete a vector store.

        :param vector_store_id: The ID of the vector store to delete.
        :returns: A VectorStoreDeleteResponse indicating the deletion status.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/search",
        method="POST",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/search",
        method="POST",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_search_vector_store(
        self,
        vector_store_id: str,
        query: str | list[str],
        filters: dict[str, Any] | None = None,
        max_num_results: int | None = 10,
        ranking_options: SearchRankingOptions | None = None,
        rewrite_query: bool | None = False,
        search_mode: (
            str | None
        ) = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
    ) -> VectorStoreSearchResponsePage:
        """Search for chunks in a vector store.

        Searches a vector store for relevant chunks based on a query and optional file attribute filters.

        :param vector_store_id: The ID of the vector store to search.
        :param query: The query string or array for performing the search.
        :param filters: Filters based on file attributes to narrow the search results.
        :param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
        :param ranking_options: Ranking options for fine-tuning the search results.
        :param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
        :param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
        :returns: A VectorStoreSearchResponse containing the search results.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/files",
        method="POST",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files",
        method="POST",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_attach_file_to_vector_store(
        self,
        vector_store_id: str,
        file_id: str,
        attributes: dict[str, Any] | None = None,
        chunking_strategy: VectorStoreChunkingStrategy | None = None,
    ) -> VectorStoreFileObject:
        """Attach a file to a vector store.

        :param vector_store_id: The ID of the vector store to attach the file to.
        :param file_id: The ID of the file to attach to the vector store.
        :param attributes: The key-value attributes stored with the file, which can be used for filtering.
        :param chunking_strategy: The chunking strategy to use for the file.
        :returns: A VectorStoreFileObject representing the attached file.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/files",
        method="GET",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files",
        method="GET",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_list_files_in_vector_store(
        self,
        vector_store_id: str,
        limit: int | None = 20,
        order: str | None = "desc",
        after: str | None = None,
        before: str | None = None,
        filter: VectorStoreFileStatus | None = None,
    ) -> VectorStoreListFilesResponse:
        """List files in a vector store.

        :param vector_store_id: The ID of the vector store to list files from.
        :param limit: (Optional) A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
        :param order: (Optional) Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
        :param after: (Optional) A cursor for use in pagination. `after` is an object ID that defines your place in the list.
        :param before: (Optional) A cursor for use in pagination. `before` is an object ID that defines your place in the list.
        :param filter: (Optional) Filter by file status to only return files with the specified status.
        :returns: A VectorStoreListFilesResponse containing the list of files.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
        method="GET",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}",
        method="GET",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_retrieve_vector_store_file(
        self,
        vector_store_id: str,
        file_id: str,
    ) -> VectorStoreFileObject:
        """Retrieves a vector store file.

        :param vector_store_id: The ID of the vector store containing the file to retrieve.
        :param file_id: The ID of the file to retrieve.
        :returns: A VectorStoreFileObject representing the file.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
        method="GET",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}/content",
        method="GET",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_retrieve_vector_store_file_contents(
        self,
        vector_store_id: str,
        file_id: str,
    ) -> VectorStoreFileContentsResponse:
        """Retrieves the contents of a vector store file.

        :param vector_store_id: The ID of the vector store containing the file to retrieve.
        :param file_id: The ID of the file to retrieve.
        :returns: A VectorStoreFileContentsResponse representing the file contents.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
        method="POST",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}",
        method="POST",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_update_vector_store_file(
        self,
        vector_store_id: str,
        file_id: str,
        attributes: dict[str, Any],
    ) -> VectorStoreFileObject:
        """Updates a vector store file.

        :param vector_store_id: The ID of the vector store containing the file to update.
        :param file_id: The ID of the file to update.
        :param attributes: The updated key-value attributes to store with the file.
        :returns: A VectorStoreFileObject representing the updated file.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
        method="DELETE",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}",
        method="DELETE",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_delete_vector_store_file(
        self,
        vector_store_id: str,
        file_id: str,
    ) -> VectorStoreFileDeleteResponse:
        """Delete a vector store file.

        :param vector_store_id: The ID of the vector store containing the file to delete.
        :param file_id: The ID of the file to delete.
        :returns: A VectorStoreFileDeleteResponse indicating the deletion status.
        """
        ...

    @webmethod(
        route="/vector_stores/{vector_store_id}/file_batches",
        method="POST",
        level=LLAMA_STACK_API_V1,
    )
    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
        method="POST",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    async def openai_create_vector_store_file_batch(
        self,
        vector_store_id: str,
        params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
    ) -> VectorStoreFileBatchObject:
        """Create a vector store file batch.

        Generate an OpenAI-compatible vector store file batch for the given vector store.
        :param vector_store_id: The ID of the vector store to create the file batch for.
        :param params: The file batch creation request body (file IDs, attributes, chunking strategy); unknown fields are exposed via ``model_extra``.
        :returns: A VectorStoreFileBatchObject representing the created file batch.
        """
        ...

    @webmethod(
        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}",
        method="GET",
        level=LLAMA_STACK_API_V1,
    )
    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
        method="GET",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    async def openai_retrieve_vector_store_file_batch(
        self,
        batch_id: str,
        vector_store_id: str,
    ) -> VectorStoreFileBatchObject:
        """Retrieve a vector store file batch.

        :param batch_id: The ID of the file batch to retrieve.
        :param vector_store_id: The ID of the vector store containing the file batch.
        :returns: A VectorStoreFileBatchObject representing the file batch.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
        method="GET",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
        method="GET",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_list_files_in_vector_store_file_batch(
        self,
        batch_id: str,
        vector_store_id: str,
        after: str | None = None,
        before: str | None = None,
        filter: str | None = None,
        limit: int | None = 20,
        order: str | None = "desc",
    ) -> VectorStoreFilesListInBatchResponse:
        """Returns a list of vector store files in a batch.

        :param batch_id: The ID of the file batch to list files from.
        :param vector_store_id: The ID of the vector store containing the file batch.
        :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list.
        :param before: A cursor for use in pagination. `before` is an object ID that defines your place in the list.
        :param filter: Filter by file status. One of in_progress, completed, failed, cancelled.
        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
        :param order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending order and `desc` for descending order.
        :returns: A VectorStoreFilesListInBatchResponse containing the list of files in the batch.
        """
        ...

    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
        method="POST",
        level=LLAMA_STACK_API_V1,
        deprecated=True,
    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
        method="POST",
        level=LLAMA_STACK_API_V1,
    )
    async def openai_cancel_vector_store_file_batch(
        self,
        batch_id: str,
        vector_store_id: str,
    ) -> VectorStoreFileBatchObject:
        """Cancels a vector store file batch.

        :param batch_id: The ID of the file batch to cancel.
        :param vector_store_id: The ID of the vector store containing the file batch.
        :returns: A VectorStoreFileBatchObject representing the cancelled file batch.
        """
        ...