llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
llama_stack/core/routers/safety.py
@@ -0,0 +1,85 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+
+from llama_stack.apis.inference import Message
+from llama_stack.apis.safety import RunShieldResponse, Safety
+from llama_stack.apis.safety.safety import ModerationObject
+from llama_stack.apis.shields import Shield
+from llama_stack.log import get_logger
+from llama_stack.providers.datatypes import RoutingTable
+
+logger = get_logger(name=__name__, category="core::routers")
+
+
+class SafetyRouter(Safety):
+    def __init__(
+        self,
+        routing_table: RoutingTable,
+    ) -> None:
+        logger.debug("Initializing SafetyRouter")
+        self.routing_table = routing_table
+
+    async def initialize(self) -> None:
+        logger.debug("SafetyRouter.initialize")
+        pass
+
+    async def shutdown(self) -> None:
+        logger.debug("SafetyRouter.shutdown")
+        pass
+
+    async def register_shield(
+        self,
+        shield_id: str,
+        provider_shield_id: str | None = None,
+        provider_id: str | None = None,
+        params: dict[str, Any] | None = None,
+    ) -> Shield:
+        logger.debug(f"SafetyRouter.register_shield: {shield_id}")
+        return await self.routing_table.register_shield(shield_id, provider_shield_id, provider_id, params)
+
+    async def unregister_shield(self, identifier: str) -> None:
+        logger.debug(f"SafetyRouter.unregister_shield: {identifier}")
+        return await self.routing_table.unregister_shield(identifier)
+
+    async def run_shield(
+        self,
+        shield_id: str,
+        messages: list[Message],
+        params: dict[str, Any] = None,
+    ) -> RunShieldResponse:
+        logger.debug(f"SafetyRouter.run_shield: {shield_id}")
+        provider = await self.routing_table.get_provider_impl(shield_id)
+        return await provider.run_shield(
+            shield_id=shield_id,
+            messages=messages,
+            params=params,
+        )
+
+    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
+        async def get_shield_id(self, model: str) -> str:
+            """Get Shield id from model (provider_resource_id) of shield."""
+            list_shields_response = await self.routing_table.list_shields()
+
+            matches = [s.identifier for s in list_shields_response.data if model == s.provider_resource_id]
+
+            if not matches:
+                raise ValueError(f"No shield associated with provider_resource id {model}")
+            if len(matches) > 1:
+                raise ValueError(f"Multiple shields associated with provider_resource id {model}")
+            return matches[0]
+
+        shield_id = await get_shield_id(self, model)
+        logger.debug(f"SafetyRouter.run_moderation: {shield_id}")
+        provider = await self.routing_table.get_provider_impl(shield_id)
+
+        response = await provider.run_moderation(
+            input=input,
+            model=model,
+        )
+
+        return response
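For orientation, the new SafetyRouter above is a thin delegator: it resolves a provider from the routing table and forwards run_shield (and run_moderation) to it. The following is a minimal, illustrative sketch, not part of the diff, of exercising that delegation with hypothetical stub objects; it assumes llama-stack 0.3.4 is installed and that the import path matches the llama_stack/core/routers/safety.py entry in the file list.

import asyncio

from llama_stack.core.routers.safety import SafetyRouter  # path assumed from the file listing above


class _StubSafetyProvider:
    # Hypothetical provider: pretends every message passes the shield.
    async def run_shield(self, shield_id, messages, params):
        return {"shield_id": shield_id, "violation": None, "checked": len(messages)}


class _StubRoutingTable:
    # Hypothetical routing table exposing only what SafetyRouter.run_shield calls.
    def __init__(self, provider):
        self._provider = provider

    async def get_provider_impl(self, shield_id):
        return self._provider


async def main():
    router = SafetyRouter(routing_table=_StubRoutingTable(_StubSafetyProvider()))
    await router.initialize()
    result = await router.run_shield(
        shield_id="llama-guard",  # assumed shield identifier, for illustration only
        messages=[{"role": "user", "content": "hello"}],
        params={},
    )
    print(result)


asyncio.run(main())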
llama_stack/core/routers/tool_runtime.py
@@ -0,0 +1,91 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from typing import Any
+
+ from llama_stack.apis.common.content_types import (
+     URL,
+     InterleavedContent,
+ )
+ from llama_stack.apis.tools import (
+     ListToolDefsResponse,
+     RAGDocument,
+     RAGQueryConfig,
+     RAGQueryResult,
+     RAGToolRuntime,
+     ToolRuntime,
+ )
+ from llama_stack.log import get_logger
+
+ from ..routing_tables.toolgroups import ToolGroupsRoutingTable
+
+ logger = get_logger(name=__name__, category="core::routers")
+
+
+ class ToolRuntimeRouter(ToolRuntime):
+     class RagToolImpl(RAGToolRuntime):
+         def __init__(
+             self,
+             routing_table: ToolGroupsRoutingTable,
+         ) -> None:
+             logger.debug("Initializing ToolRuntimeRouter.RagToolImpl")
+             self.routing_table = routing_table
+
+         async def query(
+             self,
+             content: InterleavedContent,
+             vector_store_ids: list[str],
+             query_config: RAGQueryConfig | None = None,
+         ) -> RAGQueryResult:
+             logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}")
+             provider = await self.routing_table.get_provider_impl("knowledge_search")
+             return await provider.query(content, vector_store_ids, query_config)
+
+         async def insert(
+             self,
+             documents: list[RAGDocument],
+             vector_store_id: str,
+             chunk_size_in_tokens: int = 512,
+         ) -> None:
+             logger.debug(
+                 f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
+             )
+             provider = await self.routing_table.get_provider_impl("insert_into_memory")
+             return await provider.insert(documents, vector_store_id, chunk_size_in_tokens)
+
+     def __init__(
+         self,
+         routing_table: ToolGroupsRoutingTable,
+     ) -> None:
+         logger.debug("Initializing ToolRuntimeRouter")
+         self.routing_table = routing_table
+
+         # HACK ALERT this should be in sync with "get_all_api_endpoints()"
+         self.rag_tool = self.RagToolImpl(routing_table)
+         for method in ("query", "insert"):
+             setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))
+
+     async def initialize(self) -> None:
+         logger.debug("ToolRuntimeRouter.initialize")
+         pass
+
+     async def shutdown(self) -> None:
+         logger.debug("ToolRuntimeRouter.shutdown")
+         pass
+
+     async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> Any:
+         logger.debug(f"ToolRuntimeRouter.invoke_tool: {tool_name}")
+         provider = await self.routing_table.get_provider_impl(tool_name)
+         return await provider.invoke_tool(
+             tool_name=tool_name,
+             kwargs=kwargs,
+         )
+
+     async def list_runtime_tools(
+         self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+     ) -> ListToolDefsResponse:
+         logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}")
+         return await self.routing_table.list_tools(tool_group_id)
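A short, hedged sketch of driving the `rag_tool` sub-router shown above; `tool_router`, the document contents, and the vector store id are assumptions for illustration. The calls mirror the `RagToolImpl.insert` and `RagToolImpl.query` signatures in this hunk, which route through the `insert_into_memory` and `knowledge_search` tools respectively.

```python
# Hypothetical usage -- `tool_router`, the ids, and the document text are illustrative.
docs = [RAGDocument(document_id="doc-1", content="Llama Stack routes tool calls to providers.")]

# Routed to the provider behind the "insert_into_memory" tool.
await tool_router.rag_tool.insert(
    documents=docs,
    vector_store_id="vs_example",
    chunk_size_in_tokens=512,
)

# Routed to the provider behind the "knowledge_search" tool.
result = await tool_router.rag_tool.query(
    content="How are tool calls routed?",
    vector_store_ids=["vs_example"],
)
```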
@@ -0,0 +1,442 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ import asyncio
+ import uuid
+ from typing import Annotated, Any
+
+ from fastapi import Body
+
+ from llama_stack.apis.common.content_types import InterleavedContent
+ from llama_stack.apis.models import ModelType
+ from llama_stack.apis.vector_io import (
+     Chunk,
+     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
+     OpenAICreateVectorStoreRequestWithExtraBody,
+     QueryChunksResponse,
+     SearchRankingOptions,
+     VectorIO,
+     VectorStoreChunkingStrategy,
+     VectorStoreDeleteResponse,
+     VectorStoreFileBatchObject,
+     VectorStoreFileContentsResponse,
+     VectorStoreFileDeleteResponse,
+     VectorStoreFileObject,
+     VectorStoreFilesListInBatchResponse,
+     VectorStoreFileStatus,
+     VectorStoreListResponse,
+     VectorStoreObject,
+     VectorStoreSearchResponsePage,
+ )
+ from llama_stack.core.datatypes import VectorStoresConfig
+ from llama_stack.log import get_logger
+ from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
+
+ logger = get_logger(name=__name__, category="core::routers")
+
+
+ class VectorIORouter(VectorIO):
+     """Routes to a provider based on the vector db identifier."""
+
+     def __init__(
+         self,
+         routing_table: RoutingTable,
+         vector_stores_config: VectorStoresConfig | None = None,
+     ) -> None:
+         logger.debug("Initializing VectorIORouter")
+         self.routing_table = routing_table
+         self.vector_stores_config = vector_stores_config
+
+     async def initialize(self) -> None:
+         logger.debug("VectorIORouter.initialize")
+         pass
+
+     async def shutdown(self) -> None:
+         logger.debug("VectorIORouter.shutdown")
+         pass
+
+     async def _get_embedding_model_dimension(self, embedding_model_id: str) -> int:
+         """Get the embedding dimension for a specific embedding model."""
+         all_models = await self.routing_table.get_all_with_type("model")
+
+         for model in all_models:
+             if model.identifier == embedding_model_id and model.model_type == ModelType.embedding:
+                 dimension = model.metadata.get("embedding_dimension")
+                 if dimension is None:
+                     raise ValueError(f"Embedding model '{embedding_model_id}' has no embedding_dimension in metadata")
+                 return int(dimension)
+
+         raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model")
+
+     async def insert_chunks(
+         self,
+         vector_db_id: str,
+         chunks: list[Chunk],
+         ttl_seconds: int | None = None,
+     ) -> None:
+         doc_ids = [chunk.document_id for chunk in chunks[:3]]
+         logger.debug(
+             f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, "
+             f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}"
+         )
+         provider = await self.routing_table.get_provider_impl(vector_db_id)
+         return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)
+
+     async def query_chunks(
+         self,
+         vector_db_id: str,
+         query: InterleavedContent,
+         params: dict[str, Any] | None = None,
+     ) -> QueryChunksResponse:
+         logger.debug(f"VectorIORouter.query_chunks: {vector_db_id}")
+         provider = await self.routing_table.get_provider_impl(vector_db_id)
+         return await provider.query_chunks(vector_db_id, query, params)
+
+     # OpenAI Vector Stores API endpoints
+     async def openai_create_vector_store(
+         self,
+         params: Annotated[OpenAICreateVectorStoreRequestWithExtraBody, Body(...)],
+     ) -> VectorStoreObject:
+         # Extract llama-stack-specific parameters from extra_body
+         extra = params.model_extra or {}
+         embedding_model = extra.get("embedding_model")
+         embedding_dimension = extra.get("embedding_dimension")
+         provider_id = extra.get("provider_id")
+
+         # Use default embedding model if not specified
+         if (
+             embedding_model is None
+             and self.vector_stores_config
+             and self.vector_stores_config.default_embedding_model is not None
+         ):
+             # Construct the full model ID with provider prefix
+             embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id
+             model_id = self.vector_stores_config.default_embedding_model.model_id
+             embedding_model = f"{embedding_provider_id}/{model_id}"
+
+         if embedding_model is not None and embedding_dimension is None:
+             embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
+
+         # Auto-select provider if not specified
+         if provider_id is None:
+             num_providers = len(self.routing_table.impls_by_provider_id)
+             if num_providers == 0:
+                 raise ValueError("No vector_io providers available")
+             if num_providers > 1:
+                 available_providers = list(self.routing_table.impls_by_provider_id.keys())
+                 # Use default configured provider
+                 if self.vector_stores_config and self.vector_stores_config.default_provider_id:
+                     default_provider = self.vector_stores_config.default_provider_id
+                     if default_provider in available_providers:
+                         provider_id = default_provider
+                         logger.debug(f"Using configured default vector store provider: {provider_id}")
+                     else:
+                         raise ValueError(
+                             f"Configured default vector store provider '{default_provider}' not found. "
+                             f"Available providers: {available_providers}"
+                         )
+                 else:
+                     raise ValueError(
+                         f"Multiple vector_io providers available. Please specify provider_id in extra_body. "
+                         f"Available providers: {available_providers}"
+                     )
+             else:
+                 provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
+
+         vector_store_id = f"vs_{uuid.uuid4()}"
+         registered_vector_store = await self.routing_table.register_vector_store(
+             vector_store_id=vector_store_id,
+             embedding_model=embedding_model,
+             embedding_dimension=embedding_dimension,
+             provider_id=provider_id,
+             provider_vector_store_id=vector_store_id,
+             vector_store_name=params.name,
+         )
+         provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier)
+
+         # Update model_extra with registered values so provider uses the already-registered vector_store
+         if params.model_extra is None:
+             params.model_extra = {}
+         params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id
+         params.model_extra["provider_id"] = registered_vector_store.provider_id
+         if embedding_model is not None:
+             params.model_extra["embedding_model"] = embedding_model
+         if embedding_dimension is not None:
+             params.model_extra["embedding_dimension"] = embedding_dimension
+
+         return await provider.openai_create_vector_store(params)
+
+     async def openai_list_vector_stores(
+         self,
+         limit: int | None = 20,
+         order: str | None = "desc",
+         after: str | None = None,
+         before: str | None = None,
+     ) -> VectorStoreListResponse:
+         logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}")
+         # Route to default provider for now - could aggregate from all providers in the future
+         # call retrieve on each vector store to get the list of vector stores
+         vector_stores = await self.routing_table.get_all_with_type("vector_store")
+         all_stores = []
+         for vector_store in vector_stores:
+             try:
+                 provider = await self.routing_table.get_provider_impl(vector_store.identifier)
+                 vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier)
+                 all_stores.append(vector_store)
+             except Exception as e:
+                 logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}")
+                 continue
+
+         # Sort by created_at
+         reverse_order = order == "desc"
+         all_stores.sort(key=lambda x: x.created_at, reverse=reverse_order)
+
+         # Apply cursor-based pagination
+         if after:
+             after_index = next((i for i, store in enumerate(all_stores) if store.id == after), -1)
+             if after_index >= 0:
+                 all_stores = all_stores[after_index + 1 :]
+
+         if before:
+             before_index = next(
+                 (i for i, store in enumerate(all_stores) if store.id == before),
+                 len(all_stores),
+             )
+             all_stores = all_stores[:before_index]
+
+         # Apply limit
+         limited_stores = all_stores[:limit]
+
+         # Determine pagination info
+         has_more = len(all_stores) > limit
+         first_id = limited_stores[0].id if limited_stores else None
+         last_id = limited_stores[-1].id if limited_stores else None
+
+         return VectorStoreListResponse(
+             data=limited_stores,
+             has_more=has_more,
+             first_id=first_id,
+             last_id=last_id,
+         )
+
+     async def openai_retrieve_vector_store(
+         self,
+         vector_store_id: str,
+     ) -> VectorStoreObject:
+         logger.debug(f"VectorIORouter.openai_retrieve_vector_store: {vector_store_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_retrieve_vector_store(vector_store_id)
+
+     async def openai_update_vector_store(
+         self,
+         vector_store_id: str,
+         name: str | None = None,
+         expires_after: dict[str, Any] | None = None,
+         metadata: dict[str, Any] | None = None,
+     ) -> VectorStoreObject:
+         logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_update_vector_store(
+             vector_store_id=vector_store_id,
+             name=name,
+             expires_after=expires_after,
+             metadata=metadata,
+         )
+
+     async def openai_delete_vector_store(
+         self,
+         vector_store_id: str,
+     ) -> VectorStoreDeleteResponse:
+         logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}")
+         return await self.routing_table.openai_delete_vector_store(vector_store_id)
+
+     async def openai_search_vector_store(
+         self,
+         vector_store_id: str,
+         query: str | list[str],
+         filters: dict[str, Any] | None = None,
+         max_num_results: int | None = 10,
+         ranking_options: SearchRankingOptions | None = None,
+         rewrite_query: bool | None = False,
+         search_mode: str | None = "vector",
+     ) -> VectorStoreSearchResponsePage:
+         logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_search_vector_store(
+             vector_store_id=vector_store_id,
+             query=query,
+             filters=filters,
+             max_num_results=max_num_results,
+             ranking_options=ranking_options,
+             rewrite_query=rewrite_query,
+             search_mode=search_mode,
+         )
+
+     async def openai_attach_file_to_vector_store(
+         self,
+         vector_store_id: str,
+         file_id: str,
+         attributes: dict[str, Any] | None = None,
+         chunking_strategy: VectorStoreChunkingStrategy | None = None,
+     ) -> VectorStoreFileObject:
+         logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_attach_file_to_vector_store(
+             vector_store_id=vector_store_id,
+             file_id=file_id,
+             attributes=attributes,
+             chunking_strategy=chunking_strategy,
+         )
+
+     async def openai_list_files_in_vector_store(
+         self,
+         vector_store_id: str,
+         limit: int | None = 20,
+         order: str | None = "desc",
+         after: str | None = None,
+         before: str | None = None,
+         filter: VectorStoreFileStatus | None = None,
+     ) -> list[VectorStoreFileObject]:
+         logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_list_files_in_vector_store(
+             vector_store_id=vector_store_id,
+             limit=limit,
+             order=order,
+             after=after,
+             before=before,
+             filter=filter,
+         )
+
+     async def openai_retrieve_vector_store_file(
+         self,
+         vector_store_id: str,
+         file_id: str,
+     ) -> VectorStoreFileObject:
+         logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file: {vector_store_id}, {file_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_retrieve_vector_store_file(
+             vector_store_id=vector_store_id,
+             file_id=file_id,
+         )
+
+     async def openai_retrieve_vector_store_file_contents(
+         self,
+         vector_store_id: str,
+         file_id: str,
+     ) -> VectorStoreFileContentsResponse:
+         logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_retrieve_vector_store_file_contents(
+             vector_store_id=vector_store_id,
+             file_id=file_id,
+         )
+
+     async def openai_update_vector_store_file(
+         self,
+         vector_store_id: str,
+         file_id: str,
+         attributes: dict[str, Any],
+     ) -> VectorStoreFileObject:
+         logger.debug(f"VectorIORouter.openai_update_vector_store_file: {vector_store_id}, {file_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_update_vector_store_file(
+             vector_store_id=vector_store_id,
+             file_id=file_id,
+             attributes=attributes,
+         )
+
+     async def openai_delete_vector_store_file(
+         self,
+         vector_store_id: str,
+         file_id: str,
+     ) -> VectorStoreFileDeleteResponse:
+         logger.debug(f"VectorIORouter.openai_delete_vector_store_file: {vector_store_id}, {file_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_delete_vector_store_file(
+             vector_store_id=vector_store_id,
+             file_id=file_id,
+         )
+
+     async def health(self) -> dict[str, HealthResponse]:
+         health_statuses = {}
+         timeout = 1  # increasing the timeout to 1 second for health checks
+         for provider_id, impl in self.routing_table.impls_by_provider_id.items():
+             try:
+                 # check if the provider has a health method
+                 if not hasattr(impl, "health"):
+                     continue
+                 health = await asyncio.wait_for(impl.health(), timeout=timeout)
+                 health_statuses[provider_id] = health
+             except TimeoutError:
+                 health_statuses[provider_id] = HealthResponse(
+                     status=HealthStatus.ERROR,
+                     message=f"Health check timed out after {timeout} seconds",
+                 )
+             except NotImplementedError:
+                 health_statuses[provider_id] = HealthResponse(status=HealthStatus.NOT_IMPLEMENTED)
+             except Exception as e:
+                 health_statuses[provider_id] = HealthResponse(
+                     status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"
+                 )
+         return health_statuses
+
+     async def openai_create_vector_store_file_batch(
+         self,
+         vector_store_id: str,
+         params: Annotated[OpenAICreateVectorStoreFileBatchRequestWithExtraBody, Body(...)],
+     ) -> VectorStoreFileBatchObject:
+         logger.debug(
+             f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files"
+         )
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_create_vector_store_file_batch(vector_store_id, params)
+
+     async def openai_retrieve_vector_store_file_batch(
+         self,
+         batch_id: str,
+         vector_store_id: str,
+     ) -> VectorStoreFileBatchObject:
+         logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_batch: {batch_id}, {vector_store_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_retrieve_vector_store_file_batch(
+             batch_id=batch_id,
+             vector_store_id=vector_store_id,
+         )
+
+     async def openai_list_files_in_vector_store_file_batch(
+         self,
+         batch_id: str,
+         vector_store_id: str,
+         after: str | None = None,
+         before: str | None = None,
+         filter: str | None = None,
+         limit: int | None = 20,
+         order: str | None = "desc",
+     ) -> VectorStoreFilesListInBatchResponse:
+         logger.debug(f"VectorIORouter.openai_list_files_in_vector_store_file_batch: {batch_id}, {vector_store_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_list_files_in_vector_store_file_batch(
+             batch_id=batch_id,
+             vector_store_id=vector_store_id,
+             after=after,
+             before=before,
+             filter=filter,
+             limit=limit,
+             order=order,
+         )
+
+     async def openai_cancel_vector_store_file_batch(
+         self,
+         batch_id: str,
+         vector_store_id: str,
+     ) -> VectorStoreFileBatchObject:
+         logger.debug(f"VectorIORouter.openai_cancel_vector_store_file_batch: {batch_id}, {vector_store_id}")
+         provider = await self.routing_table.get_provider_impl(vector_store_id)
+         return await provider.openai_cancel_vector_store_file_batch(
+             batch_id=batch_id,
+             vector_store_id=vector_store_id,
+         )
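The provider-selection path in `openai_create_vector_store` reads llama-stack-specific fields (`embedding_model`, `embedding_dimension`, `provider_id`) out of the request's `model_extra`. Below is a hedged sketch of a call that exercises it; the `vector_io_router` instance, provider id, and embedding model name are assumptions, not values taken from this diff.

```python
# Hypothetical sketch -- router instance, provider id, and model name are placeholders.
# Extra keyword fields end up in params.model_extra, which the router consults
# before registering the store and delegating to the chosen provider.
params = OpenAICreateVectorStoreRequestWithExtraBody(
    name="docs-store",
    embedding_model="all-MiniLM-L6-v2",  # read back from model_extra
    provider_id="faiss",                 # skips the auto-selection branch
)
store = await vector_io_router.openai_create_vector_store(params)
print(store.id)  # e.g. "vs_<uuid>"
```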
@@ -0,0 +1,62 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from typing import Any
+
+ from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
+ from llama_stack.core.datatypes import (
+     BenchmarkWithOwner,
+ )
+ from llama_stack.log import get_logger
+
+ from .common import CommonRoutingTableImpl
+
+ logger = get_logger(name=__name__, category="core::routing_tables")
+
+
+ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
+     async def list_benchmarks(self) -> ListBenchmarksResponse:
+         return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark"))
+
+     async def get_benchmark(self, benchmark_id: str) -> Benchmark:
+         benchmark = await self.get_object_by_identifier("benchmark", benchmark_id)
+         if benchmark is None:
+             raise ValueError(f"Benchmark '{benchmark_id}' not found")
+         return benchmark
+
+     async def register_benchmark(
+         self,
+         benchmark_id: str,
+         dataset_id: str,
+         scoring_functions: list[str],
+         metadata: dict[str, Any] | None = None,
+         provider_benchmark_id: str | None = None,
+         provider_id: str | None = None,
+     ) -> None:
+         if metadata is None:
+             metadata = {}
+         if provider_id is None:
+             if len(self.impls_by_provider_id) == 1:
+                 provider_id = list(self.impls_by_provider_id.keys())[0]
+             else:
+                 raise ValueError(
+                     "No provider specified and multiple providers available. Please specify a provider_id."
+                 )
+         if provider_benchmark_id is None:
+             provider_benchmark_id = benchmark_id
+         benchmark = BenchmarkWithOwner(
+             identifier=benchmark_id,
+             dataset_id=dataset_id,
+             scoring_functions=scoring_functions,
+             metadata=metadata,
+             provider_id=provider_id,
+             provider_resource_id=provider_benchmark_id,
+         )
+         await self.register_object(benchmark)
+
+     async def unregister_benchmark(self, benchmark_id: str) -> None:
+         existing_benchmark = await self.get_benchmark(benchmark_id)
+         await self.unregister_object(existing_benchmark)
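Finally, a hedged sketch of registering and retrieving a benchmark through this routing table; `benchmarks_table`, the dataset id, and the scoring function id are illustrative placeholders. With exactly one eval provider configured, `provider_id` can be omitted, and `provider_benchmark_id` falls back to the benchmark id.

```python
# Hypothetical sketch -- table instance, dataset id, and scoring function id are placeholders.
await benchmarks_table.register_benchmark(
    benchmark_id="my-mmlu-5shot",
    dataset_id="mmlu",
    scoring_functions=["basic::regex_parser_multiple_choice_answer"],
    metadata={"num_few_shot": 5},
)
benchmark = await benchmarks_table.get_benchmark("my-mmlu-5shot")
```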