llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
llama_stack/providers/registry/vector_io.py
@@ -0,0 +1,828 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+
+ from llama_stack.providers.datatypes import (
+     Api,
+     InlineProviderSpec,
+     ProviderSpec,
+     RemoteProviderSpec,
+ )
+
+ # Common dependencies for all vector IO providers that support document processing
+ DEFAULT_VECTOR_IO_DEPS = ["chardet", "pypdf"]
+
+
+ def available_providers() -> list[ProviderSpec]:
+     return [
+         InlineProviderSpec(
+             api=Api.vector_io,
+             provider_type="inline::meta-reference",
+             pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS,
+             module="llama_stack.providers.inline.vector_io.faiss",
+             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
+             deprecation_warning="Please use the `inline::faiss` provider instead.",
+             api_dependencies=[Api.inference],
+             optional_api_dependencies=[Api.files, Api.models],
+             description="Meta's reference implementation of a vector database.",
+         ),
+         InlineProviderSpec(
+             api=Api.vector_io,
+             provider_type="inline::faiss",
+             pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS,
+             module="llama_stack.providers.inline.vector_io.faiss",
+             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
+             api_dependencies=[Api.inference],
+             optional_api_dependencies=[Api.files, Api.models],
+             description="""
+ [Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It
+ allows you to store and query vectors directly in memory.
+ That means you'll get fast and efficient vector retrieval.
+
+ ## Features
+
+ - Lightweight and easy to use
+ - Fully integrated with Llama Stack
+ - GPU support
+ - **Vector search** - FAISS supports pure vector similarity search using embeddings
+
+ ## Search Modes
+
+ **Supported:**
+ - **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings
+
+ **Not Supported:**
+ - **Keyword Search** (`mode="keyword"`): Not supported by FAISS
+ - **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS
+
+ > **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality.
+
+ ## Usage
+
+ To use Faiss in your Llama Stack project, follow these steps:
+
+ 1. Install the necessary dependencies.
+ 2. Configure your Llama Stack project to use Faiss.
+ 3. Start storing and querying vectors.
+
+ ## Installation
+
+ You can install Faiss using pip:
+
+ ```bash
+ pip install faiss-cpu
+ ```
+ ## Documentation
+ See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for
+ more details about Faiss in general.
+ """,
+         ),
+         # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a
+         # source distribution and the wheels are not available for all platforms.
+         InlineProviderSpec(
+             api=Api.vector_io,
+             provider_type="inline::sqlite-vec",
+             pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS,
+             module="llama_stack.providers.inline.vector_io.sqlite_vec",
+             config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
+             api_dependencies=[Api.inference],
+             optional_api_dependencies=[Api.files, Api.models],
+             description="""
+ [SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It
+ allows you to store and query vectors directly within an SQLite database.
+ That means you're not limited to storing vectors in memory or in a separate service.
+
+ ## Features
+
+ - Lightweight and easy to use
+ - Fully integrated with Llama Stack
+ - Uses disk-based storage for persistence, allowing for larger vector storage
+
+ ### Comparison to Faiss
+
+ The choice between Faiss and sqlite-vec should be made based on the needs of your application,
+ as they have different strengths.
+
+ #### Choosing the Right Provider
+
+ Scenario | Recommended Tool | Reason
+ -- | -- | --
+ Online Analytical Processing (OLAP) | Faiss | Fast, in-memory searches
+ Online Transaction Processing (OLTP) | sqlite-vec | Frequent writes and reads
+ Frequent writes | sqlite-vec | Efficient disk-based storage and incremental indexing
+ Large datasets | sqlite-vec | Disk-based storage for larger vector storage
+ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, indexing, and GPU acceleration
+
+ #### Empirical Example
+
+ Consider the histogram below in which 10,000 randomly generated strings were inserted
+ in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.
+
+ ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
+ :alt: Comparison of SQLite-Vec and Faiss write times
+ :width: 400px
+ ```
+
129
+ You will notice that the average write time for `sqlite-vec` was 788ms, compared to
130
+ 47,640ms for Faiss. While the Faiss number is jarring, its distribution is spread rather
131
+ uniformly across the [1500, 100000] ms interval.
132
+
133
+ Looking at each individual write, in the order the documents are inserted, you'll see the increase in
134
+ write time as Faiss re-indexes the vectors after each write.
135
+ ```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
136
+ :alt: Comparison of SQLite-Vec and Faiss write times
137
+ :width: 400px
138
+ ```
139
+
140
+ In comparison, read times for Faiss were on average 10% faster than for sqlite-vec.
141
+ The modes of the two distributions highlight the difference even further: Faiss
142
+ will likely yield faster read performance.
143
+
144
+ ```{image} ../../../../_static/providers/vector_io/read_time_comparison_sqlite-vec-faiss.png
145
+ :alt: Comparison of SQLite-Vec and Faiss read times
146
+ :width: 400px
147
+ ```
148
+
149
+ ## Usage
150
+
151
+ To use sqlite-vec in your Llama Stack project, follow these steps:
152
+
153
+ 1. Install the necessary dependencies.
154
+ 2. Configure your Llama Stack project to use SQLite-Vec.
155
+ 3. Start storing and querying vectors.
156
+
157
+ The SQLite-vec provider supports three search modes:
158
+
159
+ 1. **Vector Search** (`mode="vector"`): Performs pure vector similarity search using the embeddings.
160
+ 2. **Keyword Search** (`mode="keyword"`): Performs full-text search using SQLite's FTS5.
161
+ 3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword search for better results. First performs keyword search to get candidate matches, then applies vector similarity search on those candidates.
162
+
163
+ Example with hybrid search:
164
+ ```python
165
+ response = await vector_io.query_chunks(
166
+ vector_db_id="my_db",
167
+ query="your query here",
168
+ params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
169
+ )
170
+
171
+ # Using RRF ranker
172
+ response = await vector_io.query_chunks(
173
+ vector_db_id="my_db",
174
+ query="your query here",
175
+ params={
176
+ "mode": "hybrid",
177
+ "max_chunks": 3,
178
+ "score_threshold": 0.7,
179
+ "ranker": {"type": "rrf", "impact_factor": 60.0},
180
+ },
181
+ )
182
+
183
+ # Using weighted ranker
184
+ response = await vector_io.query_chunks(
185
+ vector_db_id="my_db",
186
+ query="your query here",
187
+ params={
188
+ "mode": "hybrid",
189
+ "max_chunks": 3,
190
+ "score_threshold": 0.7,
191
+ "ranker": {"type": "weighted", "alpha": 0.7}, # 70% vector, 30% keyword
192
+ },
193
+ )
194
+ ```
195
+
196
+ Example with explicit vector search:
197
+ ```python
198
+ response = await vector_io.query_chunks(
199
+ vector_db_id="my_db",
200
+ query="your query here",
201
+ params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
202
+ )
203
+ ```
204
+
205
+ Example with keyword search:
206
+ ```python
207
+ response = await vector_io.query_chunks(
208
+ vector_db_id="my_db",
209
+ query="your query here",
210
+ params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
211
+ )
212
+ ```
213
+
214
+ ## Supported Search Modes
215
+
216
+ The SQLite vector store supports three search modes:
217
+
218
+ 1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks
219
+ 2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks
220
+ 3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker
221
+
222
+ ### Hybrid Search
223
+
224
+ Hybrid search combines the strengths of both vector and keyword search by:
225
+ - Computing vector similarity scores
226
+ - Computing keyword match scores
227
+ - Using a ranker to combine these scores
228
+
229
+ Two ranker types are supported (a score-fusion sketch follows this list):
230
+
231
+ 1. **RRF (Reciprocal Rank Fusion)**:
232
+ - Combines ranks from both vector and keyword results
233
+ - Uses an impact factor (default: 60.0) to control the weight of higher-ranked results
234
+ - Good for balancing between vector and keyword results
235
+ - The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks
236
+
237
+ 2. **Weighted**:
238
+ - Linearly combines normalized vector and keyword scores
239
+ - Uses an alpha parameter (0-1) to control the blend:
240
+ - alpha=0: Only use keyword scores
241
+ - alpha=1: Only use vector scores
242
+ - alpha=0.5: Equal weight to both (default)
243
+
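+ To make the combination rules concrete, here is a plain-Python sketch of how the two
+ rankers could fuse scores (illustrative only; the provider's internal implementation may
+ differ):
+
+ ```python
+ # Illustrative score fusion, not the provider's actual code.
+
+
+ def rrf_score(vector_rank: int, keyword_rank: int, impact_factor: float = 60.0) -> float:
+     # Reciprocal Rank Fusion: sum of reciprocal ranks, each offset by the impact factor.
+     return 1.0 / (impact_factor + vector_rank) + 1.0 / (impact_factor + keyword_rank)
+
+
+ def weighted_score(vector_score: float, keyword_score: float, alpha: float = 0.5) -> float:
+     # Linear blend of normalized scores: alpha=1 uses only vector, alpha=0 only keyword.
+     return alpha * vector_score + (1 - alpha) * keyword_score
+ ```
+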
244
+ Example using RAGQueryConfig with different search modes:
245
+
246
+ ```python
247
+ from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
248
+
249
+ # Vector search
250
+ config = RAGQueryConfig(mode="vector", max_chunks=5)
251
+
252
+ # Keyword search
253
+ config = RAGQueryConfig(mode="keyword", max_chunks=5)
254
+
255
+ # Hybrid search with custom RRF ranker
256
+ config = RAGQueryConfig(
257
+ mode="hybrid",
258
+ max_chunks=5,
259
+ ranker=RRFRanker(impact_factor=50.0), # Custom impact factor
260
+ )
261
+
262
+ # Hybrid search with weighted ranker
263
+ config = RAGQueryConfig(
264
+ mode="hybrid",
265
+ max_chunks=5,
266
+ ranker=WeightedRanker(alpha=0.7), # 70% vector, 30% keyword
267
+ )
268
+
269
+ # Hybrid search with default RRF ranker
270
+ config = RAGQueryConfig(
271
+ mode="hybrid", max_chunks=5
272
+ ) # Will use RRF with impact_factor=60.0
273
+ ```
274
+
275
+ Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored.
276
+
277
+ ## Installation
278
+
279
+ You can install SQLite-Vec using pip:
280
+
281
+ ```bash
282
+ pip install sqlite-vec
283
+ ```
284
+
285
+ ## Documentation
286
+
287
+ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.
288
+
289
+ [^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
290
+ """,
291
+ ),
292
+ InlineProviderSpec(
293
+ api=Api.vector_io,
294
+ provider_type="inline::sqlite_vec",
295
+ pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS,
296
+ module="llama_stack.providers.inline.vector_io.sqlite_vec",
297
+ config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
298
+ deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
299
+ api_dependencies=[Api.inference],
300
+ optional_api_dependencies=[Api.files, Api.models],
301
+ description="""
302
+ Please refer to the sqlite-vec provider documentation.
303
+ """,
304
+ ),
305
+ RemoteProviderSpec(
306
+ api=Api.vector_io,
307
+ adapter_type="chromadb",
308
+ provider_type="remote::chromadb",
309
+ pip_packages=["chromadb-client"] + DEFAULT_VECTOR_IO_DEPS,
310
+ module="llama_stack.providers.remote.vector_io.chroma",
311
+ config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
312
+ api_dependencies=[Api.inference],
313
+ optional_api_dependencies=[Api.files, Api.models],
314
+ description="""
315
+ [Chroma](https://www.trychroma.com/) is an inline and remote vector
316
+ database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
317
+ That means you're not limited to storing vectors in memory or in a separate service.
318
+
319
+ ## Features
320
+ Chroma supports:
321
+ - Store embeddings and their metadata
322
+ - Vector search
323
+ - Full-text search
324
+ - Document storage
325
+ - Metadata filtering
326
+ - Multi-modal retrieval
327
+
328
+ ## Usage
329
+
330
+ To use Chroma in your Llama Stack project, follow these steps:
331
+
332
+ 1. Install the necessary dependencies.
333
+ 2. Configure your Llama Stack project to use Chroma.
334
+ 3. Start storing and querying vectors, as sketched below.
335
+
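+ As a minimal sketch (reusing the `query_chunks` style shown for the sqlite-vec provider
+ above; `insert_chunks`, the `chunks` list, and the `vector_db_id` are illustrative):
+
+ ```python
+ # Sketch: inserting and querying chunks against a Chroma-backed vector DB.
+ await vector_io.insert_chunks(vector_db_id="my_chroma_db", chunks=chunks)
+
+ response = await vector_io.query_chunks(
+     vector_db_id="my_chroma_db",
+     query="your query here",
+     params={"max_chunks": 3},
+ )
+ ```
+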
336
+ ## Installation
337
+
338
+ You can install Chroma using pip:
339
+
340
+ ```bash
341
+ pip install chromadb
342
+ ```
343
+
344
+ ## Documentation
345
+ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
346
+ """,
347
+ ),
348
+ InlineProviderSpec(
349
+ api=Api.vector_io,
350
+ provider_type="inline::chromadb",
351
+ pip_packages=["chromadb"] + DEFAULT_VECTOR_IO_DEPS,
352
+ module="llama_stack.providers.inline.vector_io.chroma",
353
+ config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
354
+ api_dependencies=[Api.inference],
355
+ optional_api_dependencies=[Api.files, Api.models],
356
+ description="""
357
+ [Chroma](https://www.trychroma.com/) is an inline and remote vector
358
+ database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
359
+ That means you're not limited to storing vectors in memory or in a separate service.
360
+
361
+ ## Features
362
+ Chroma supports:
363
+ - Store embeddings and their metadata
364
+ - Vector search
365
+ - Full-text search
366
+ - Document storage
367
+ - Metadata filtering
368
+ - Multi-modal retrieval
369
+
370
+ ## Usage
371
+
372
+ To use Chroma in your Llama Stack project, follow these steps:
373
+
374
+ 1. Install the necessary dependencies.
375
+ 2. Configure your Llama Stack project to use Chroma.
376
+ 3. Start storing and querying vectors.
377
+
378
+ ## Installation
379
+
380
+ You can install Chroma using pip:
381
+
382
+ ```bash
383
+ pip install chromadb
384
+ ```
385
+
386
+ ## Documentation
387
+ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
388
+
389
+ """,
390
+ ),
391
+ RemoteProviderSpec(
392
+ api=Api.vector_io,
393
+ adapter_type="pgvector",
394
+ provider_type="remote::pgvector",
395
+ pip_packages=["psycopg2-binary"] + DEFAULT_VECTOR_IO_DEPS,
396
+ module="llama_stack.providers.remote.vector_io.pgvector",
397
+ config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
398
+ api_dependencies=[Api.inference],
399
+ optional_api_dependencies=[Api.files, Api.models],
400
+ description="""
401
+ [PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It
402
+ allows you to store and query vectors directly within a PostgreSQL database.
403
+ That means you get persistent storage together with efficient vector retrieval.
404
+
405
+ ## Features
406
+
407
+ - Easy to use
408
+ - Fully integrated with Llama Stack
409
+
410
+ There are three search implementations available for PGVectorIndex (a usage sketch follows the schema below):
411
+
412
+ 1. Vector Search:
413
+ - How it works:
414
+ - Uses PostgreSQL's vector extension (pgvector) to perform similarity search
415
+ - Compares query embeddings against stored embeddings using Cosine distance or other distance metrics
416
+ - E.g. SQL query: `SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance`
417
+
418
+ - Characteristics:
419
+ - Semantic understanding - finds documents similar in meaning even if they don't share keywords
420
+ - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
421
+ - Best for: Finding conceptually related content, handling synonyms, cross-language search
422
+
423
+ 2. Keyword Search
424
+ - How it works:
425
+ - Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank
426
+ - Converts text to searchable tokens using to_tsvector('english', text). Default language is English.
427
+ - E.g. SQL query: `SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score`
428
+
429
+ - Characteristics:
430
+ - Lexical matching - finds exact keyword matches and variations
431
+ - Uses GIN (Generalized Inverted Index) for fast text search performance
432
+ - Scoring: Uses PostgreSQL's ts_rank function for relevance scoring
433
+ - Best for: Exact term matching, proper names, technical terms, Boolean-style queries
434
+
435
+ 3. Hybrid Search
436
+ - How it works:
437
+ - Combines both vector and keyword search results
438
+ - Runs both searches independently, then merges results using configurable reranking
439
+
440
+ - Two reranking strategies available:
441
+ - Reciprocal Rank Fusion (RRF) - (default: 60.0)
442
+ - Weighted Average - (default: 0.5)
443
+
444
+ - Characteristics:
445
+ - Best of both worlds: semantic understanding + exact matching
446
+ - Documents appearing in both searches get boosted scores
447
+ - Configurable balance between semantic and lexical matching
448
+ - Best for: General-purpose search where you want both precision and recall
449
+
450
+ 4. Database Schema
451
+ The PGVector implementation stores data optimized for all three search types:
452
+ ```sql
+ CREATE TABLE vector_store_xxx (
453
+     id TEXT PRIMARY KEY,
454
+     document JSONB,                -- Original document
455
+     embedding vector(dimension),   -- For vector search
456
+     content_text TEXT,             -- Raw text content
457
+     tokenized_content TSVECTOR     -- For keyword search
458
+ );
459
+
460
+ -- Indexes for performance
461
+ CREATE INDEX content_gin_idx ON vector_store_xxx USING GIN(tokenized_content); -- Keyword search
462
+ -- Vector index created automatically by pgvector
+ ```
463
+
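+ As a usage sketch (mirroring the `client.vector_stores.search` examples in the Milvus
+ section below; the client, store id, and query are illustrative):
+
+ ```python
+ # Sketch: exercising the three PGVector search implementations in turn.
+ for mode in ("vector", "keyword", "hybrid"):
+     response = client.vector_stores.search(
+         vector_store_id=vector_store.id,
+         query="neural networks in Python",
+         search_mode=mode,
+         max_num_results=5,
+     )
+ ```
+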
464
+ ## Usage
465
+
466
+ To use PGVector in your Llama Stack project, follow these steps:
467
+
468
+ 1. Install the necessary dependencies.
469
+ 2. Configure your Llama Stack project to use pgvector (e.g. `remote::pgvector`).
470
+ 3. Start storing and querying vectors.
471
+
472
+ ## Example: setting up your environment for PGVector
473
+
474
+ 1. Export env vars:
475
+ ```bash
476
+ export ENABLE_PGVECTOR=true
477
+ export PGVECTOR_HOST=localhost
478
+ export PGVECTOR_PORT=5432
479
+ export PGVECTOR_DB=llamastack
480
+ export PGVECTOR_USER=llamastack
481
+ export PGVECTOR_PASSWORD=llamastack
482
+ ```
483
+
484
+ 2. Create DB:
485
+ ```bash
486
+ psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
487
+ psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
488
+ psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
489
+ ```
490
+
491
+ ## Installation
492
+
493
+ You can install PGVector using docker:
494
+
495
+ ```bash
496
+ docker pull pgvector/pgvector:pg17
497
+ ```
498
+ ## Documentation
499
+ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
500
+ """,
501
+ ),
502
+ RemoteProviderSpec(
503
+ api=Api.vector_io,
504
+ adapter_type="weaviate",
505
+ provider_type="remote::weaviate",
506
+ pip_packages=["weaviate-client>=4.16.5"] + DEFAULT_VECTOR_IO_DEPS,
507
+ module="llama_stack.providers.remote.vector_io.weaviate",
508
+ config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
509
+ provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
510
+ api_dependencies=[Api.inference],
511
+ optional_api_dependencies=[Api.files, Api.models],
512
+ description="""
513
+ [Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack.
514
+ It allows you to store and query vectors directly within a Weaviate database.
515
+ That means you get persistent, scalable vector storage backed by Weaviate.
516
+
517
+ ## Features
518
+ Weaviate supports:
519
+ - Store embeddings and their metadata
520
+ - Vector search
521
+ - Full-text search
522
+ - Hybrid search
523
+ - Document storage
524
+ - Metadata filtering
525
+ - Multi-modal retrieval
526
+
527
+
528
+ ## Usage
529
+
530
+ To use Weaviate in your Llama Stack project, follow these steps:
531
+
532
+ 1. Install the necessary dependencies.
533
+ 2. Configure your Llama Stack project to use Weaviate.
534
+ 3. Start storing and querying vectors.
535
+
536
+ ## Installation
537
+
538
+ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate.io/developers/weaviate/quickstart).
539
+
540
+ ## Documentation
541
+ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
542
+ """,
543
+ ),
544
+ InlineProviderSpec(
545
+ api=Api.vector_io,
546
+ provider_type="inline::qdrant",
547
+ pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS,
548
+ module="llama_stack.providers.inline.vector_io.qdrant",
549
+ config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
550
+ api_dependencies=[Api.inference],
551
+ optional_api_dependencies=[Api.files, Api.models],
552
+ description=r"""
553
+ [Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
554
+ allows you to store and query vectors directly in memory.
555
+ That means you'll get fast and efficient vector retrieval.
556
+
557
+ > By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in
558
+ > memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative.
559
+ >
560
+ > \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\]
561
+
562
+
563
+
564
+ ## Features
565
+
566
+ - Lightweight and easy to use
567
+ - Fully integrated with Llama Stack
568
+ - Apache 2.0 license terms
569
+ - Store embeddings and their metadata
570
+ - Supports
571
+ [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/)
572
+ and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search
573
+ - [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/)
574
+ - [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/)
575
+ - [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/)
576
+
577
+ ## Usage
578
+
579
+ To use Qdrant in your Llama Stack project, follow these steps:
580
+
581
+ 1. Install the necessary dependencies.
582
+ 2. Configure your Llama Stack project to use Qdrant.
583
+ 3. Start storing and querying vectors (see the registration sketch below).
584
+
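+ For example, registering a vector DB against this provider might look like the sketch
+ below (assuming a `llama_stack_client` client; the ids, model name, and dimension are
+ illustrative):
+
+ ```python
+ # Sketch: registering a Qdrant-backed vector DB before storing chunks.
+ client.vector_dbs.register(
+     vector_db_id="my_qdrant_db",
+     embedding_model="all-MiniLM-L6-v2",
+     embedding_dimension=384,
+     provider_id="qdrant",
+ )
+ ```
+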
585
+ ## Installation
586
+
587
+ You can install Qdrant using docker:
588
+
589
+ ```bash
590
+ docker pull qdrant/qdrant
591
+ ```
592
+ ## Documentation
593
+ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general.
594
+ """,
595
+ ),
596
+ RemoteProviderSpec(
597
+ api=Api.vector_io,
598
+ adapter_type="qdrant",
599
+ provider_type="remote::qdrant",
600
+ pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS,
601
+ module="llama_stack.providers.remote.vector_io.qdrant",
602
+ config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig",
603
+ api_dependencies=[Api.inference],
604
+ optional_api_dependencies=[Api.files, Api.models],
605
+ description="""
606
+ Please refer to the inline provider documentation.
607
+ """,
608
+ ),
609
+ RemoteProviderSpec(
610
+ api=Api.vector_io,
611
+ adapter_type="milvus",
612
+ provider_type="remote::milvus",
613
+ pip_packages=["pymilvus>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS,
614
+ module="llama_stack.providers.remote.vector_io.milvus",
615
+ config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig",
616
+ api_dependencies=[Api.inference],
617
+ optional_api_dependencies=[Api.files, Api.models],
618
+ description="""
619
+ [Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It
620
+ allows you to store and query vectors directly within a Milvus database.
621
+ That means you're not limited to storing vectors in memory or in a separate service.
622
+
623
+ ## Features
624
+
625
+ - Easy to use
626
+ - Fully integrated with Llama Stack
627
+ - Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations)
628
+
629
+ ## Usage
630
+
631
+ To use Milvus in your Llama Stack project, follow these steps:
632
+
633
+ 1. Install the necessary dependencies.
634
+ 2. Configure your Llama Stack project to use Milvus.
635
+ 3. Start storing and querying vectors.
636
+
637
+ ## Installation
638
+
639
+ If you want to use inline Milvus, you can install:
640
+
641
+ ```bash
642
+ pip install "pymilvus[milvus-lite]"
643
+ ```
644
+
645
+ If you want to use remote Milvus, you can install:
646
+
647
+ ```bash
648
+ pip install pymilvus
649
+ ```
650
+
651
+ ## Configuration
652
+
653
+ In Llama Stack, Milvus can be configured in two ways:
654
+ - **Inline (Local) Configuration** - Uses Milvus-Lite for local storage
655
+ - **Remote Configuration** - Connects to a remote Milvus server
656
+
657
+ ### Inline (Local) Configuration
658
+
659
+ The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files:
660
+
661
+ ```yaml
662
+ vector_io:
663
+ - provider_id: milvus
664
+ provider_type: inline::milvus
665
+ config:
666
+ db_path: ~/.llama/distributions/together/milvus_store.db
667
+ ```
668
+
669
+ ### Remote Configuration
670
+
671
+ Remote configuration is suitable for larger data storage requirements:
672
+
673
+ #### Standard Remote Connection
674
+
675
+ ```yaml
676
+ vector_io:
677
+ - provider_id: milvus
678
+ provider_type: remote::milvus
679
+ config:
680
+ uri: "http://<host>:<port>"
681
+ token: "<user>:<password>"
682
+ ```
683
+
684
+ #### TLS-Enabled Remote Connection (One-way TLS)
685
+
686
+ For connections to Milvus instances with one-way TLS enabled:
687
+
688
+ ```yaml
689
+ vector_io:
690
+ - provider_id: milvus
691
+ provider_type: remote::milvus
692
+ config:
693
+ uri: "https://<host>:<port>"
694
+ token: "<user>:<password>"
695
+ secure: True
696
+ server_pem_path: "/path/to/server.pem"
697
+ ```
698
+
699
+ #### Mutual TLS (mTLS) Remote Connection
700
+
701
+ For connections to Milvus instances with mutual TLS (mTLS) enabled:
702
+
703
+ ```yaml
704
+ vector_io:
705
+ - provider_id: milvus
706
+ provider_type: remote::milvus
707
+ config:
708
+ uri: "https://<host>:<port>"
709
+ token: "<user>:<password>"
710
+ secure: True
711
+ ca_pem_path: "/path/to/ca.pem"
712
+ client_pem_path: "/path/to/client.pem"
713
+ client_key_path: "/path/to/client.key"
714
+ ```
715
+
716
+ #### Key Parameters for TLS Configuration
717
+
718
+ - **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
719
+ - **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS).
720
+ - **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
721
+ - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
722
+ - **`client_key_path`**: Path to the **client private key** file (required for mTLS).
723
+
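+ For reference, these config fields correspond roughly to pymilvus connection options. A
+ hedged sketch of an equivalent direct mTLS connection (parameter names assumed from the
+ pymilvus TLS documentation; host, credentials, and paths are placeholders):
+
+ ```python
+ from pymilvus import connections
+
+ # Sketch: how the mTLS config fields above map onto a raw pymilvus connection.
+ connections.connect(
+     alias="default",
+     uri="https://<host>:<port>",
+     token="<user>:<password>",
+     secure=True,
+     ca_pem_path="/path/to/ca.pem",
+     client_pem_path="/path/to/client.pem",
+     client_key_path="/path/to/client.key",
+ )
+ ```
+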
724
+ ## Search Modes
725
+
726
+ Milvus supports three different search modes for both inline and remote configurations:
727
+
728
+ ### Vector Search
729
+ Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content.
730
+
731
+ ```python
732
+ # Vector search example
733
+ search_response = client.vector_stores.search(
734
+ vector_store_id=vector_store.id,
735
+ query="What is machine learning?",
736
+ search_mode="vector",
737
+ max_num_results=5,
738
+ )
739
+ ```
740
+
741
+ ### Keyword Search
742
+ Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches.
743
+
744
+ ```python
745
+ # Keyword search example
746
+ search_response = client.vector_stores.search(
747
+ vector_store_id=vector_store.id,
748
+ query="Python programming language",
749
+ search_mode="keyword",
750
+ max_num_results=5,
751
+ )
752
+ ```
753
+
754
+ ### Hybrid Search
755
+ Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
756
+
757
+ #### Basic Hybrid Search
758
+ ```python
759
+ # Basic hybrid search example (uses RRF ranker with default impact_factor=60.0)
760
+ search_response = client.vector_stores.search(
761
+ vector_store_id=vector_store.id,
762
+ query="neural networks in Python",
763
+ search_mode="hybrid",
764
+ max_num_results=5,
765
+ )
766
+ ```
767
+
768
+ **Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009).
769
+
770
+ #### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker
771
+ RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results.
772
+
773
+ ```python
774
+ # Hybrid search with custom RRF parameters
775
+ search_response = client.vector_stores.search(
776
+ vector_store_id=vector_store.id,
777
+ query="neural networks in Python",
778
+ search_mode="hybrid",
779
+ max_num_results=5,
780
+ ranking_options={
781
+ "ranker": {
782
+ "type": "rrf",
783
+ "impact_factor": 100.0, # Higher values give more weight to top-ranked results
784
+ }
785
+ },
786
+ )
787
+ ```
788
+
789
+ #### Hybrid Search with Weighted Ranker
790
+ Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods.
791
+
792
+ ```python
793
+ # Hybrid search with weighted ranker
794
+ search_response = client.vector_stores.search(
795
+ vector_store_id=vector_store.id,
796
+ query="neural networks in Python",
797
+ search_mode="hybrid",
798
+ max_num_results=5,
799
+ ranking_options={
800
+ "ranker": {
801
+ "type": "weighted",
802
+ "alpha": 0.7, # 70% vector search, 30% keyword search
803
+ }
804
+ },
805
+ )
806
+ ```
807
+
808
+ For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md).
809
+
810
+ ## Documentation
811
+ See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
812
+
813
+ For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md).
814
+ """,
815
+ ),
816
+ InlineProviderSpec(
817
+ api=Api.vector_io,
818
+ provider_type="inline::milvus",
819
+ pip_packages=["pymilvus[milvus-lite]>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS,
820
+ module="llama_stack.providers.inline.vector_io.milvus",
821
+ config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
822
+ api_dependencies=[Api.inference],
823
+ optional_api_dependencies=[Api.files, Api.models],
824
+ description="""
825
+ Please refer to the remote provider documentation.
826
+ """,
827
+ ),
828
+ ]