llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
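Most of these 738 entries trace a wholesale repackaging rather than new code: llama_stack/distribution/* moves to llama_stack/core/*, and the old provider tree splits providers/impls/* and providers/adapters/* into providers/inline/* and providers/remote/*. A minimal migration sketch for code pinned to the 0.0.42 import paths, using module names taken from the rename entries above (assumes llama-stack 0.3.x is installed; the provider example is shown in comments because importing a provider module also requires that provider's own dependencies):

# Illustrative migration sketch, assuming llama-stack 0.3.x is installed.
# The rename entries above amount to three path-level moves:
#
#   llama_stack.distribution.*        -> llama_stack.core.*
#   llama_stack.providers.impls.*     -> llama_stack.providers.inline.*
#   llama_stack.providers.adapters.*  -> llama_stack.providers.remote.*

import llama_stack.core.datatypes  # was llama_stack.distribution.datatypes

# Provider modules move accordingly, e.g. (each pulls in its own extra deps):
#   llama_stack.providers.adapters.inference.ollama
#     -> llama_stack.providers.remote.inference.ollama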
llama_stack/providers/registry/files.py (new file)
@@ -0,0 +1,31 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
+
+
+def available_providers() -> list[ProviderSpec]:
+    return [
+        InlineProviderSpec(
+            api=Api.files,
+            provider_type="inline::localfs",
+            # TODO: make this dynamic according to the sql store type
+            pip_packages=sql_store_pip_packages,
+            module="llama_stack.providers.inline.files.localfs",
+            config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig",
+            description="Local filesystem-based file storage provider for managing files and documents locally.",
+        ),
+        RemoteProviderSpec(
+            api=Api.files,
+            provider_type="remote::s3",
+            adapter_type="s3",
+            pip_packages=["boto3"] + sql_store_pip_packages,
+            module="llama_stack.providers.remote.files.s3",
+            config_class="llama_stack.providers.remote.files.s3.config.S3FilesImplConfig",
+            description="AWS S3-based file storage provider for scalable cloud file management with metadata persistence.",
+        ),
+    ]
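This file shows the pattern repeated across llama_stack/providers/registry/*: a module-level available_providers() returning the specs the stack resolver can load. A minimal sketch of inspecting it (assumes llama-stack 0.3.x is installed; the printing loop is illustrative, not part of the package):

# Hedged sketch: enumerate the file-storage providers declared above.
from llama_stack.providers.registry.files import available_providers

for spec in available_providers():
    # Each spec carries the provider type plus the module and config-class
    # paths that the stack resolver uses to instantiate the provider.
    print(spec.provider_type, spec.module, spec.config_class)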
llama_stack/providers/registry/inference.py
@@ -4,139 +4,294 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import List

-from llama_stack.distribution.datatypes import *  # noqa: F403
+from llama_stack.providers.datatypes import (
+    Api,
+    InlineProviderSpec,
+    ProviderSpec,
+    RemoteProviderSpec,
+)

+META_REFERENCE_DEPS = [
+    "accelerate",
+    "fairscale",
+    "torch",
+    "torchvision",
+    "transformers",
+    "zmq",
+    "lm-format-enforcer",
+    "sentence-transformers",
+    "torchao==0.8.0",
+    "fbgemm-gpu-genai==1.1.2",
+]

-def available_providers() -> List[ProviderSpec]:
+
+def available_providers() -> list[ProviderSpec]:
     return [
         InlineProviderSpec(
             api=Api.inference,
-            provider_type="meta-reference",
+            provider_type="inline::meta-reference",
+            pip_packages=META_REFERENCE_DEPS,
+            module="llama_stack.providers.inline.inference.meta_reference",
+            config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
+            description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
+        ),
+        InlineProviderSpec(
+            api=Api.inference,
+            provider_type="inline::sentence-transformers",
+            # CrossEncoder depends on torchao.quantization
             pip_packages=[
-                "accelerate",
-                "blobfile",
-                "fairscale",
-                "torch",
-                "torchvision",
-                "transformers",
-                "zmq",
+                "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
+                "sentence-transformers --no-deps",
+                # required by some SentenceTransformers architectures for tensor rearrange/merge ops
+                "einops",
+                # fast HF tokenization backend used by SentenceTransformers models
+                "tokenizers",
+                # safe and fast file format for storing and loading tensors
+                "safetensors",
             ],
-            module="llama_stack.providers.impls.meta_reference.inference",
-            config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceInferenceConfig",
+            module="llama_stack.providers.inline.inference.sentence_transformers",
+            config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
+            description="Sentence Transformers inference provider for text embeddings and similarity search.",
         ),
-        InlineProviderSpec(
+        RemoteProviderSpec(
+            api=Api.inference,
+            adapter_type="cerebras",
+            provider_type="remote::cerebras",
+            pip_packages=[],
+            module="llama_stack.providers.remote.inference.cerebras",
+            config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
+            description="Cerebras inference provider for running models on Cerebras Cloud platform.",
+        ),
+        RemoteProviderSpec(
+            api=Api.inference,
+            adapter_type="ollama",
+            provider_type="remote::ollama",
+            pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
+            config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
+            module="llama_stack.providers.remote.inference.ollama",
+            description="Ollama inference provider for running local models through the Ollama runtime.",
+        ),
+        RemoteProviderSpec(
+            api=Api.inference,
+            adapter_type="vllm",
+            provider_type="remote::vllm",
+            pip_packages=[],
+            module="llama_stack.providers.remote.inference.vllm",
+            config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
+            provider_data_validator="llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator",
+            description="Remote vLLM inference provider for connecting to vLLM servers.",
+        ),
+        RemoteProviderSpec(
+            api=Api.inference,
+            adapter_type="tgi",
+            provider_type="remote::tgi",
+            pip_packages=["huggingface_hub", "aiohttp"],
+            module="llama_stack.providers.remote.inference.tgi",
+            config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
+            description="Text Generation Inference (TGI) provider for HuggingFace model serving.",
+        ),
+        RemoteProviderSpec(
             api=Api.inference,
-            provider_type="meta-reference-quantized",
+            adapter_type="hf::serverless",
+            provider_type="remote::hf::serverless",
+            pip_packages=["huggingface_hub", "aiohttp"],
+            module="llama_stack.providers.remote.inference.tgi",
+            config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
+            description="HuggingFace Inference API serverless provider for on-demand model inference.",
+        ),
+        RemoteProviderSpec(
+            api=Api.inference,
+            provider_type="remote::hf::endpoint",
+            adapter_type="hf::endpoint",
+            pip_packages=["huggingface_hub", "aiohttp"],
+            module="llama_stack.providers.remote.inference.tgi",
+            config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
+            description="HuggingFace Inference Endpoints provider for dedicated model serving.",
+        ),
+        RemoteProviderSpec(
+            api=Api.inference,
+            adapter_type="fireworks",
+            provider_type="remote::fireworks",
             pip_packages=[
-                "accelerate",
-                "blobfile",
-                "fairscale",
-                "fbgemm-gpu==0.8.0",
-                "torch",
-                "torchvision",
-                "transformers",
-                "zmq",
+                "fireworks-ai<=0.17.16",
             ],
-            module="llama_stack.providers.impls.meta_reference.inference",
-            config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceQuantizedInferenceConfig",
-        ),
-        remote_provider_spec(
-            api=Api.inference,
-            adapter=AdapterSpec(
-                adapter_type="sample",
-                pip_packages=[],
-                module="llama_stack.providers.adapters.inference.sample",
-                config_class="llama_stack.providers.adapters.inference.sample.SampleConfig",
-            ),
-        ),
-        remote_provider_spec(
-            api=Api.inference,
-            adapter=AdapterSpec(
-                adapter_type="ollama",
-                pip_packages=["ollama"],
-                config_class="llama_stack.providers.adapters.inference.ollama.OllamaImplConfig",
-                module="llama_stack.providers.adapters.inference.ollama",
-            ),
-        ),
-        remote_provider_spec(
-            api=Api.inference,
-            adapter=AdapterSpec(
-                adapter_type="tgi",
-                pip_packages=["huggingface_hub", "aiohttp"],
-                module="llama_stack.providers.adapters.inference.tgi",
-                config_class="llama_stack.providers.adapters.inference.tgi.TGIImplConfig",
-            ),
-        ),
-        remote_provider_spec(
-            api=Api.inference,
-            adapter=AdapterSpec(
-                adapter_type="hf::serverless",
-                pip_packages=["huggingface_hub", "aiohttp"],
-                module="llama_stack.providers.adapters.inference.tgi",
-                config_class="llama_stack.providers.adapters.inference.tgi.InferenceAPIImplConfig",
-            ),
-        ),
-        remote_provider_spec(
-            api=Api.inference,
-            adapter=AdapterSpec(
-                adapter_type="hf::endpoint",
-                pip_packages=["huggingface_hub", "aiohttp"],
-                module="llama_stack.providers.adapters.inference.tgi",
-                config_class="llama_stack.providers.adapters.inference.tgi.InferenceEndpointImplConfig",
-            ),
-        ),
-        remote_provider_spec(
-            api=Api.inference,
-            adapter=AdapterSpec(
-                adapter_type="fireworks",
-                pip_packages=[
-                    "fireworks-ai",
-                ],
-                module="llama_stack.providers.adapters.inference.fireworks",
-                config_class="llama_stack.providers.adapters.inference.fireworks.FireworksImplConfig",
-            ),
-        ),
-        remote_provider_spec(
-            api=Api.inference,
-            adapter=AdapterSpec(
-                adapter_type="together",
-                pip_packages=[
-                    "together",
-                ],
-                module="llama_stack.providers.adapters.inference.together",
109
- config_class="llama_stack.providers.adapters.inference.together.TogetherImplConfig",
110
- provider_data_validator="llama_stack.providers.adapters.safety.together.TogetherProviderDataValidator",
111
- ),
112
- ),
113
- remote_provider_spec(
114
- api=Api.inference,
115
- adapter=AdapterSpec(
116
- adapter_type="bedrock",
117
- pip_packages=["boto3"],
118
- module="llama_stack.providers.adapters.inference.bedrock",
119
- config_class="llama_stack.providers.adapters.inference.bedrock.BedrockConfig",
120
- ),
121
- ),
122
- remote_provider_spec(
123
- api=Api.inference,
124
- adapter=AdapterSpec(
125
- adapter_type="databricks",
126
- pip_packages=[
127
- "openai",
128
- ],
129
- module="llama_stack.providers.adapters.inference.databricks",
130
- config_class="llama_stack.providers.adapters.inference.databricks.DatabricksImplConfig",
131
- ),
119
+ module="llama_stack.providers.remote.inference.fireworks",
120
+ config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
121
+ provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
122
+ description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.",
132
123
  ),
133
- InlineProviderSpec(
124
+ RemoteProviderSpec(
125
+ api=Api.inference,
126
+ adapter_type="together",
127
+ provider_type="remote::together",
128
+ pip_packages=[
129
+ "together",
130
+ ],
131
+ module="llama_stack.providers.remote.inference.together",
132
+ config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
133
+ provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
134
+ description="Together AI inference provider for open-source models and collaborative AI development.",
135
+ ),
136
+ RemoteProviderSpec(
137
+ api=Api.inference,
138
+ adapter_type="bedrock",
139
+ provider_type="remote::bedrock",
140
+ pip_packages=["boto3"],
141
+ module="llama_stack.providers.remote.inference.bedrock",
142
+ config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
143
+ description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.",
144
+ ),
145
+ RemoteProviderSpec(
146
+ api=Api.inference,
147
+ adapter_type="databricks",
148
+ provider_type="remote::databricks",
149
+ pip_packages=["databricks-sdk"],
150
+ module="llama_stack.providers.remote.inference.databricks",
151
+ config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
152
+ description="Databricks inference provider for running models on Databricks' unified analytics platform.",
153
+ ),
154
+ RemoteProviderSpec(
134
155
  api=Api.inference,
135
- provider_type="vllm",
156
+ adapter_type="nvidia",
157
+ provider_type="remote::nvidia",
158
+ pip_packages=[],
159
+ module="llama_stack.providers.remote.inference.nvidia",
160
+ config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
161
+ description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
162
+ ),
163
+ RemoteProviderSpec(
164
+ api=Api.inference,
165
+ adapter_type="runpod",
166
+ provider_type="remote::runpod",
167
+ pip_packages=[],
168
+ module="llama_stack.providers.remote.inference.runpod",
169
+ config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
170
+ description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
171
+ ),
172
+ RemoteProviderSpec(
173
+ api=Api.inference,
174
+ adapter_type="openai",
175
+ provider_type="remote::openai",
176
+ pip_packages=[],
177
+ module="llama_stack.providers.remote.inference.openai",
178
+ config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
179
+ provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
180
+ description="OpenAI inference provider for accessing GPT models and other OpenAI services.",
181
+ ),
182
+ RemoteProviderSpec(
183
+ api=Api.inference,
184
+ adapter_type="anthropic",
185
+ provider_type="remote::anthropic",
186
+ pip_packages=["anthropic"],
187
+ module="llama_stack.providers.remote.inference.anthropic",
188
+ config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
189
+ provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
190
+ description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.",
191
+ ),
192
+ RemoteProviderSpec(
193
+ api=Api.inference,
194
+ adapter_type="gemini",
195
+ provider_type="remote::gemini",
196
+ pip_packages=[],
197
+ module="llama_stack.providers.remote.inference.gemini",
198
+ config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
199
+ provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
200
+ description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
201
+ ),
202
+ RemoteProviderSpec(
203
+ api=Api.inference,
204
+ adapter_type="vertexai",
205
+ provider_type="remote::vertexai",
136
206
  pip_packages=[
137
- "vllm",
207
+ "google-cloud-aiplatform",
138
208
  ],
139
- module="llama_stack.providers.impls.vllm",
140
- config_class="llama_stack.providers.impls.vllm.VLLMConfig",
209
+ module="llama_stack.providers.remote.inference.vertexai",
210
+ config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
211
+ provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",
212
+ description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:
213
+
214
+ • Enterprise-grade security: Uses Google Cloud's security controls and IAM
215
+ • Better integration: Seamless integration with other Google Cloud services
216
+ • Advanced features: Access to additional Vertex AI features like model tuning and monitoring
217
+ • Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys
218
+
219
+ Configuration:
220
+ - Set VERTEX_AI_PROJECT environment variable (required)
221
+ - Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
222
+ - Use Google Cloud Application Default Credentials or service account key
223
+
224
+ Authentication Setup:
225
+ Option 1 (Recommended): gcloud auth application-default login
226
+ Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path
227
+
228
+ Available Models:
229
+ - vertex_ai/gemini-2.0-flash
230
+ - vertex_ai/gemini-2.5-flash
231
+ - vertex_ai/gemini-2.5-pro""",
232
+ ),
233
+ RemoteProviderSpec(
234
+ api=Api.inference,
235
+ adapter_type="groq",
236
+ provider_type="remote::groq",
237
+ pip_packages=[],
238
+ module="llama_stack.providers.remote.inference.groq",
239
+ config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
240
+ provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
241
+ description="Groq inference provider for ultra-fast inference using Groq's LPU technology.",
242
+ ),
243
+ RemoteProviderSpec(
244
+ api=Api.inference,
245
+ adapter_type="llama-openai-compat",
246
+ provider_type="remote::llama-openai-compat",
247
+ pip_packages=[],
248
+ module="llama_stack.providers.remote.inference.llama_openai_compat",
249
+ config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
250
+ provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
251
+ description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.",
252
+ ),
253
+ RemoteProviderSpec(
254
+ api=Api.inference,
255
+ adapter_type="sambanova",
256
+ provider_type="remote::sambanova",
257
+ pip_packages=[],
258
+ module="llama_stack.providers.remote.inference.sambanova",
259
+ config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
260
+ provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
261
+ description="SambaNova inference provider for running models on SambaNova's dataflow architecture.",
262
+ ),
263
+ RemoteProviderSpec(
264
+ api=Api.inference,
265
+ adapter_type="passthrough",
266
+ provider_type="remote::passthrough",
267
+ pip_packages=[],
268
+ module="llama_stack.providers.remote.inference.passthrough",
269
+ config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig",
270
+ provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator",
271
+ description="Passthrough inference provider for connecting to any external inference service not directly supported.",
272
+ ),
273
+ RemoteProviderSpec(
274
+ api=Api.inference,
275
+ adapter_type="watsonx",
276
+ provider_type="remote::watsonx",
277
+ pip_packages=["litellm"],
278
+ module="llama_stack.providers.remote.inference.watsonx",
279
+ config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
280
+ provider_data_validator="llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator",
281
+ description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
282
+ ),
283
+ RemoteProviderSpec(
284
+ api=Api.inference,
285
+ provider_type="remote::azure",
286
+ adapter_type="azure",
287
+ pip_packages=[],
288
+ module="llama_stack.providers.remote.inference.azure",
289
+ config_class="llama_stack.providers.remote.inference.azure.AzureConfig",
290
+ provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator",
291
+ description="""
292
+ Azure OpenAI inference provider for accessing GPT models and other Azure services.
293
+ Provider documentation
294
+ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
295
+ """,
141
296
  ),
142
297
  ]
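
The inference registry rewrite above shows the two structural changes repeated across every registry file in this release: the old remote_provider_spec(adapter=AdapterSpec(...)) nesting collapses into a flat RemoteProviderSpec(...), and every provider_type gains an inline:: or remote:: namespace. What follows is a minimal, self-contained sketch of the lookup that the namespaced keys make unambiguous; ProviderSpecSketch and resolve are illustrative stand-ins, not llama_stack's actual types or API.

from dataclasses import dataclass, field


@dataclass
class ProviderSpecSketch:
    # Simplified stand-in for llama_stack's ProviderSpec variants.
    api: str
    provider_type: str  # e.g. "inline::meta-reference" or "remote::ollama"
    module: str
    config_class: str
    pip_packages: list[str] = field(default_factory=list)
    adapter_type: str | None = None  # set only on remote specs


def resolve(specs: list[ProviderSpecSketch], provider_type: str) -> ProviderSpecSketch:
    # The inline::/remote:: prefix makes provider_type a unique registry key,
    # which is what the flat spec layout is optimizing for.
    return {s.provider_type: s for s in specs}[provider_type]


specs = [
    ProviderSpecSketch(
        api="inference",
        provider_type="remote::ollama",
        adapter_type="ollama",
        module="llama_stack.providers.remote.inference.ollama",
        config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
        pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
    ),
]
print(resolve(specs, "remote::ollama").module)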
@@ -0,0 +1,69 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from typing import cast
+
+from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+
+# We provide two versions of these providers so that distributions can package the appropriate version of torch.
+# The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
+torchtune_def = dict(
+    api=Api.post_training,
+    pip_packages=["numpy"],
+    module="llama_stack.providers.inline.post_training.torchtune",
+    config_class="llama_stack.providers.inline.post_training.torchtune.TorchtunePostTrainingConfig",
+    api_dependencies=[
+        Api.datasetio,
+        Api.datasets,
+    ],
+    description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
+)
+
+
+def available_providers() -> list[ProviderSpec]:
+    return [
+        InlineProviderSpec(
+            **{  # type: ignore
+                **torchtune_def,
+                "provider_type": "inline::torchtune-cpu",
+                "pip_packages": (
+                    cast(list[str], torchtune_def["pip_packages"])
+                    + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"]
+                ),
+            },
+        ),
+        InlineProviderSpec(
+            **{  # type: ignore
+                **torchtune_def,
+                "provider_type": "inline::torchtune-gpu",
+                "pip_packages": (
+                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune>=0.5.0 torchao>=0.12.0"]
+                ),
+            },
+        ),
+        InlineProviderSpec(
+            api=Api.post_training,
+            provider_type="inline::huggingface-gpu",
+            pip_packages=["trl", "transformers", "peft", "datasets>=4.0.0", "torch"],
+            module="llama_stack.providers.inline.post_training.huggingface",
+            config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
+            api_dependencies=[
+                Api.datasetio,
+                Api.datasets,
+            ],
+            description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
+        ),
+        RemoteProviderSpec(
+            api=Api.post_training,
+            adapter_type="nvidia",
+            provider_type="remote::nvidia",
+            pip_packages=["requests", "aiohttp"],
+            module="llama_stack.providers.remote.post_training.nvidia",
+            config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig",
+            description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.",
+        ),
+    ]
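
The new post-training registry above derives its CPU and GPU torchtune variants from one shared dict, spreading **torchtune_def into each spec and overriding only "provider_type" and "pip_packages". Below is a runnable sketch of that merge semantics, using a stand-in base dict rather than the real torchtune_def:

# Later keys win in a dict literal, so each variant overrides the shared
# definition without mutating it.
base = {
    "api": "post_training",
    "pip_packages": ["numpy"],
    "module": "llama_stack.providers.inline.post_training.torchtune",
}

cpu_variant = {
    **base,
    "provider_type": "inline::torchtune-cpu",
    "pip_packages": base["pip_packages"]
    + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"],
}
gpu_variant = {
    **base,
    "provider_type": "inline::torchtune-gpu",
    "pip_packages": base["pip_packages"] + ["torch torchtune>=0.5.0 torchao>=0.12.0"],
}

assert base["pip_packages"] == ["numpy"]  # the shared dict is untouched
print(cpu_variant["provider_type"], gpu_variant["provider_type"])

The cast(list[str], ...) in the real file appears to exist only to satisfy the type checker: values pulled out of the heterogeneous torchtune_def dict cannot be inferred as list[str], so the cast narrows the type before list concatenation.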
@@ -4,70 +4,75 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import List
 
-from llama_stack.distribution.datatypes import (
-    AdapterSpec,
+from llama_stack.providers.datatypes import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
-    remote_provider_spec,
+    RemoteProviderSpec,
 )
 
 
-def available_providers() -> List[ProviderSpec]:
+def available_providers() -> list[ProviderSpec]:
     return [
         InlineProviderSpec(
             api=Api.safety,
-            provider_type="meta-reference",
+            provider_type="inline::prompt-guard",
             pip_packages=[
-                "transformers",
+                "transformers[accelerate]",
                 "torch --index-url https://download.pytorch.org/whl/cpu",
             ],
-            module="llama_stack.providers.impls.meta_reference.safety",
-            config_class="llama_stack.providers.impls.meta_reference.safety.SafetyConfig",
+            module="llama_stack.providers.inline.safety.prompt_guard",
+            config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig",
+            description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.",
+        ),
+        InlineProviderSpec(
+            api=Api.safety,
+            provider_type="inline::llama-guard",
+            pip_packages=[],
+            module="llama_stack.providers.inline.safety.llama_guard",
+            config_class="llama_stack.providers.inline.safety.llama_guard.LlamaGuardConfig",
             api_dependencies=[
                 Api.inference,
             ],
+            description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.",
         ),
-        remote_provider_spec(
+        InlineProviderSpec(
             api=Api.safety,
-            adapter=AdapterSpec(
-                adapter_type="sample",
-                pip_packages=[],
-                module="llama_stack.providers.adapters.safety.sample",
-                config_class="llama_stack.providers.adapters.safety.sample.SampleConfig",
-            ),
+            provider_type="inline::code-scanner",
+            pip_packages=[
+                "codeshield",
+            ],
+            module="llama_stack.providers.inline.safety.code_scanner",
+            config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig",
+            description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.",
         ),
-        remote_provider_spec(
+        RemoteProviderSpec(
             api=Api.safety,
-            adapter=AdapterSpec(
-                adapter_type="bedrock",
-                pip_packages=["boto3"],
-                module="llama_stack.providers.adapters.safety.bedrock",
-                config_class="llama_stack.providers.adapters.safety.bedrock.BedrockSafetyConfig",
-            ),
+            adapter_type="bedrock",
+            provider_type="remote::bedrock",
+            pip_packages=["boto3"],
+            module="llama_stack.providers.remote.safety.bedrock",
+            config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig",
+            description="AWS Bedrock safety provider for content moderation using AWS's safety services.",
         ),
-        remote_provider_spec(
+        RemoteProviderSpec(
             api=Api.safety,
-            adapter=AdapterSpec(
-                adapter_type="together",
-                pip_packages=[
-                    "together",
-                ],
-                module="llama_stack.providers.adapters.safety.together",
-                config_class="llama_stack.providers.adapters.safety.together.TogetherSafetyConfig",
-                provider_data_validator="llama_stack.providers.adapters.safety.together.TogetherProviderDataValidator",
-            ),
+            adapter_type="nvidia",
+            provider_type="remote::nvidia",
+            pip_packages=["requests"],
+            module="llama_stack.providers.remote.safety.nvidia",
+            config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig",
+            description="NVIDIA's safety provider for content moderation and safety filtering.",
         ),
-        InlineProviderSpec(
+        RemoteProviderSpec(
             api=Api.safety,
-            provider_type="meta-reference/codeshield",
-            pip_packages=[
-                "codeshield",
-            ],
-            module="llama_stack.providers.impls.meta_reference.codeshield",
-            config_class="llama_stack.providers.impls.meta_reference.codeshield.CodeShieldConfig",
-            api_dependencies=[],
+            adapter_type="sambanova",
+            provider_type="remote::sambanova",
+            pip_packages=["litellm", "requests"],
+            module="llama_stack.providers.remote.safety.sambanova",
+            config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
+            provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",
+            description="SambaNova's safety provider for content moderation and safety filtering.",
         ),
     ]
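
The safety registry closes the section with the same pattern: the old monolithic "meta-reference" safety provider is gone, replaced by the focused "inline::prompt-guard", "inline::llama-guard", and "inline::code-scanner" providers, while the remote providers move into the "remote::" namespace. Below is a hedged sketch of a rename table built only from the provider_type strings visible in this diff; splitting "meta-reference" across its two successors is an assumption about how an existing config would migrate, and the helper itself is hypothetical, not part of llama_stack.

# Hypothetical migration table: old safety provider_type -> namespaced
# replacement(s), taken from the deleted and added lines above.
SAFETY_PROVIDER_RENAMES: dict[str, list[str]] = {
    # assumption: the monolithic provider's duties split across these two
    "meta-reference": ["inline::prompt-guard", "inline::llama-guard"],
    "meta-reference/codeshield": ["inline::code-scanner"],
    "bedrock": ["remote::bedrock"],
}


def migrate_provider_types(old: str) -> list[str]:
    """Return the replacement provider_type(s), or the input if unchanged."""
    return SAFETY_PROVIDER_RENAMES.get(old, [old])


print(migrate_provider_types("meta-reference/codeshield"))  # ['inline::code-scanner']

Note that the "together" safety adapter is deleted outright in this diff with no remote:: replacement, so a rename table alone would not cover it.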