llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738) hide show
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,506 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+ import importlib
7
+ import importlib.metadata
8
+ import inspect
9
+ from typing import Any
10
+
11
+ from llama_stack.apis.agents import Agents
12
+ from llama_stack.apis.batches import Batches
13
+ from llama_stack.apis.benchmarks import Benchmarks
14
+ from llama_stack.apis.conversations import Conversations
15
+ from llama_stack.apis.datasetio import DatasetIO
16
+ from llama_stack.apis.datasets import Datasets
17
+ from llama_stack.apis.datatypes import ExternalApiSpec
18
+ from llama_stack.apis.eval import Eval
19
+ from llama_stack.apis.files import Files
20
+ from llama_stack.apis.inference import Inference, InferenceProvider
21
+ from llama_stack.apis.inspect import Inspect
22
+ from llama_stack.apis.models import Models
23
+ from llama_stack.apis.post_training import PostTraining
24
+ from llama_stack.apis.prompts import Prompts
25
+ from llama_stack.apis.providers import Providers as ProvidersAPI
26
+ from llama_stack.apis.safety import Safety
27
+ from llama_stack.apis.scoring import Scoring
28
+ from llama_stack.apis.scoring_functions import ScoringFunctions
29
+ from llama_stack.apis.shields import Shields
30
+ from llama_stack.apis.telemetry import Telemetry
31
+ from llama_stack.apis.tools import ToolGroups, ToolRuntime
32
+ from llama_stack.apis.vector_io import VectorIO
33
+ from llama_stack.apis.vector_stores import VectorStore
34
+ from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
35
+ from llama_stack.core.client import get_client_impl
36
+ from llama_stack.core.datatypes import (
37
+ AccessRule,
38
+ AutoRoutedProviderSpec,
39
+ Provider,
40
+ RoutingTableProviderSpec,
41
+ StackRunConfig,
42
+ )
43
+ from llama_stack.core.distribution import builtin_automatically_routed_apis
44
+ from llama_stack.core.external import load_external_apis
45
+ from llama_stack.core.store import DistributionRegistry
46
+ from llama_stack.core.utils.dynamic import instantiate_class_type
47
+ from llama_stack.log import get_logger
48
+ from llama_stack.providers.datatypes import (
49
+ Api,
50
+ BenchmarksProtocolPrivate,
51
+ DatasetsProtocolPrivate,
52
+ InlineProviderSpec,
53
+ ModelsProtocolPrivate,
54
+ ProviderSpec,
55
+ RemoteProviderConfig,
56
+ RemoteProviderSpec,
57
+ ScoringFunctionsProtocolPrivate,
58
+ ShieldsProtocolPrivate,
59
+ ToolGroupsProtocolPrivate,
60
+ )
61
+
62
+ logger = get_logger(name=__name__, category="core")
63
+
64
+
65
class InvalidProviderError(Exception):
    """Raised when a configured provider cannot be used.

    In this module it is raised by ``validate_provider`` when the registered
    spec for a provider carries a ``deprecation_error``.
    """
67
+
68
+
69
def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> dict[Api, Any]:
    """Build the lookup table from each ``Api`` member to its protocol class.

    The built-in APIs are always present. Entries from ``external_apis`` are
    layered on top by importing ``api_spec.module`` and reading the attribute
    named by ``api_spec.protocol``.

    Args:
        external_apis: Optional mapping of API to external API specification.

    Returns:
        A new dictionary mapping API members to protocol classes. External
        APIs whose module or protocol attribute cannot be loaded are logged
        and left out rather than aborting the whole lookup.
    """
    protocols = {
        Api.providers: ProvidersAPI,
        Api.agents: Agents,
        Api.inference: Inference,
        Api.inspect: Inspect,
        Api.batches: Batches,
        Api.vector_io: VectorIO,
        Api.vector_stores: VectorStore,
        Api.models: Models,
        Api.safety: Safety,
        Api.shields: Shields,
        Api.datasetio: DatasetIO,
        Api.datasets: Datasets,
        Api.scoring: Scoring,
        Api.scoring_functions: ScoringFunctions,
        Api.eval: Eval,
        Api.benchmarks: Benchmarks,
        Api.post_training: PostTraining,
        Api.tool_groups: ToolGroups,
        Api.tool_runtime: ToolRuntime,
        Api.files: Files,
        Api.prompts: Prompts,
        Api.conversations: Conversations,
        Api.telemetry: Telemetry,
    }

    # A missing or empty mapping simply contributes nothing.
    for api, api_spec in (external_apis or {}).items():
        try:
            protocols[api] = getattr(importlib.import_module(api_spec.module), api_spec.protocol)
        except (ImportError, AttributeError):
            # Best effort: one broken external API must not hide the others.
            logger.exception(f"Failed to load external API {api_spec.name}")

    return protocols
115
+
116
+
117
def api_protocol_map_for_compliance_check(config: Any) -> dict[Api, Any]:
    """Return the API-to-protocol map with inference mapped to ``InferenceProvider``.

    External APIs are loaded from ``config`` and merged in through
    ``api_protocol_map``; the ``Api.inference`` entry is then overridden so it
    points at ``InferenceProvider`` instead of ``Inference``.
    """
    inference_override = {Api.inference: InferenceProvider}
    return api_protocol_map(load_external_apis(config)) | inference_override
123
+
124
+
125
def additional_protocols_map() -> dict[Api, Any]:
    """Return the extra protocols associated with selected APIs.

    Each value is a 3-tuple: a ``*ProtocolPrivate`` class, the related API
    protocol class, and the ``Api`` member for that related API.
    """
    extra_protocols: dict[Api, Any] = {}
    extra_protocols[Api.inference] = (ModelsProtocolPrivate, Models, Api.models)
    extra_protocols[Api.tool_groups] = (ToolGroupsProtocolPrivate, ToolGroups, Api.tool_groups)
    extra_protocols[Api.safety] = (ShieldsProtocolPrivate, Shields, Api.shields)
    extra_protocols[Api.datasetio] = (DatasetsProtocolPrivate, Datasets, Api.datasets)
    extra_protocols[Api.scoring] = (ScoringFunctionsProtocolPrivate, ScoringFunctions, Api.scoring_functions)
    extra_protocols[Api.eval] = (BenchmarksProtocolPrivate, Benchmarks, Api.benchmarks)
    return extra_protocols
138
+
139
+
140
+ # TODO: make all this naming far less atrocious. Provider. ProviderSpec. ProviderWithSpec. WTF!
141
# A configured ``Provider`` entry paired with the ``ProviderSpec`` it resolved to.
class ProviderWithSpec(Provider):
    # Specification describing the implementation behind this provider entry.
    spec: ProviderSpec


# Registry of known provider specs: API -> provider type -> spec.
ProviderRegistry = dict[Api, dict[str, ProviderSpec]]
146
+
147
+
148
async def resolve_impls(
    run_config: StackRunConfig,
    provider_registry: ProviderRegistry,
    dist_registry: DistributionRegistry,
    policy: list[AccessRule],
    internal_impls: dict[Api, Any] | None = None,
) -> dict[Api, Any]:
    """Resolve the run configuration into instantiated implementations per API.

    The work happens in three stages:
    1. Validate the configured providers and group them by API.
    2. Order them so that dependencies come before their dependents.
    3. Instantiate each one, handing it the dependencies built so far.
    """
    routing_table_apis = {info.routing_table_api for info in builtin_automatically_routed_apis()}
    router_apis = {info.router_api for info in builtin_automatically_routed_apis()}

    providers_with_specs = validate_and_prepare_providers(
        run_config, provider_registry, routing_table_apis, router_apis
    )

    # An explicit API list in the config wins; otherwise serve everything that
    # has a provider plus all routing-table and router APIs.
    apis_to_serve = run_config.apis
    if not apis_to_serve:
        apis_to_serve = {
            *providers_with_specs.keys(),
            *(api.value for api in routing_table_apis),
            *(api.value for api in router_apis),
        }

    providers_with_specs.update(specs_for_autorouted_apis(apis_to_serve))

    ordered_providers = sort_providers_by_deps(providers_with_specs, run_config)
    return await instantiate_providers(
        ordered_providers, router_apis, dist_registry, run_config, policy, internal_impls
    )
177
+
178
+
179
def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str, dict[str, ProviderWithSpec]]:
    """Create the built-in routing-table and router entries for served APIs.

    For every automatically routed API whose router API name appears in
    ``apis_to_serve``, two ``"__builtin__"`` entries are produced: one keyed
    by the routing-table API name and one keyed by the router API name.
    """
    autorouted: dict[str, dict[str, ProviderWithSpec]] = {}

    for info in builtin_automatically_routed_apis():
        router_name = info.router_api.value
        if router_name not in apis_to_serve:
            continue

        table_name = info.routing_table_api.value

        # The routing table is ordered after the "inner-<router>" providers.
        routing_table = ProviderWithSpec(
            provider_id="__routing_table__",
            provider_type="__routing_table__",
            config={},
            spec=RoutingTableProviderSpec(
                api=info.routing_table_api,
                router_api=info.router_api,
                module="llama_stack.core.routers",
                api_dependencies=[],
                deps__=[f"inner-{router_name}"],
            ),
        )
        autorouted[table_name] = {"__builtin__": routing_table}

        # The router itself depends on its routing table.
        router = ProviderWithSpec(
            provider_id="__autorouted__",
            provider_type="__autorouted__",
            config={},
            spec=AutoRoutedProviderSpec(
                api=info.router_api,
                module="llama_stack.core.routers",
                routing_table_api=info.routing_table_api,
                api_dependencies=[info.routing_table_api],
                deps__=[table_name],
            ),
        )
        autorouted[router_name] = {"__builtin__": router}

    return autorouted
216
+
217
+
218
def validate_and_prepare_providers(
    run_config: StackRunConfig, provider_registry: ProviderRegistry, routing_table_apis: set[Api], router_apis: set[Api]
) -> dict[str, dict[str, ProviderWithSpec]]:
    """Validate the configured providers and group them by API name.

    Providers with an empty ``provider_id`` or the id ``"__disabled__"`` are
    skipped. Providers of a router API are stored under ``"inner-<api>"``.

    Raises:
        ValueError: If providers are configured for a routing-table API, or
            (through ``validate_provider``) a provider type is not registered.
        InvalidProviderError: Through ``validate_provider``, for a provider
            whose spec carries a deprecation error.
    """
    prepared: dict[str, dict[str, ProviderWithSpec]] = {}

    for api_str, providers in run_config.providers.items():
        api = Api(api_str)
        if api in routing_table_apis:
            raise ValueError(f"Provider for `{api_str}` is automatically provided and cannot be overridden")

        enabled = {}
        for provider in providers:
            if not provider.provider_id or provider.provider_id == "__disabled__":
                logger.debug(f"Provider `{provider.provider_type}` for API `{api}` is disabled")
                continue

            validate_provider(provider, api, provider_registry)
            registered = provider_registry[api][provider.provider_type]
            # Note: this writes the sort keys onto the spec held by the registry.
            registered.deps__ = [
                dep.value for dep in (*registered.api_dependencies, *registered.optional_api_dependencies)
            ]
            enabled[provider.provider_id] = ProviderWithSpec(spec=registered, **provider.model_dump())

        group_key = f"inner-{api_str}" if api in router_apis else api_str
        prepared[group_key] = enabled

    # TODO: remove this logic, telemetry should not have providers.
    # if telemetry has been enabled in the config initialize our internal impl
    # telemetry is not an external API so it SHOULD NOT be auto-routed.
    if run_config.telemetry.enabled:
        telemetry_spec = InlineProviderSpec(
            api=Api.telemetry,
            provider_type="inline::meta-reference",
            pip_packages=[],
            optional_api_dependencies=[Api.datasetio],
            module="llama_stack.providers.inline.telemetry.meta_reference",
            config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
            description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
        )
        prepared["telemetry"] = {
            "meta-reference": ProviderWithSpec(
                spec=telemetry_spec, provider_type="inline::meta-reference", provider_id="meta-reference"
            )
        }

    return prepared
263
+
264
+
265
def validate_provider(provider: Provider, api: Api, provider_registry: ProviderRegistry):
    """Check that a provider type is registered for ``api`` and not deprecated.

    Raises:
        ValueError: If the provider type is not registered for ``api``.
        InvalidProviderError: If the registered spec has a deprecation error.
    """
    registered = provider_registry[api]
    if provider.provider_type not in registered:
        raise ValueError(f"Provider `{provider.provider_type}` is not available for API `{api}`")

    spec = registered[provider.provider_type]

    # A deprecation error is fatal; a deprecation warning is only logged.
    if spec.deprecation_error:
        logger.error(spec.deprecation_error)
        raise InvalidProviderError(spec.deprecation_error)

    if spec.deprecation_warning:
        logger.warning(
            f"Provider `{provider.provider_type}` for API `{api}` is deprecated and will be removed in a future release: {spec.deprecation_warning}",
        )
278
+
279
+
280
def sort_providers_by_deps(
    providers_with_specs: dict[str, dict[str, ProviderWithSpec]], run_config: StackRunConfig
) -> list[tuple[str, ProviderWithSpec]]:
    """Return ``(api name, provider)`` pairs in the order given by ``topological_sort``.

    ``run_config`` is accepted for interface compatibility; this function does
    not read it.
    """
    # topological_sort expects a list of providers per API, not a dict by id.
    providers_by_api: dict[str, list[ProviderWithSpec]] = {}
    for api_name, providers_by_id in providers_with_specs.items():
        providers_by_api[api_name] = list(providers_by_id.values())

    sorted_providers: list[tuple[str, ProviderWithSpec]] = topological_sort(providers_by_api)

    logger.debug(f"Resolved {len(sorted_providers)} providers")
    for api_str, provider in sorted_providers:
        logger.debug(f" {api_str} => {provider.provider_id}")
    return sorted_providers
292
+
293
+
294
async def instantiate_providers(
    sorted_providers: list[tuple[str, ProviderWithSpec]],
    router_apis: set[Api],
    dist_registry: DistributionRegistry,
    run_config: StackRunConfig,
    policy: list[AccessRule],
    internal_impls: dict[Api, Any] | None = None,
) -> dict[Api, Any]:
    """Instantiate providers in the given order and collect them per API.

    Implementations listed under an ``"inner-<api>"`` key are collected per
    provider id and later handed to the matching routing table; all others
    are stored in the returned mapping under their ``Api`` member.

    Raises:
        RuntimeError: If a required API dependency of a provider has not been
            instantiated by the time that provider is reached.
    """
    impls: dict[Api, Any] = dict(internal_impls) if internal_impls else {}
    inner_impls_by_provider_id: dict[str, dict[str, Any]] = {f"inner-{api.value}": {} for api in router_apis}

    for api_str, provider in sorted_providers:
        # Skip providers that are not enabled
        if provider.provider_id is None:
            continue

        spec = provider.spec

        # Required dependencies must already exist; optional ones are best effort.
        deps = {}
        try:
            for required in spec.api_dependencies:
                deps[required] = impls[required]
        except KeyError as e:
            missing_api = e.args[0]
            raise RuntimeError(
                f"Failed to resolve '{provider.spec.api.value}' provider '{provider.provider_id}' of type '{provider.spec.provider_type}': "
                f"required dependency '{missing_api.value}' is not available. "
                f"Please add a '{missing_api.value}' provider to your configuration or check if the provider is properly configured."
            ) from e
        deps.update({optional: impls[optional] for optional in spec.optional_api_dependencies if optional in impls})

        # Only routing tables receive the inner implementations of their router API.
        inner_impls = (
            inner_impls_by_provider_id[f"inner-{spec.router_api.value}"]
            if isinstance(spec, RoutingTableProviderSpec)
            else {}
        )

        impl = await instantiate_provider(provider, deps, inner_impls, dist_registry, run_config, policy)

        if api_str.startswith("inner-"):
            inner_impls_by_provider_id[api_str][provider.provider_id] = impl
        else:
            impls[Api(api_str)] = impl

    return impls
336
+
337
+
338
def topological_sort(
    providers_with_specs: dict[str, list[ProviderWithSpec]],
) -> list[tuple[str, ProviderWithSpec]]:
    """Order providers so that each API appears after the APIs it depends on.

    A depth-first walk is started from every key in insertion order. For each
    API, the ``spec.deps__`` of all its providers are followed; dependencies
    that are not keys of ``providers_with_specs`` are ignored. An API that is
    already being visited is not revisited, so a dependency cycle does not
    recurse forever (the resulting order inside a cycle follows visit order).

    Returns:
        ``(api_name, provider)`` pairs, one per provider, grouped by API.
    """
    seen: set[str] = set()
    finished: list[str] = []

    def visit(api_name: str) -> None:
        seen.add(api_name)

        # Gather every dependency of every provider for this API before descending.
        wanted = [dep for entry in providers_with_specs[api_name] for dep in entry.spec.deps__]
        for dep in wanted:
            if dep in providers_with_specs and dep not in seen:
                visit(dep)

        # Post-order: an API is emitted only after everything it needs.
        finished.append(api_name)

    for api_name in providers_with_specs:
        if api_name not in seen:
            visit(api_name)

    return [(api_name, entry) for api_name in finished for entry in providers_with_specs[api_name]]
369
+
370
+
371
+ # returns a class implementing the protocol corresponding to the Api
372
async def instantiate_provider(
    provider: ProviderWithSpec,
    deps: dict[Api, Any],
    inner_impls: dict[str, Any],
    dist_registry: DistributionRegistry,
    run_config: StackRunConfig,
    policy: list[AccessRule],
):
    """Import the provider's module, call its factory and validate the result.

    The factory name and its positional arguments depend on the spec type:

    * ``RemoteProviderSpec`` -> ``get_adapter_impl(config, deps[, policy])``
    * ``AutoRoutedProviderSpec`` -> ``get_auto_router_impl(api, routing_table, deps, run_config, policy)``
    * ``RoutingTableProviderSpec`` -> ``get_routing_table_impl(api, inner_impls, deps, dist_registry, policy)``
    * anything else -> ``get_provider_impl(config, deps[, policy][, telemetry_enabled])``

    ``policy`` / ``telemetry_enabled`` are appended only when the factory's
    signature has a parameter of that name.

    Returns:
        The implementation object, tagged with ``__provider_id__``,
        ``__provider_spec__`` and ``__provider_config__``.

    Raises:
        AttributeError: If the spec has no ``module`` attribute or it is ``None``.
        ValueError: Propagated from ``check_protocol_compliance``.
    """
    spec = provider.spec
    if getattr(spec, "module", None) is None:
        raise AttributeError(f"ProviderSpec of type {type(spec)} does not have a 'module' attribute")

    logger.debug(f"Instantiating provider {provider.provider_id} from {spec.module}")
    module = importlib.import_module(spec.module)

    if isinstance(spec, RemoteProviderSpec):
        method = "get_adapter_impl"
        config = instantiate_class_type(spec.config_class)(**provider.config)
        args = [config, deps]

        factory_params = inspect.signature(getattr(module, method)).parameters
        if "policy" in factory_params:
            args.append(policy)
    elif isinstance(spec, AutoRoutedProviderSpec):
        method = "get_auto_router_impl"
        config = None
        args = [spec.api, deps[spec.routing_table_api], deps, run_config, policy]
    elif isinstance(spec, RoutingTableProviderSpec):
        method = "get_routing_table_impl"
        config = None
        args = [spec.api, inner_impls, deps, dist_registry, policy]
    else:
        method = "get_provider_impl"
        config = instantiate_class_type(spec.config_class)(**provider.config)
        args = [config, deps]

        factory_params = inspect.signature(getattr(module, method)).parameters
        if "policy" in factory_params:
            args.append(policy)
        if "telemetry_enabled" in factory_params and run_config.telemetry:
            args.append(run_config.telemetry.enabled)

    factory = getattr(module, method)
    impl = await factory(*args)

    # Tag the instance so later code (and error messages) can identify its origin.
    impl.__provider_id__ = provider.provider_id
    impl.__provider_spec__ = spec
    impl.__provider_config__ = config

    expected = api_protocol_map_for_compliance_check(run_config)
    extras = additional_protocols_map()
    # TODO: check compliance for special tool groups
    # the impl should be for Api.tool_runtime, the name should be the special tool group, the protocol should be the special tool group protocol
    check_protocol_compliance(impl, expected[spec.api])
    if spec.api in extras and not isinstance(spec, AutoRoutedProviderSpec):
        extra_protocol, _, _ = extras[spec.api]
        check_protocol_compliance(impl, extra_protocol)

    return impl
434
+
435
+
436
def check_protocol_compliance(obj: Any, protocol: Any) -> None:
    """Verify that ``obj`` implements every non-alpha web method declared on ``protocol``.

    Only protocol members that are plain functions carrying a ``__webmethods__``
    attribute are checked; a member is skipped when any of its web methods has
    level ``LLAMA_STACK_API_V1ALPHA``. For each remaining member, ``obj`` must:

    * have an attribute of that name (``"missing"`` otherwise),
    * that is callable (``"not_callable"``),
    * whose parameter names are a superset of the protocol's (``"signature_mismatch"``),
    * and that is defined somewhere other than solely in a class named like the
      protocol (``"not_actually_implemented"``).

    Raises:
        ValueError: If any problem was recorded; the message lists all of them.
    """
    problems = []
    class_chain = type(obj).__mro__

    for name, member in inspect.getmembers(protocol):
        if not (inspect.isfunction(member) and hasattr(member, "__webmethods__")):
            continue

        # if this API has multiple webmethods, and one of them is an alpha API, this API should be skipped when checking for missing or not callable routes
        if any(wm.level == LLAMA_STACK_API_V1ALPHA for wm in member.__webmethods__):
            continue

        if not hasattr(obj, name):
            problems.append((name, "missing"))
            continue
        if not callable(getattr(obj, name)):
            problems.append((name, "not_callable"))
            continue

        # Compare parameter names only; "self" is ignored on both sides.
        implementation = getattr(obj, name)
        proto_params = set(inspect.signature(member).parameters)
        proto_params.discard("self")
        obj_params = set(inspect.signature(implementation).parameters)
        obj_params.discard("self")
        if not proto_params.issubset(obj_params):
            logger.error(f"Method {name} incompatible proto: {proto_params} vs. obj: {obj_params}")
            problems.append((name, "signature_mismatch"))
            continue

        # Mixins and parents may supply the method; reject only when the sole
        # class defining it is the protocol itself (i.e. just the stub).
        definers = [cls for cls in class_chain if name in cls.__dict__]
        if len(definers) == 1 and definers[0].__name__ == protocol.__name__:
            problems.append((name, "not_actually_implemented"))

    if problems:
        raise ValueError(
            f"Provider `{obj.__provider_id__} ({obj.__provider_spec__.api})` does not implement the following methods:\n{problems}"
        )
481
+
482
+
483
async def resolve_remote_stack_impls(
    config: RemoteProviderConfig,
    apis: list[str],
) -> dict[Api, Any]:
    """Build a client implementation for each requested API of a remote stack.

    For every name in ``apis`` a client is created from the protocol registered
    for that API. When the API also has an entry in the additional-protocols
    map, a second client is created and stored under the additional API taken
    from that entry.

    Returns:
        Mapping of ``Api`` (including any additional APIs) to its client.
    """
    protocol_for = api_protocol_map()
    extras = additional_protocols_map()

    clients = {}
    for name in apis:
        api = Api(name)
        clients[api] = await get_client_impl(protocol_for[api], config, {})

        if api not in extras:
            continue
        _, extra_protocol, extra_api = extras[api]
        clients[extra_api] = await get_client_impl(extra_protocol, config, {})

    return clients
@@ -0,0 +1,101 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Any
8
+
9
+ from llama_stack.core.datatypes import (
10
+ AccessRule,
11
+ RoutedProtocol,
12
+ )
13
+ from llama_stack.core.stack import StackRunConfig
14
+ from llama_stack.core.store import DistributionRegistry
15
+ from llama_stack.providers.datatypes import Api, RoutingTable
16
+ from llama_stack.providers.utils.inference.inference_store import InferenceStore
17
+
18
+
19
async def get_routing_table_impl(
    api: Api,
    impls_by_provider_id: dict[str, RoutedProtocol],
    _deps,
    dist_registry: DistributionRegistry,
    policy: list[AccessRule],
) -> Any:
    """Create and initialize the routing table that belongs to ``api``.

    Args:
        api: API whose ``value`` selects the routing-table class.
        impls_by_provider_id: Passed to the routing-table constructor.
        _deps: Unused.
        dist_registry: Passed to the routing-table constructor.
        policy: Passed to the routing-table constructor.

    Returns:
        The routing table, after ``initialize()`` has been awaited.

    Raises:
        ValueError: If no routing table is registered for ``api``.
    """
    # Imported here rather than at module level, as in the original layout.
    from ..routing_tables.benchmarks import BenchmarksRoutingTable
    from ..routing_tables.datasets import DatasetsRoutingTable
    from ..routing_tables.models import ModelsRoutingTable
    from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
    from ..routing_tables.shields import ShieldsRoutingTable
    from ..routing_tables.toolgroups import ToolGroupsRoutingTable
    from ..routing_tables.vector_stores import VectorStoresRoutingTable

    table_classes = {
        "models": ModelsRoutingTable,
        "shields": ShieldsRoutingTable,
        "datasets": DatasetsRoutingTable,
        "scoring_functions": ScoringFunctionsRoutingTable,
        "benchmarks": BenchmarksRoutingTable,
        "tool_groups": ToolGroupsRoutingTable,
        "vector_stores": VectorStoresRoutingTable,
    }

    table_cls = table_classes.get(api.value)
    if table_cls is None:
        raise ValueError(f"API {api.value} not found in router map")

    table = table_cls(impls_by_provider_id, dist_registry, policy)
    await table.initialize()
    return table
50
+
51
+
52
async def get_auto_router_impl(
    api: Api, routing_table: RoutingTable, deps: dict[Api, Any], run_config: StackRunConfig, policy: list[AccessRule]
) -> Any:
    """Create and initialize the auto-routing implementation for ``api``.

    Args:
        api: API whose ``value`` selects the router class.
        routing_table: Passed as the first positional argument to the router.
        deps: Already-built implementations keyed by ``Api``; only consulted for
            the telemetry dependency of the inference router.
        run_config: Source of the telemetry flag, the inference store reference
            and the vector-stores configuration.
        policy: Passed to the inference store.

    Returns:
        The router, after ``initialize()`` has been awaited.

    Raises:
        ValueError: If no router is registered for ``api``, or if the inference
            router is requested without ``storage.stores.inference`` configured.
    """
    # Imported here rather than at module level, as in the original layout.
    from .datasets import DatasetIORouter
    from .eval_scoring import EvalRouter, ScoringRouter
    from .inference import InferenceRouter
    from .safety import SafetyRouter
    from .tool_runtime import ToolRuntimeRouter
    from .vector_io import VectorIORouter

    api_to_routers = {
        "vector_io": VectorIORouter,
        "inference": InferenceRouter,
        "safety": SafetyRouter,
        "datasetio": DatasetIORouter,
        "scoring": ScoringRouter,
        "eval": EvalRouter,
        "tool_runtime": ToolRuntimeRouter,
    }
    if api.value not in api_to_routers:
        raise ValueError(f"API {api.value} not found in router map")

    # Extra keyword arguments handed to the router constructor.
    api_to_dep_impl = {}

    # Guard against an unset telemetry section before reading `.enabled`.
    if run_config.telemetry and run_config.telemetry.enabled:
        api_to_deps = {
            "inference": {"telemetry": Api.telemetry},
        }
        for dep_name, dep_api in api_to_deps.get(api.value, {}).items():
            if dep_api in deps:
                api_to_dep_impl[dep_name] = deps[dep_api]

    # TODO: move pass configs to routers instead
    if api == Api.inference:
        inference_ref = run_config.storage.stores.inference
        if not inference_ref:
            raise ValueError("storage.stores.inference must be configured in run config")

        inference_store = InferenceStore(
            reference=inference_ref,
            policy=policy,
        )
        await inference_store.initialize()
        api_to_dep_impl["store"] = inference_store

    elif api == Api.vector_io:
        api_to_dep_impl["vector_stores_config"] = run_config.vector_stores

    impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
    await impl.initialize()
    return impl
@@ -0,0 +1,73 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Any
8
+
9
+ from llama_stack.apis.common.responses import PaginatedResponse
10
+ from llama_stack.apis.datasetio import DatasetIO
11
+ from llama_stack.apis.datasets import DatasetPurpose, DataSource
12
+ from llama_stack.log import get_logger
13
+ from llama_stack.providers.datatypes import RoutingTable
14
+
15
+ logger = get_logger(name=__name__, category="core::routers")
16
+
17
+
18
class DatasetIORouter(DatasetIO):
    """DatasetIO front end that delegates to a routing table.

    Dataset registration goes to the routing table itself; row operations go to
    the provider implementation the routing table returns for the dataset id.
    """

    def __init__(
        self,
        routing_table: RoutingTable,
    ) -> None:
        """Store the routing table used for all delegation."""
        logger.debug("Initializing DatasetIORouter")
        self.routing_table = routing_table

    async def initialize(self) -> None:
        """Log the call; nothing else is set up here."""
        logger.debug("DatasetIORouter.initialize")

    async def shutdown(self) -> None:
        """Log the call; nothing else is torn down here."""
        logger.debug("DatasetIORouter.shutdown")

    async def register_dataset(
        self,
        purpose: DatasetPurpose,
        source: DataSource,
        metadata: dict[str, Any] | None = None,
        dataset_id: str | None = None,
    ) -> None:
        """Forward a dataset registration to the routing table."""
        logger.debug(f"DatasetIORouter.register_dataset: {purpose=} {source=} {metadata=} {dataset_id=}")
        registration = {
            "purpose": purpose,
            "source": source,
            "metadata": metadata,
            "dataset_id": dataset_id,
        }
        await self.routing_table.register_dataset(**registration)

    async def iterrows(
        self,
        dataset_id: str,
        start_index: int | None = None,
        limit: int | None = None,
    ) -> PaginatedResponse:
        """Return rows of ``dataset_id`` from the provider that serves it."""
        logger.debug(f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}")
        backend = await self.routing_table.get_provider_impl(dataset_id)
        page = await backend.iterrows(dataset_id=dataset_id, start_index=start_index, limit=limit)
        return page

    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
        """Append ``rows`` to ``dataset_id`` via the provider that serves it."""
        logger.debug(f"DatasetIORouter.append_rows: {dataset_id}, {len(rows)} rows")
        backend = await self.routing_table.get_provider_impl(dataset_id)
        # The provider's result is returned as-is, matching the original behaviour.
        return await backend.append_rows(dataset_id=dataset_id, rows=rows)