llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
@@ -3,39 +3,281 @@
3
3
  #
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
- from typing import Tuple
7
6
 
8
- from llama_models.llama3.api.chat_format import ChatFormat
9
- from termcolor import cprint
10
-
11
- from llama_models.llama3.api.datatypes import * # noqa: F403
12
- from llama_stack.apis.inference import * # noqa: F403
13
- from llama_models.datatypes import ModelFamily
14
- from llama_models.llama3.prompt_templates import (
7
+ import asyncio
8
+ import base64
9
+ import io
10
+ import json
11
+ import re
12
+ from typing import Any
13
+
14
+ import httpx
15
+ from PIL import Image as PIL_Image
16
+
17
+ from llama_stack.apis.common.content_types import (
18
+ ImageContentItem,
19
+ InterleavedContent,
20
+ InterleavedContentItem,
21
+ TextContentItem,
22
+ )
23
+ from llama_stack.apis.inference import (
24
+ ChatCompletionRequest,
25
+ CompletionRequest,
26
+ Message,
27
+ OpenAIChatCompletionContentPartImageParam,
28
+ OpenAIChatCompletionContentPartTextParam,
29
+ OpenAIFile,
30
+ ResponseFormat,
31
+ ResponseFormatType,
32
+ SystemMessage,
33
+ SystemMessageBehavior,
34
+ ToolChoice,
35
+ ToolDefinition,
36
+ UserMessage,
37
+ )
38
+ from llama_stack.log import get_logger
39
+ from llama_stack.models.llama.datatypes import (
40
+ RawContent,
41
+ RawContentItem,
42
+ RawMediaItem,
43
+ RawMessage,
44
+ RawTextItem,
45
+ Role,
46
+ StopReason,
47
+ ToolPromptFormat,
48
+ )
49
+ from llama_stack.models.llama.llama3.chat_format import ChatFormat
50
+ from llama_stack.models.llama.llama3.prompt_templates import (
15
51
  BuiltinToolGenerator,
16
52
  FunctionTagCustomToolGenerator,
17
53
  JsonCustomToolGenerator,
18
54
  PythonListCustomToolGenerator,
19
55
  SystemDefaultGenerator,
20
56
  )
21
- from llama_models.sku_list import resolve_model
22
-
57
+ from llama_stack.models.llama.llama3.tokenizer import Tokenizer
58
+ from llama_stack.models.llama.llama4.prompt_templates.system_prompts import (
59
+ PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4,
60
+ )
61
+ from llama_stack.models.llama.sku_list import resolve_model
62
+ from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal
23
63
  from llama_stack.providers.utils.inference import supported_inference_models
24
64
 
65
+ log = get_logger(name=__name__, category="providers::utils")
66
+
67
+
68
+ class ChatCompletionRequestWithRawContent(ChatCompletionRequest):
69
+ messages: list[RawMessage]
70
+
71
+
72
+ class CompletionRequestWithRawContent(CompletionRequest):
73
+ content: RawContent
74
+
25
75
 
26
- def chat_completion_request_to_prompt(
27
- request: ChatCompletionRequest, formatter: ChatFormat
76
+ def decode_assistant_message(content: str, stop_reason: StopReason) -> RawMessage:
77
+ formatter = ChatFormat(Tokenizer.get_instance())
78
+ return formatter.decode_assistant_message_from_content(content, stop_reason)
79
+
80
+
81
+ def interleaved_content_as_str(
82
+ content: Any,
83
+ sep: str = " ",
28
84
  ) -> str:
29
- messages = chat_completion_request_to_messages(request)
30
- model_input = formatter.encode_dialog_prompt(messages)
85
+ if content is None:
86
+ return ""
87
+
88
+ def _process(c) -> str:
89
+ if isinstance(c, str):
90
+ return c
91
+ elif isinstance(c, TextContentItem) or isinstance(c, OpenAIChatCompletionContentPartTextParam):
92
+ return c.text
93
+ elif isinstance(c, ImageContentItem) or isinstance(c, OpenAIChatCompletionContentPartImageParam):
94
+ return "<image>"
95
+ elif isinstance(c, OpenAIFile):
96
+ return "<file>"
97
+ else:
98
+ raise ValueError(f"Unsupported content type: {type(c)}")
99
+
100
+ if isinstance(content, list):
101
+ return sep.join(_process(c) for c in content)
102
+ else:
103
+ return _process(content)
104
+
105
+
106
+ async def convert_request_to_raw(
107
+ request: ChatCompletionRequest | CompletionRequest,
108
+ ) -> ChatCompletionRequestWithRawContent | CompletionRequestWithRawContent:
109
+ if isinstance(request, ChatCompletionRequest):
110
+ messages = []
111
+ for m in request.messages:
112
+ content = await interleaved_content_convert_to_raw(m.content)
113
+ d = m.model_dump()
114
+ d["content"] = content
115
+ messages.append(RawMessage(**d))
116
+
117
+ d = request.model_dump()
118
+ d["messages"] = messages
119
+ request = ChatCompletionRequestWithRawContent(**d)
120
+ else:
121
+ d = request.model_dump()
122
+ d["content"] = await interleaved_content_convert_to_raw(request.content)
123
+ request = CompletionRequestWithRawContent(**d)
124
+
125
+ return request
126
+
127
+
128
+ async def interleaved_content_convert_to_raw(
129
+ content: InterleavedContent,
130
+ ) -> RawContent:
131
+ """Download content from URLs / files etc. so plain bytes can be sent to the model"""
132
+
133
+ async def _localize_single(c: str | InterleavedContentItem) -> str | RawContentItem:
134
+ if isinstance(c, str):
135
+ return RawTextItem(text=c)
136
+ elif isinstance(c, TextContentItem):
137
+ return RawTextItem(text=c.text)
138
+ elif isinstance(c, ImageContentItem):
139
+ image = c.image
140
+ if image.url:
141
+ # Load image bytes from URL
142
+ if image.url.uri.startswith("data"):
143
+ match = re.match(r"data:image/(\w+);base64,(.+)", image.url.uri)
144
+ if not match:
145
+ raise ValueError(f"Invalid data URL format, {image.url.uri[:40]}...")
146
+ _, image_data = match.groups()
147
+ data = base64.b64decode(image_data)
148
+ elif image.url.uri.startswith("file://"):
149
+ path = image.url.uri[len("file://") :]
150
+ with open(path, "rb") as f:
151
+ data = f.read() # type: ignore
152
+ elif image.url.uri.startswith("http"):
153
+ async with httpx.AsyncClient() as client:
154
+ response = await client.get(image.url.uri)
155
+ data = response.content
156
+ else:
157
+ raise ValueError("Unsupported URL type")
158
+ elif image.data:
159
+ # data is a base64 encoded string, decode it to bytes for RawMediaItem
160
+ data = base64.b64decode(image.data)
161
+ else:
162
+ raise ValueError("No data or URL provided")
163
+
164
+ return RawMediaItem(data=data)
165
+ else:
166
+ raise ValueError(f"Unsupported content type: {type(c)}")
167
+
168
+ if isinstance(content, list):
169
+ return await asyncio.gather(*(_localize_single(c) for c in content))
170
+ else:
171
+ return await _localize_single(content)
172
+
173
+
174
+ def content_has_media(content: InterleavedContent):
175
+ def _has_media_content(c):
176
+ return isinstance(c, ImageContentItem)
177
+
178
+ if isinstance(content, list):
179
+ return any(_has_media_content(c) for c in content)
180
+ else:
181
+ return _has_media_content(content)
182
+
183
+
184
+ def messages_have_media(messages: list[Message]):
185
+ return any(content_has_media(m.content) for m in messages)
186
+
187
+
188
+ def request_has_media(request: ChatCompletionRequest | CompletionRequest):
189
+ if isinstance(request, ChatCompletionRequest):
190
+ return messages_have_media(request.messages)
191
+ else:
192
+ return content_has_media(request.content)
193
+
194
+
195
+ async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
196
+ if uri.startswith("http"):
197
+ async with httpx.AsyncClient() as client:
198
+ r = await client.get(uri)
199
+ content = r.content
200
+ content_type = r.headers.get("content-type")
201
+ if content_type:
202
+ format = content_type.split("/")[-1]
203
+ else:
204
+ format = "png"
205
+
206
+ return content, format
207
+ elif uri.startswith("data"):
208
+ # data:image/{format};base64,{data}
209
+ match = re.match(r"data:image/(\w+);base64,(.+)", uri)
210
+ if not match:
211
+ raise ValueError(f"Invalid data URL format, {uri[:40]}...")
212
+ fmt, image_data = match.groups()
213
+ content = base64.b64decode(image_data)
214
+ return content, fmt
215
+ else:
216
+ return None
217
+
218
+
219
+ async def convert_image_content_to_url(
220
+ media: ImageContentItem, download: bool = False, include_format: bool = True
221
+ ) -> str:
222
+ image = media.image
223
+ if image.url and (not download or image.url.uri.startswith("data")):
224
+ return image.url.uri
225
+
226
+ if image.data:
227
+ # data is a base64 encoded string, decode it to bytes first
228
+ # TODO(mf): do this more efficiently, decode less
229
+ content = base64.b64decode(image.data)
230
+ pil_image = PIL_Image.open(io.BytesIO(content))
231
+ format = pil_image.format
232
+ else:
233
+ localize_result = await localize_image_content(image.url.uri)
234
+ if localize_result is None:
235
+ raise ValueError(f"Failed to localize image content from {image.url.uri}")
236
+ content, format = localize_result
237
+
238
+ if include_format:
239
+ return f"data:image/{format};base64," + base64.b64encode(content).decode("utf-8")
240
+ else:
241
+ return base64.b64encode(content).decode("utf-8")
242
+
243
+
244
+ def augment_content_with_response_format_prompt(response_format, content):
245
+ if fmt_prompt := response_format_prompt(response_format):
246
+ if isinstance(content, list):
247
+ return content + [TextContentItem(text=fmt_prompt)]
248
+ elif isinstance(content, str):
249
+ return [TextContentItem(text=content), TextContentItem(text=fmt_prompt)]
250
+ else:
251
+ return [content, TextContentItem(text=fmt_prompt)]
252
+
253
+ return content
254
+
255
+
256
+ async def chat_completion_request_to_prompt(request: ChatCompletionRequest, llama_model: str) -> str:
257
+ messages = chat_completion_request_to_messages(request, llama_model)
258
+ request.messages = messages
259
+ request = await convert_request_to_raw(request)
260
+
261
+ formatter = ChatFormat(tokenizer=Tokenizer.get_instance())
262
+ model_input = formatter.encode_dialog_prompt(
263
+ request.messages,
264
+ tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model),
265
+ )
31
266
  return formatter.tokenizer.decode(model_input.tokens)
32
267
 
33
268
 
34
- def chat_completion_request_to_model_input_info(
35
- request: ChatCompletionRequest, formatter: ChatFormat
36
- ) -> Tuple[str, int]:
37
- messages = chat_completion_request_to_messages(request)
38
- model_input = formatter.encode_dialog_prompt(messages)
269
+ async def chat_completion_request_to_model_input_info(
270
+ request: ChatCompletionRequest, llama_model: str
271
+ ) -> tuple[str, int]:
272
+ messages = chat_completion_request_to_messages(request, llama_model)
273
+ request.messages = messages
274
+ request = await convert_request_to_raw(request)
275
+
276
+ formatter = ChatFormat(tokenizer=Tokenizer.get_instance())
277
+ model_input = formatter.encode_dialog_prompt(
278
+ request.messages,
279
+ tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model),
280
+ )
39
281
  return (
40
282
  formatter.tokenizer.decode(model_input.tokens),
41
283
  len(model_input.tokens),
@@ -44,45 +286,67 @@ def chat_completion_request_to_model_input_info(
44
286
 
45
287
  def chat_completion_request_to_messages(
46
288
  request: ChatCompletionRequest,
47
- ) -> List[Message]:
289
+ llama_model: str,
290
+ ) -> list[Message]:
48
291
  """Reads chat completion request and augments the messages to handle tools.
49
292
  For eg. for llama_3_1, add system message with the appropriate tools or
50
293
  add user messsage for custom tools, etc.
51
294
  """
52
- model = resolve_model(request.model)
295
+ assert llama_model is not None, "llama_model is required"
296
+ model = resolve_model(llama_model)
53
297
  if model is None:
54
- cprint(f"Could not resolve model {request.model}", color="red")
298
+ log.error(f"Could not resolve model {llama_model}")
55
299
  return request.messages
56
300
 
57
- if model.descriptor() not in supported_inference_models():
58
- cprint(f"Unsupported inference model? {model.descriptor()}", color="red")
301
+ allowed_models = supported_inference_models()
302
+ descriptors = [m.descriptor() for m in allowed_models]
303
+ if model.descriptor() not in descriptors:
304
+ log.error(f"Unsupported inference model? {model.descriptor()}")
59
305
  return request.messages
60
306
 
61
307
  if model.model_family == ModelFamily.llama3_1 or (
62
- model.model_family == ModelFamily.llama3_2
63
- and is_multimodal(model.core_model_id)
308
+ model.model_family == ModelFamily.llama3_2 and is_multimodal(model.core_model_id)
64
309
  ):
65
310
  # llama3.1 and llama3.2 multimodal models follow the same tool prompt format
66
- return augment_messages_for_tools_llama_3_1(request)
67
- elif model.model_family == ModelFamily.llama3_2:
68
- return augment_messages_for_tools_llama_3_2(request)
311
+ messages = augment_messages_for_tools_llama_3_1(request)
312
+ elif model.model_family in (
313
+ ModelFamily.llama3_2,
314
+ ModelFamily.llama3_3,
315
+ ):
316
+ # llama3.2, llama3.3 follow the same tool prompt format
317
+ messages = augment_messages_for_tools_llama(request, PythonListCustomToolGenerator)
318
+ elif model.model_family == ModelFamily.llama4:
319
+ messages = augment_messages_for_tools_llama(request, PythonListCustomToolGeneratorLlama4)
69
320
  else:
70
- return request.messages
321
+ messages = request.messages
322
+
323
+ if fmt_prompt := response_format_prompt(request.response_format):
324
+ messages.append(UserMessage(content=fmt_prompt))
325
+
326
+ return messages
327
+
328
+
329
+ def response_format_prompt(fmt: ResponseFormat | None):
330
+ if not fmt:
331
+ return None
332
+
333
+ if fmt.type == ResponseFormatType.json_schema.value:
334
+ return f"Please respond in JSON format with the schema: {json.dumps(fmt.json_schema)}"
335
+ elif fmt.type == ResponseFormatType.grammar.value:
336
+ raise NotImplementedError("Grammar response format not supported yet")
337
+ else:
338
+ raise ValueError(f"Unknown response format {fmt.type}")
71
339
 
72
340
 
73
341
  def augment_messages_for_tools_llama_3_1(
74
342
  request: ChatCompletionRequest,
75
- ) -> List[Message]:
76
- assert request.tool_choice == ToolChoice.auto, "Only `ToolChoice.auto` supported"
77
-
343
+ ) -> list[Message]:
78
344
  existing_messages = request.messages
79
345
  existing_system_message = None
80
346
  if existing_messages[0].role == Role.system.value:
81
347
  existing_system_message = existing_messages.pop(0)
82
348
 
83
- assert (
84
- existing_messages[0].role != Role.system.value
85
- ), "Should only have 1 system message"
349
+ assert existing_messages[0].role != Role.system.value, "Should only have 1 system message"
86
350
 
87
351
  messages = []
88
352
 
@@ -114,22 +378,23 @@ def augment_messages_for_tools_llama_3_1(
114
378
  if isinstance(existing_system_message.content, str):
115
379
  sys_content += _process(existing_system_message.content)
116
380
  elif isinstance(existing_system_message.content, list):
117
- sys_content += "\n".join(
118
- [_process(c) for c in existing_system_message.content]
119
- )
381
+ sys_content += "\n".join([_process(c) for c in existing_system_message.content])
382
+
383
+ tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools)
384
+ if tool_choice_prompt:
385
+ sys_content += "\n" + tool_choice_prompt
120
386
 
121
387
  messages.append(SystemMessage(content=sys_content))
122
388
 
123
- has_custom_tools = any(isinstance(dfn.tool_name, str) for dfn in request.tools)
389
+ has_custom_tools = request.tools is not None and any(isinstance(dfn.tool_name, str) for dfn in request.tools)
124
390
  if has_custom_tools:
125
- if request.tool_prompt_format == ToolPromptFormat.json:
391
+ fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.json
392
+ if fmt == ToolPromptFormat.json:
126
393
  tool_gen = JsonCustomToolGenerator()
127
- elif request.tool_prompt_format == ToolPromptFormat.function_tag:
394
+ elif fmt == ToolPromptFormat.function_tag:
128
395
  tool_gen = FunctionTagCustomToolGenerator()
129
396
  else:
130
- raise ValueError(
131
- f"Non supported ToolPromptFormat {request.tool_prompt_format}"
132
- )
397
+ raise ValueError(f"Non supported ToolPromptFormat {fmt}")
133
398
 
134
399
  custom_tools = [t for t in request.tools if isinstance(t.tool_name, str)]
135
400
  custom_template = tool_gen.gen(custom_tools)
@@ -141,21 +406,17 @@ def augment_messages_for_tools_llama_3_1(
141
406
  return messages
142
407
 
143
408
 
144
- def augment_messages_for_tools_llama_3_2(
409
+ def augment_messages_for_tools_llama(
145
410
  request: ChatCompletionRequest,
146
- ) -> List[Message]:
147
- assert request.tool_choice == ToolChoice.auto, "Only `ToolChoice.auto` supported"
148
-
411
+ custom_tool_prompt_generator,
412
+ ) -> list[Message]:
149
413
  existing_messages = request.messages
150
414
  existing_system_message = None
151
415
  if existing_messages[0].role == Role.system.value:
152
416
  existing_system_message = existing_messages.pop(0)
153
417
 
154
- assert (
155
- existing_messages[0].role != Role.system.value
156
- ), "Should only have 1 system message"
418
+ assert existing_messages[0].role != Role.system.value, "Should only have 1 system message"
157
419
 
158
- messages = []
159
420
  sys_content = ""
160
421
  custom_tools, builtin_tools = [], []
161
422
  for t in request.tools:
@@ -164,7 +425,6 @@ def augment_messages_for_tools_llama_3_2(
164
425
  else:
165
426
  builtin_tools.append(t)
166
427
 
167
- tool_template = None
168
428
  if builtin_tools:
169
429
  tool_gen = BuiltinToolGenerator()
170
430
  tool_template = tool_gen.gen(builtin_tools)
@@ -174,24 +434,62 @@ def augment_messages_for_tools_llama_3_2(
174
434
 
175
435
  custom_tools = [dfn for dfn in request.tools if isinstance(dfn.tool_name, str)]
176
436
  if custom_tools:
177
- if request.tool_prompt_format != ToolPromptFormat.python_list:
178
- raise ValueError(
179
- f"Non supported ToolPromptFormat {request.tool_prompt_format}"
180
- )
437
+ fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.python_list
438
+ if fmt != ToolPromptFormat.python_list:
439
+ raise ValueError(f"Non supported ToolPromptFormat {request.tool_config.tool_prompt_format}")
440
+
441
+ system_prompt = None
442
+ if existing_system_message and request.tool_config.system_message_behavior == SystemMessageBehavior.replace:
443
+ system_prompt = existing_system_message.content
181
444
 
182
- tool_gen = PythonListCustomToolGenerator()
183
- tool_template = tool_gen.gen(custom_tools)
445
+ tool_template = custom_tool_prompt_generator().gen(custom_tools, system_prompt)
184
446
 
185
447
  sys_content += tool_template.render()
186
448
  sys_content += "\n"
187
449
 
188
- if existing_system_message:
189
- sys_content += interleaved_text_media_as_str(
190
- existing_system_message.content, sep="\n"
191
- )
450
+ if existing_system_message and (
451
+ request.tool_config.system_message_behavior == SystemMessageBehavior.append or not custom_tools
452
+ ):
453
+ sys_content += interleaved_content_as_str(existing_system_message.content, sep="\n")
192
454
 
193
- messages.append(SystemMessage(content=sys_content))
455
+ tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools)
456
+ if tool_choice_prompt:
457
+ sys_content += "\n" + tool_choice_prompt
194
458
 
195
- # Add back existing messages from the request
196
- messages += existing_messages
459
+ messages = [SystemMessage(content=sys_content.strip("\n")), *existing_messages]
197
460
  return messages
461
+
462
+
463
+ def _get_tool_choice_prompt(tool_choice: ToolChoice | str, tools: list[ToolDefinition]) -> str:
464
+ if tool_choice == ToolChoice.auto:
465
+ return ""
466
+ elif tool_choice == ToolChoice.required:
467
+ return "You MUST use one of the provided functions/tools to answer the user query."
468
+ elif tool_choice == ToolChoice.none:
469
+ # tools are already not passed in
470
+ return ""
471
+ else:
472
+ # specific tool
473
+ return f"You MUST use the tool `{tool_choice}` to answer the user query."
474
+
475
+
476
+ def get_default_tool_prompt_format(model: str) -> ToolPromptFormat:
477
+ llama_model = resolve_model(model)
478
+ if llama_model is None:
479
+ log.warning(f"Could not resolve model {model}, defaulting to json tool prompt format")
480
+ return ToolPromptFormat.json
481
+
482
+ if llama_model.model_family == ModelFamily.llama3_1 or (
483
+ llama_model.model_family == ModelFamily.llama3_2 and is_multimodal(llama_model.core_model_id)
484
+ ):
485
+ # llama3.1 and llama3.2 multimodal models follow the same tool prompt format
486
+ return ToolPromptFormat.json
487
+ elif llama_model.model_family in (
488
+ ModelFamily.llama3_2,
489
+ ModelFamily.llama3_3,
490
+ ModelFamily.llama4,
491
+ ):
492
+ # llama3.2 and llama3.3 models follow the same tool prompt format
493
+ return ToolPromptFormat.python_list
494
+ else:
495
+ return ToolPromptFormat.json
@@ -5,17 +5,17 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
  from datetime import datetime
8
- from typing import List, Optional, Protocol
8
+ from typing import Protocol
9
9
 
10
10
 
11
11
  class KVStore(Protocol):
12
12
  # TODO: make the value type bytes instead of str
13
- async def set(
14
- self, key: str, value: str, expiration: Optional[datetime] = None
15
- ) -> None: ...
13
+ async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: ...
16
14
 
17
- async def get(self, key: str) -> Optional[str]: ...
15
+ async def get(self, key: str) -> str | None: ...
18
16
 
19
17
  async def delete(self, key: str) -> None: ...
20
18
 
21
- async def range(self, start_key: str, end_key: str) -> List[str]: ...
19
+ async def values_in_range(self, start_key: str, end_key: str) -> list[str]: ...
20
+
21
+ async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ...
@@ -4,56 +4,36 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from enum import Enum
8
- from typing import Literal, Optional, Union
7
+ from typing import Annotated
9
8
 
10
- from pydantic import BaseModel, Field
11
- from typing_extensions import Annotated
12
-
13
- from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
14
-
15
-
16
- class KVStoreType(Enum):
17
- redis = "redis"
18
- sqlite = "sqlite"
19
- postgres = "postgres"
20
-
21
-
22
- class CommonConfig(BaseModel):
23
- namespace: Optional[str] = Field(
24
- default=None,
25
- description="All keys will be prefixed with this namespace",
26
- )
27
-
28
-
29
- class RedisKVStoreConfig(CommonConfig):
30
- type: Literal[KVStoreType.redis.value] = KVStoreType.redis.value
31
- host: str = "localhost"
32
- port: int = 6379
33
-
34
- @property
35
- def url(self) -> str:
36
- return f"redis://{self.host}:{self.port}"
37
-
38
-
39
- class SqliteKVStoreConfig(CommonConfig):
40
- type: Literal[KVStoreType.sqlite.value] = KVStoreType.sqlite.value
41
- db_path: str = Field(
42
- default=(RUNTIME_BASE_DIR / "kvstore.db").as_posix(),
43
- description="File path for the sqlite database",
44
- )
45
-
46
-
47
- class PostgresKVStoreConfig(CommonConfig):
48
- type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value
49
- host: str = "localhost"
50
- port: int = 5432
51
- db: str = "llamastack"
52
- user: str
53
- password: Optional[str] = None
9
+ from pydantic import Field
54
10
 
11
+ from llama_stack.core.storage.datatypes import (
12
+ MongoDBKVStoreConfig,
13
+ PostgresKVStoreConfig,
14
+ RedisKVStoreConfig,
15
+ SqliteKVStoreConfig,
16
+ StorageBackendType,
17
+ )
55
18
 
56
19
  KVStoreConfig = Annotated[
57
- Union[RedisKVStoreConfig, SqliteKVStoreConfig, PostgresKVStoreConfig],
58
- Field(discriminator="type", default=KVStoreType.sqlite.value),
20
+ RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, Field(discriminator="type")
59
21
  ]
22
+
23
+
24
+ def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]:
25
+ """Get pip packages for KV store config, handling both dict and object cases."""
26
+ if isinstance(store_config, dict):
27
+ store_type = store_config.get("type")
28
+ if store_type == StorageBackendType.KV_SQLITE.value:
29
+ return SqliteKVStoreConfig.pip_packages()
30
+ elif store_type == StorageBackendType.KV_POSTGRES.value:
31
+ return PostgresKVStoreConfig.pip_packages()
32
+ elif store_type == StorageBackendType.KV_REDIS.value:
33
+ return RedisKVStoreConfig.pip_packages()
34
+ elif store_type == StorageBackendType.KV_MONGODB.value:
35
+ return MongoDBKVStoreConfig.pip_packages()
36
+ else:
37
+ raise ValueError(f"Unknown KV store type: {store_type}")
38
+ else:
39
+ return store_config.pip_packages()