llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1024 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ import copy
8
+ import json
9
+ import re
10
+ import uuid
11
+ import warnings
12
+ from collections.abc import AsyncGenerator
13
+ from datetime import UTC, datetime
14
+
15
+ import httpx
16
+
17
+ from llama_stack.apis.agents import (
18
+ AgentConfig,
19
+ AgentToolGroup,
20
+ AgentToolGroupWithArgs,
21
+ AgentTurnCreateRequest,
22
+ AgentTurnResponseEvent,
23
+ AgentTurnResponseEventType,
24
+ AgentTurnResponseStepCompletePayload,
25
+ AgentTurnResponseStepProgressPayload,
26
+ AgentTurnResponseStepStartPayload,
27
+ AgentTurnResponseStreamChunk,
28
+ AgentTurnResponseTurnAwaitingInputPayload,
29
+ AgentTurnResponseTurnCompletePayload,
30
+ AgentTurnResumeRequest,
31
+ Attachment,
32
+ Document,
33
+ InferenceStep,
34
+ ShieldCallStep,
35
+ StepType,
36
+ ToolExecutionStep,
37
+ Turn,
38
+ )
39
+ from llama_stack.apis.common.content_types import (
40
+ URL,
41
+ TextContentItem,
42
+ ToolCallDelta,
43
+ ToolCallParseStatus,
44
+ )
45
+ from llama_stack.apis.common.errors import SessionNotFoundError
46
+ from llama_stack.apis.inference import (
47
+ ChatCompletionResponseEventType,
48
+ CompletionMessage,
49
+ Inference,
50
+ Message,
51
+ OpenAIAssistantMessageParam,
52
+ OpenAIChatCompletionRequestWithExtraBody,
53
+ OpenAIDeveloperMessageParam,
54
+ OpenAIMessageParam,
55
+ OpenAISystemMessageParam,
56
+ OpenAIToolMessageParam,
57
+ OpenAIUserMessageParam,
58
+ SamplingParams,
59
+ StopReason,
60
+ SystemMessage,
61
+ ToolDefinition,
62
+ ToolResponse,
63
+ ToolResponseMessage,
64
+ UserMessage,
65
+ )
66
+ from llama_stack.apis.safety import Safety
67
+ from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
68
+ from llama_stack.apis.vector_io import VectorIO
69
+ from llama_stack.core.datatypes import AccessRule
70
+ from llama_stack.log import get_logger
71
+ from llama_stack.models.llama.datatypes import (
72
+ BuiltinTool,
73
+ ToolCall,
74
+ )
75
+ from llama_stack.providers.utils.inference.openai_compat import (
76
+ convert_message_to_openai_dict_new,
77
+ convert_openai_chat_completion_stream,
78
+ convert_tooldef_to_openai_tool,
79
+ )
80
+ from llama_stack.providers.utils.kvstore import KVStore
81
+ from llama_stack.providers.utils.telemetry import tracing
82
+
83
+ from .persistence import AgentPersistence
84
+ from .safety import SafetyException, ShieldRunnerMixin
85
+
86
+ TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
87
+ MEMORY_QUERY_TOOL = "knowledge_search"
88
+ WEB_SEARCH_TOOL = "web_search"
89
+ RAG_TOOL_GROUP = "builtin::rag"
90
+
91
+ logger = get_logger(name=__name__, category="agents::meta_reference")
92
+
93
+
94
class ChatAgent(ShieldRunnerMixin):
    def __init__(
        self,
        agent_id: str,
        agent_config: AgentConfig,
        inference_api: Inference,
        safety_api: Safety,
        tool_runtime_api: ToolRuntime,
        tool_groups_api: ToolGroups,
        vector_io_api: VectorIO,
        persistence_store: KVStore,
        created_at: str,
        policy: list[AccessRule],
        telemetry_enabled: bool = False,
    ):
        """Wire up the agent's identity, configuration, API handles and durable storage."""
        # Identity and configuration.
        self.agent_id = agent_id
        self.agent_config = agent_config
        self.created_at = created_at
        self.telemetry_enabled = telemetry_enabled

        # Handles to the downstream APIs this agent orchestrates.
        self.inference_api = inference_api
        self.safety_api = safety_api
        self.vector_io_api = vector_io_api
        self.tool_runtime_api = tool_runtime_api
        self.tool_groups_api = tool_groups_api

        # Durable per-agent state (sessions, turns, in-progress steps).
        self.storage = AgentPersistence(agent_id, persistence_store, policy)

        # Shield execution plumbing comes from the mixin.
        ShieldRunnerMixin.__init__(
            self,
            safety_api,
            input_shields=agent_config.input_shields,
            output_shields=agent_config.output_shields,
        )
126
+
127
def turn_to_messages(self, turn: Turn) -> list[Message]:
    """Flatten a stored Turn back into the message list it contributed."""
    # Tool responses that already live inside a tool_execution step must not
    # be duplicated from the raw input messages; collect their call ids first.
    answered_call_ids = {
        response.call_id
        for step in turn.steps
        if step.step_type == StepType.tool_execution.value
        for response in step.tool_responses
    }

    messages: list[Message] = []
    for original in turn.input_messages:
        msg = original.model_copy()
        # Drop RAG context so it is not re-injected on every subsequent turn.
        # May be this should be a parameter of the agentic instance
        # that can define its behavior in a custom way.
        if isinstance(msg, UserMessage):
            msg.context = None
        # Skip tool responses that the tool_execution steps below will replay.
        if isinstance(msg, ToolResponseMessage) and msg.call_id in answered_call_ids:
            continue
        messages.append(msg)

    for step in turn.steps:
        if step.step_type == StepType.inference.value:
            messages.append(step.model_response)
        elif step.step_type == StepType.tool_execution.value:
            messages.extend(
                ToolResponseMessage(
                    call_id=response.call_id,
                    content=response.content,
                )
                for response in step.tool_responses
            )
        elif step.step_type == StepType.shield_call.value and step.violation:
            # Surface the violation as a CompletionMessage from the assistant.
            messages.append(
                CompletionMessage(
                    content=step.violation.user_message,
                    stop_reason=StopReason.end_of_turn,
                )
            )
    return messages
172
+
173
async def create_session(self, name: str) -> str:
    """Create a named session for this agent and return its id."""
    session_id = await self.storage.create_session(name)
    return session_id
175
+
176
async def get_messages_from_turns(self, turns: list[Turn]) -> list[Message]:
    """Rebuild the full conversation history from the stored turns."""
    history: list[Message] = []
    # Prepend the system prompt unless the agent was configured without one.
    if self.agent_config.instructions != "":
        history.append(SystemMessage(content=self.agent_config.instructions))
    for turn in turns:
        history.extend(self.turn_to_messages(turn))
    return history
184
+
185
async def create_and_execute_turn(self, request: AgentTurnCreateRequest) -> AsyncGenerator:
    """Start a brand-new turn in the session and stream its response chunks."""
    turn_id = str(uuid.uuid4())
    if self.telemetry_enabled and (span := tracing.get_current_span()) is not None:
        # Tag the active span so traces can be filtered by agent/session/turn.
        attributes = {
            "session_id": request.session_id,
            "agent_id": self.agent_id,
            "request": request.model_dump_json(),
            "turn_id": turn_id,
        }
        if self.agent_config.name:
            attributes["agent_name"] = self.agent_config.name
        for key, value in attributes.items():
            span.set_attribute(key, value)

    await self._initialize_tools(request.toolgroups)
    async for chunk in self._run_turn(request, turn_id):
        yield chunk
200
+
201
async def resume_turn(self, request: AgentTurnResumeRequest) -> AsyncGenerator:
    """Resume a turn that paused awaiting tool responses; stream its chunks."""
    if self.telemetry_enabled and (span := tracing.get_current_span()) is not None:
        # Tag the active span so traces can be filtered by agent/session/turn.
        attributes = {
            "agent_id": self.agent_id,
            "session_id": request.session_id,
            "request": request.model_dump_json(),
            "turn_id": request.turn_id,
        }
        if self.agent_config.name:
            attributes["agent_name"] = self.agent_config.name
        for key, value in attributes.items():
            span.set_attribute(key, value)

    # No extra toolgroups on resume: the turn's tools were fixed at creation.
    await self._initialize_tools()
    async for chunk in self._run_turn(request):
        yield chunk
215
+
216
async def _run_turn(
    self,
    request: AgentTurnCreateRequest | AgentTurnResumeRequest,
    turn_id: str | None = None,
) -> AsyncGenerator:
    """Execute a single turn (new or resumed) and stream its response chunks.

    On resume, the last turn's pending tool-execution step is completed with
    the client-supplied tool responses before inference continues. The final
    Turn is persisted, and the stream ends with either a turn-complete chunk
    or a turn-awaiting-input chunk (when the model requested more client-side
    tool calls).

    Args:
        request: the create or resume request (must have stream=True).
        turn_id: id for a newly created turn; ignored on resume, where the
            id comes from the request.

    Raises:
        SessionNotFoundError: if the session id is unknown.
        ValueError: if resuming a session that has no turns.
    """
    assert request.stream is True, "Non-streaming not supported"

    is_resume = isinstance(request, AgentTurnResumeRequest)
    session_info = await self.storage.get_session_info(request.session_id)
    if session_info is None:
        raise SessionNotFoundError(request.session_id)

    turns = await self.storage.get_session_turns(request.session_id)
    if is_resume and len(turns) == 0:
        raise ValueError("No turns found for session")

    steps = []
    messages = await self.get_messages_from_turns(turns)
    if is_resume:
        tool_response_messages = [
            ToolResponseMessage(call_id=x.call_id, content=x.content) for x in request.tool_responses
        ]
        messages.extend(tool_response_messages)
        last_turn = turns[-1]

        # Copy the stored turn's step list so the append below does not
        # mutate the persisted Turn object in place.
        steps = list(last_turn.steps)

        # Mark the tool-execution step as complete. If there's no in-progress
        # step (due to storage, or tool-call parsing on the client), create a
        # new tool-execution step stamped with the current time.
        in_progress_tool_call_step = await self.storage.get_in_progress_tool_call_step(
            request.session_id, request.turn_id
        )
        now = datetime.now(UTC).isoformat()
        tool_execution_step = ToolExecutionStep(
            step_id=(in_progress_tool_call_step.step_id if in_progress_tool_call_step else str(uuid.uuid4())),
            turn_id=request.turn_id,
            tool_calls=(in_progress_tool_call_step.tool_calls if in_progress_tool_call_step else []),
            tool_responses=request.tool_responses,
            completed_at=now,
            started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now),
        )
        steps.append(tool_execution_step)
        yield AgentTurnResponseStreamChunk(
            event=AgentTurnResponseEvent(
                payload=AgentTurnResponseStepCompletePayload(
                    step_type=StepType.tool_execution.value,
                    step_id=tool_execution_step.step_id,
                    step_details=tool_execution_step,
                )
            )
        )
        input_messages = last_turn.input_messages

        turn_id = request.turn_id
        start_time = last_turn.started_at
    else:
        messages.extend(request.messages)
        start_time = datetime.now(UTC).isoformat()
        input_messages = request.messages

    output_message = None
    async for chunk in self.run(
        session_id=request.session_id,
        turn_id=turn_id,
        input_messages=messages,
        sampling_params=self.agent_config.sampling_params,
        stream=request.stream,
        documents=request.documents if not is_resume else None,
    ):
        if isinstance(chunk, CompletionMessage):
            # The final answer is captured, not streamed through as-is.
            output_message = chunk
            continue

        assert isinstance(chunk, AgentTurnResponseStreamChunk), f"Unexpected type {type(chunk)}"
        event = chunk.event
        # Accumulate completed steps so the persisted Turn records them all.
        if event.payload.event_type == AgentTurnResponseEventType.step_complete.value:
            steps.append(event.payload.step_details)

        yield chunk

    assert output_message is not None

    turn = Turn(
        turn_id=turn_id,
        session_id=request.session_id,
        input_messages=input_messages,
        output_message=output_message,
        started_at=start_time,
        completed_at=datetime.now(UTC).isoformat(),
        steps=steps,
    )
    await self.storage.add_turn_to_session(request.session_id, turn)
    # If the model asked for more (client-executed) tool calls, the turn
    # pauses awaiting input; otherwise it is complete.
    if output_message.tool_calls:
        chunk = AgentTurnResponseStreamChunk(
            event=AgentTurnResponseEvent(
                payload=AgentTurnResponseTurnAwaitingInputPayload(
                    turn=turn,
                )
            )
        )
    else:
        chunk = AgentTurnResponseStreamChunk(
            event=AgentTurnResponseEvent(
                payload=AgentTurnResponseTurnCompletePayload(
                    turn=turn,
                )
            )
        )

    yield chunk
333
+
334
async def run(
    self,
    session_id: str,
    turn_id: str,
    input_messages: list[Message],
    sampling_params: SamplingParams,
    stream: bool = False,
    documents: list[Document] | None = None,
) -> AsyncGenerator:
    """Run input shields, the main inference/tool loop, then output shields.

    Doing async generators makes downstream code much simpler and everything
    amenable to streaming. However, AsyncGenerators cannot return a "final
    value" for the `yield from` statement, so sub-generators simulate it:
    yielding a bool means a shield violation ended the turn, and yielding a
    CompletionMessage carries the final answer. Everything else is streamed
    through unchanged; the final CompletionMessage is yielded last.
    """
    if len(self.input_shields) > 0:
        async for res in self.run_multiple_shields_wrapper(
            turn_id, input_messages, self.input_shields, "user-input"
        ):
            if isinstance(res, bool):
                # Input shield tripped; the wrapper already emitted the
                # violation events, so just end the turn.
                return
            else:
                yield res

    # Initialize up front so that, if _run finishes without yielding a
    # CompletionMessage, the assert below fails cleanly instead of raising
    # UnboundLocalError.
    final_response = None
    async for res in self._run(
        session_id,
        turn_id,
        input_messages,
        sampling_params,
        stream,
        documents,
    ):
        if isinstance(res, bool):
            return
        elif isinstance(res, CompletionMessage):
            final_response = res
            break
        else:
            yield res

    assert final_response is not None
    # for output shields run on the full input and output combination
    messages = input_messages + [final_response]

    if len(self.output_shields) > 0:
        async for res in self.run_multiple_shields_wrapper(
            turn_id, messages, self.output_shields, "assistant-output"
        ):
            if isinstance(res, bool):
                return
            else:
                yield res

    yield final_response
387
+
388
+ async def run_multiple_shields_wrapper(
389
+ self,
390
+ turn_id: str,
391
+ messages: list[Message],
392
+ shields: list[str],
393
+ touchpoint: str,
394
+ ) -> AsyncGenerator:
395
+ async with tracing.span("run_shields") as span:
396
+ if self.telemetry_enabled and span is not None:
397
+ span.set_attribute("input", [m.model_dump_json() for m in messages])
398
+ if len(shields) == 0:
399
+ span.set_attribute("output", "no shields")
400
+
401
+ if len(shields) == 0:
402
+ return
403
+
404
+ step_id = str(uuid.uuid4())
405
+ shield_call_start_time = datetime.now(UTC).isoformat()
406
+ try:
407
+ yield AgentTurnResponseStreamChunk(
408
+ event=AgentTurnResponseEvent(
409
+ payload=AgentTurnResponseStepStartPayload(
410
+ step_type=StepType.shield_call.value,
411
+ step_id=step_id,
412
+ metadata=dict(touchpoint=touchpoint),
413
+ )
414
+ )
415
+ )
416
+ await self.run_multiple_shields(messages, shields)
417
+
418
+ except SafetyException as e:
419
+ yield AgentTurnResponseStreamChunk(
420
+ event=AgentTurnResponseEvent(
421
+ payload=AgentTurnResponseStepCompletePayload(
422
+ step_type=StepType.shield_call.value,
423
+ step_id=step_id,
424
+ step_details=ShieldCallStep(
425
+ step_id=step_id,
426
+ turn_id=turn_id,
427
+ violation=e.violation,
428
+ started_at=shield_call_start_time,
429
+ completed_at=datetime.now(UTC).isoformat(),
430
+ ),
431
+ )
432
+ )
433
+ )
434
+ if self.telemetry_enabled and span is not None:
435
+ span.set_attribute("output", e.violation.model_dump_json())
436
+
437
+ yield CompletionMessage(
438
+ content=str(e),
439
+ stop_reason=StopReason.end_of_turn,
440
+ )
441
+ yield False
442
+
443
+ yield AgentTurnResponseStreamChunk(
444
+ event=AgentTurnResponseEvent(
445
+ payload=AgentTurnResponseStepCompletePayload(
446
+ step_type=StepType.shield_call.value,
447
+ step_id=step_id,
448
+ step_details=ShieldCallStep(
449
+ step_id=step_id,
450
+ turn_id=turn_id,
451
+ violation=None,
452
+ started_at=shield_call_start_time,
453
+ completed_at=datetime.now(UTC).isoformat(),
454
+ ),
455
+ )
456
+ )
457
+ )
458
+ if self.telemetry_enabled and span is not None:
459
+ span.set_attribute("output", "no violations")
460
+
461
    async def _run(
        self,
        session_id: str,
        turn_id: str,
        input_messages: list[Message],
        sampling_params: SamplingParams,
        stream: bool = False,
        documents: list[Document] | None = None,
    ) -> AsyncGenerator:
        """Drive the inference/tool-execution loop for one turn.

        Yields AgentTurnResponseStreamChunk progress events, and terminates by
        yielding a CompletionMessage: either the final model response (no tool
        calls left, max iterations, or token budget exhausted) or a message with
        stop_reason end_of_message carrying client tool calls for the caller to
        execute out-of-band.
        """
        # if document is passed in a turn, we parse the raw text of the document
        # and sent it as a user message
        if documents:
            contexts = []
            for document in documents:
                raw_document_text = await get_raw_document_text(document)
                contexts.append(raw_document_text)

            # Append the document text to the last input message, coercing the
            # content into a list form when it is not a plain string.
            attached_context = "\n".join(contexts)
            if isinstance(input_messages[-1].content, str):
                input_messages[-1].content += attached_context
            elif isinstance(input_messages[-1].content, list):
                input_messages[-1].content.append(TextContentItem(text=attached_context))
            else:
                input_messages[-1].content = [
                    input_messages[-1].content,
                    TextContentItem(text=attached_context),
                ]

        session_info = await self.storage.get_session_info(session_id)
        # if the session has a memory bank id, let the memory tool use it
        if session_info and session_info.vector_db_id:
            for tool_name in self.tool_name_to_args.keys():
                if tool_name == MEMORY_QUERY_TOOL:
                    if "vector_db_ids" not in self.tool_name_to_args[tool_name]:
                        self.tool_name_to_args[tool_name]["vector_db_ids"] = [session_info.vector_db_id]
                    else:
                        self.tool_name_to_args[tool_name]["vector_db_ids"].append(session_info.vector_db_id)

        # Attachments produced by tool outputs, attached to the final message.
        output_attachments = []

        # Resume the inference-iteration counter if this turn was continued.
        n_iter = await self.storage.get_num_infer_iters_in_turn(session_id, turn_id) or 0

        # Build a map of custom tools to their definitions for faster lookup
        client_tools = {}
        for tool in self.agent_config.client_tools:
            client_tools[tool.name] = tool
        while True:
            step_id = str(uuid.uuid4())
            inference_start_time = datetime.now(UTC).isoformat()
            yield AgentTurnResponseStreamChunk(
                event=AgentTurnResponseEvent(
                    payload=AgentTurnResponseStepStartPayload(
                        step_type=StepType.inference.value,
                        step_id=step_id,
                    )
                )
            )

            # Accumulators for this inference step.
            tool_calls = []
            content = ""
            stop_reason: StopReason | None = None

            async with tracing.span("inference") as span:
                if self.telemetry_enabled and span is not None:
                    if self.agent_config.name:
                        span.set_attribute("agent_name", self.agent_config.name)

                def _serialize_nested(value):
                    """Recursively serialize nested Pydantic models to dicts."""
                    from pydantic import BaseModel

                    if isinstance(value, BaseModel):
                        return value.model_dump(mode="json")
                    elif isinstance(value, dict):
                        return {k: _serialize_nested(v) for k, v in value.items()}
                    elif isinstance(value, list):
                        return [_serialize_nested(item) for item in value]
                    else:
                        return value

                def _add_type(openai_msg: dict) -> OpenAIMessageParam:
                    """Wrap a plain message dict in the typed OpenAI param for its role."""
                    # Serialize any nested Pydantic models to plain dicts
                    openai_msg = _serialize_nested(openai_msg)

                    role = openai_msg.get("role")
                    if role == "user":
                        return OpenAIUserMessageParam(**openai_msg)
                    elif role == "system":
                        return OpenAISystemMessageParam(**openai_msg)
                    elif role == "assistant":
                        return OpenAIAssistantMessageParam(**openai_msg)
                    elif role == "tool":
                        return OpenAIToolMessageParam(**openai_msg)
                    elif role == "developer":
                        return OpenAIDeveloperMessageParam(**openai_msg)
                    else:
                        raise ValueError(f"Unknown message role: {role}")

                # Convert messages to OpenAI format
                openai_messages: list[OpenAIMessageParam] = [
                    _add_type(await convert_message_to_openai_dict_new(message)) for message in input_messages
                ]

                # Convert tool definitions to OpenAI format
                openai_tools = [convert_tooldef_to_openai_tool(x) for x in (self.tool_defs or [])]

                # Extract tool_choice from tool_config for OpenAI compatibility
                # Note: tool_choice can only be provided when tools are also provided
                tool_choice = None
                if openai_tools and self.agent_config.tool_config and self.agent_config.tool_config.tool_choice:
                    tc = self.agent_config.tool_config.tool_choice
                    # tool_choice may be an enum or a plain string; normalize.
                    tool_choice_str = tc.value if hasattr(tc, "value") else str(tc)
                    # Convert tool_choice to OpenAI format
                    if tool_choice_str in ("auto", "none", "required"):
                        tool_choice = tool_choice_str
                    else:
                        # It's a specific tool name, wrap it in the proper format
                        tool_choice = {"type": "function", "function": {"name": tool_choice_str}}

                # Convert sampling params to OpenAI format (temperature, top_p, max_tokens)
                temperature = getattr(getattr(sampling_params, "strategy", None), "temperature", None)
                top_p = getattr(getattr(sampling_params, "strategy", None), "top_p", None)
                max_tokens = getattr(sampling_params, "max_tokens", None)

                # Use OpenAI chat completion
                params = OpenAIChatCompletionRequestWithExtraBody(
                    model=self.agent_config.model,
                    messages=openai_messages,
                    tools=openai_tools if openai_tools else None,
                    tool_choice=tool_choice,
                    response_format=self.agent_config.response_format,
                    temperature=temperature,
                    top_p=top_p,
                    max_tokens=max_tokens,
                    stream=True,
                )
                openai_stream = await self.inference_api.openai_chat_completion(params)

                # Convert OpenAI stream back to Llama Stack format
                response_stream = convert_openai_chat_completion_stream(
                    openai_stream, enable_incremental_tool_calls=True
                )

                async for chunk in response_stream:
                    event = chunk.event
                    if event.event_type == ChatCompletionResponseEventType.start:
                        continue
                    elif event.event_type == ChatCompletionResponseEventType.complete:
                        stop_reason = event.stop_reason or StopReason.end_of_turn
                        continue

                    delta = event.delta
                    if delta.type == "tool_call":
                        if delta.parse_status == ToolCallParseStatus.succeeded:
                            tool_calls.append(delta.tool_call)
                        elif delta.parse_status == ToolCallParseStatus.failed:
                            # If we cannot parse the tools, set the content to the unparsed raw text
                            content = str(delta.tool_call)
                        if stream:
                            yield AgentTurnResponseStreamChunk(
                                event=AgentTurnResponseEvent(
                                    payload=AgentTurnResponseStepProgressPayload(
                                        step_type=StepType.inference.value,
                                        step_id=step_id,
                                        delta=delta,
                                    )
                                )
                            )

                    elif delta.type == "text":
                        content += delta.text
                        if stream and event.stop_reason is None:
                            yield AgentTurnResponseStreamChunk(
                                event=AgentTurnResponseEvent(
                                    payload=AgentTurnResponseStepProgressPayload(
                                        step_type=StepType.inference.value,
                                        step_id=step_id,
                                        delta=delta,
                                    )
                                )
                            )
                    else:
                        raise ValueError(f"Unexpected delta type {type(delta)}")

                if self.telemetry_enabled and span is not None:
                    span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn)
                    span.set_attribute(
                        "input",
                        json.dumps([json.loads(m.model_dump_json()) for m in input_messages]),
                    )
                    output_attr = json.dumps(
                        {
                            "content": content,
                            "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
                        }
                    )
                    span.set_attribute("output", output_attr)

            n_iter += 1
            await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter)

            # If the stream ended without a complete event, assume we ran out of tokens.
            stop_reason = stop_reason or StopReason.out_of_tokens

            # If tool calls are parsed successfully,
            # if content is not made null the tool call str will also be in the content
            # and tokens will have tool call syntax included twice
            if tool_calls:
                content = ""

            message = CompletionMessage(
                content=content,
                stop_reason=stop_reason,
                tool_calls=tool_calls,
            )

            yield AgentTurnResponseStreamChunk(
                event=AgentTurnResponseEvent(
                    payload=AgentTurnResponseStepCompletePayload(
                        step_type=StepType.inference.value,
                        step_id=step_id,
                        step_details=InferenceStep(
                            # somewhere deep, we are re-assigning message or closing over some
                            # variable which causes message to mutate later on. fix with a
                            # `deepcopy` for now, but this is symptomatic of a deeper issue.
                            step_id=step_id,
                            turn_id=turn_id,
                            model_response=copy.deepcopy(message),
                            started_at=inference_start_time,
                            completed_at=datetime.now(UTC).isoformat(),
                        ),
                    )
                )
            )

            if n_iter >= self.agent_config.max_infer_iters:
                logger.info(f"done with MAX iterations ({n_iter}), exiting.")
                # NOTE: mark end_of_turn to indicate to client that we are done with the turn
                # Do not continue the tool call loop after this point
                message.stop_reason = StopReason.end_of_turn
                yield message
                break

            if stop_reason == StopReason.out_of_tokens:
                logger.info("out of token budget, exiting.")
                yield message
                break

            if len(message.tool_calls) == 0:
                if stop_reason == StopReason.end_of_turn:
                    # TODO: UPDATE RETURN TYPE TO SEND A TUPLE OF (MESSAGE, ATTACHMENTS)
                    if len(output_attachments) > 0:
                        if isinstance(message.content, list):
                            message.content += output_attachments
                        else:
                            message.content = [message.content] + output_attachments
                    yield message
                else:
                    # end_of_message: feed the message back and keep iterating.
                    logger.debug(f"completion message with EOM (iter: {n_iter}): {str(message)}")
                    input_messages = input_messages + [message]
            else:
                input_messages = input_messages + [message]

                # Process tool calls in the message
                client_tool_calls = []
                non_client_tool_calls = []

                # Separate client and non-client tool calls
                for tool_call in message.tool_calls:
                    if tool_call.tool_name in client_tools:
                        client_tool_calls.append(tool_call)
                    else:
                        non_client_tool_calls.append(tool_call)

                # Process non-client tool calls first
                for tool_call in non_client_tool_calls:
                    step_id = str(uuid.uuid4())
                    yield AgentTurnResponseStreamChunk(
                        event=AgentTurnResponseEvent(
                            payload=AgentTurnResponseStepStartPayload(
                                step_type=StepType.tool_execution.value,
                                step_id=step_id,
                            )
                        )
                    )

                    yield AgentTurnResponseStreamChunk(
                        event=AgentTurnResponseEvent(
                            payload=AgentTurnResponseStepProgressPayload(
                                step_type=StepType.tool_execution.value,
                                step_id=step_id,
                                delta=ToolCallDelta(
                                    parse_status=ToolCallParseStatus.in_progress,
                                    tool_call=tool_call,
                                ),
                            )
                        )
                    )

                    # Execute the tool call
                    async with tracing.span(
                        "tool_execution",
                        {
                            "tool_name": tool_call.tool_name,
                            "input": message.model_dump_json(),
                        }
                        if self.telemetry_enabled
                        else {},
                    ) as span:
                        tool_execution_start_time = datetime.now(UTC).isoformat()
                        tool_result = await self.execute_tool_call_maybe(
                            session_id,
                            tool_call,
                        )
                        if tool_result.content is None:
                            raise ValueError(
                                f"Tool call result (id: {tool_call.call_id}, name: {tool_call.tool_name}) does not have any content"
                            )
                        result_message = ToolResponseMessage(
                            call_id=tool_call.call_id,
                            content=tool_result.content,
                        )
                        if self.telemetry_enabled and span is not None:
                            span.set_attribute("output", result_message.model_dump_json())

                        # Store tool execution step
                        tool_execution_step = ToolExecutionStep(
                            step_id=step_id,
                            turn_id=turn_id,
                            tool_calls=[tool_call],
                            tool_responses=[
                                ToolResponse(
                                    call_id=tool_call.call_id,
                                    tool_name=tool_call.tool_name,
                                    content=tool_result.content,
                                    metadata=tool_result.metadata,
                                )
                            ],
                            started_at=tool_execution_start_time,
                            completed_at=datetime.now(UTC).isoformat(),
                        )

                        # Yield the step completion event
                        yield AgentTurnResponseStreamChunk(
                            event=AgentTurnResponseEvent(
                                payload=AgentTurnResponseStepCompletePayload(
                                    step_type=StepType.tool_execution.value,
                                    step_id=step_id,
                                    step_details=tool_execution_step,
                                )
                            )
                        )

                    # Add the result message to input_messages for the next iteration
                    input_messages.append(result_message)

                    # TODO: add tool-input touchpoint and a "start" event for this step also
                    # but that needs a lot more refactoring of Tool code potentially
                    if (type(result_message.content) is str) and (
                        out_attachment := _interpret_content_as_attachment(result_message.content)
                    ):
                        # NOTE: when we push this message back to the model, the model may ignore the
                        # attached file path etc. since the model is trained to only provide a user message
                        # with the summary. We keep all generated attachments and then attach them to final message
                        output_attachments.append(out_attachment)

                # If there are client tool calls, yield a message with only those tool calls
                if client_tool_calls:
                    await self.storage.set_in_progress_tool_call_step(
                        session_id,
                        turn_id,
                        ToolExecutionStep(
                            step_id=step_id,
                            turn_id=turn_id,
                            tool_calls=client_tool_calls,
                            tool_responses=[],
                            started_at=datetime.now(UTC).isoformat(),
                        ),
                    )

                    # Create a copy of the message with only client tool calls
                    client_message = message.model_copy(deep=True)
                    client_message.tool_calls = client_tool_calls
                    # NOTE: mark end_of_message to indicate to client that it may
                    # call the tool and continue the conversation with the tool's response.
                    client_message.stop_reason = StopReason.end_of_message

                    # Yield the message with client tool calls
                    yield client_message
                    return

    async def _initialize_tools(
        self,
        toolgroups_for_turn: list[AgentToolGroup] | None = None,
    ) -> None:
        """Resolve client tools and configured toolgroups into tool definitions.

        Populates ``self.tool_defs`` (list of ToolDefinition) and
        ``self.tool_name_to_args`` (per-tool argument overrides). Toolgroups
        passed for the current turn take precedence over the agent config's
        toolgroups. Raises ValueError on duplicate tool names, unknown
        toolgroups, or an unknown tool within a toolgroup.
        """
        # Collect per-toolgroup argument overrides declared on AgentToolGroupWithArgs.
        toolgroup_to_args = {}
        for toolgroup in (self.agent_config.toolgroups or []) + (toolgroups_for_turn or []):
            if isinstance(toolgroup, AgentToolGroupWithArgs):
                tool_group_name, _ = self._parse_toolgroup_name(toolgroup.name)
                toolgroup_to_args[tool_group_name] = toolgroup.args

        # Determine which tools to include
        tool_groups_to_include = toolgroups_for_turn or self.agent_config.toolgroups or []
        # De-duplicated, order-preserving list of toolgroup name strings.
        agent_config_toolgroups = []
        for toolgroup in tool_groups_to_include:
            name = toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup
            if name not in agent_config_toolgroups:
                agent_config_toolgroups.append(name)

        toolgroup_to_args = toolgroup_to_args or {}

        tool_name_to_def = {}
        tool_name_to_args = {}

        # Client tools are registered first and must not collide with each other.
        for tool_def in self.agent_config.client_tools:
            if tool_name_to_def.get(tool_def.name, None):
                raise ValueError(f"Tool {tool_def.name} already exists")

            # Use input_schema from ToolDef directly
            tool_name_to_def[tool_def.name] = ToolDefinition(
                tool_name=tool_def.name,
                description=tool_def.description,
                input_schema=tool_def.input_schema,
            )
        for toolgroup_name_with_maybe_tool_name in agent_config_toolgroups:
            # A name may be "group" or "group/tool" to select a single tool.
            toolgroup_name, input_tool_name = self._parse_toolgroup_name(toolgroup_name_with_maybe_tool_name)
            tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_name)
            if not tools.data:
                available_tool_groups = ", ".join(
                    [t.identifier for t in (await self.tool_groups_api.list_tool_groups()).data]
                )
                raise ValueError(f"Toolgroup {toolgroup_name} not found, available toolgroups: {available_tool_groups}")
            if input_tool_name is not None and not any(tool.name == input_tool_name for tool in tools.data):
                raise ValueError(
                    f"Tool {input_tool_name} not found in toolgroup {toolgroup_name}. Available tools: {', '.join([tool.name for tool in tools.data])}"
                )

            for tool_def in tools.data:
                # Builtin groups (except RAG) are keyed by BuiltinTool enum members.
                if toolgroup_name.startswith("builtin") and toolgroup_name != RAG_TOOL_GROUP:
                    identifier: str | BuiltinTool | None = tool_def.name
                    if identifier == "web_search":
                        # "web_search" is the public alias for brave_search.
                        identifier = BuiltinTool.brave_search
                    else:
                        identifier = BuiltinTool(identifier)
                else:
                    # add if tool_name is unspecified or the tool_def identifier is the same as the tool_name
                    if input_tool_name in (None, tool_def.name):
                        identifier = tool_def.name
                    else:
                        # Filtered out by the "group/tool" selector.
                        identifier = None

                if tool_name_to_def.get(identifier, None):
                    raise ValueError(f"Tool {identifier} already exists")
                if identifier:
                    tool_name_to_def[identifier] = ToolDefinition(
                        tool_name=identifier,
                        description=tool_def.description,
                        input_schema=tool_def.input_schema,
                    )
                    tool_name_to_args[identifier] = toolgroup_to_args.get(toolgroup_name, {})

        self.tool_defs, self.tool_name_to_args = (
            list(tool_name_to_def.values()),
            tool_name_to_args,
        )

+ def _parse_toolgroup_name(self, toolgroup_name_with_maybe_tool_name: str) -> tuple[str, str | None]:
927
+ """Parse a toolgroup name into its components.
928
+
929
+ Args:
930
+ toolgroup_name: The toolgroup name to parse (e.g. "builtin::rag/knowledge_search")
931
+
932
+ Returns:
933
+ A tuple of (tool_type, tool_group, tool_name)
934
+ """
935
+ split_names = toolgroup_name_with_maybe_tool_name.split("/")
936
+ if len(split_names) == 2:
937
+ # e.g. "builtin::rag"
938
+ tool_group, tool_name = split_names
939
+ else:
940
+ tool_group, tool_name = split_names[0], None
941
+ return tool_group, tool_name
942
+
943
+ async def execute_tool_call_maybe(
944
+ self,
945
+ session_id: str,
946
+ tool_call: ToolCall,
947
+ ) -> ToolInvocationResult:
948
+ tool_name = tool_call.tool_name
949
+ registered_tool_names = [tool_def.tool_name for tool_def in self.tool_defs]
950
+ if tool_name not in registered_tool_names:
951
+ raise ValueError(
952
+ f"Tool {tool_name} not found in provided tools, registered tools: {', '.join([str(x) for x in registered_tool_names])}"
953
+ )
954
+ if isinstance(tool_name, BuiltinTool):
955
+ if tool_name == BuiltinTool.brave_search:
956
+ tool_name_str = WEB_SEARCH_TOOL
957
+ else:
958
+ tool_name_str = tool_name.value
959
+ else:
960
+ tool_name_str = tool_name
961
+
962
+ logger.info(f"executing tool call: {tool_name_str} with args: {tool_call.arguments}")
963
+
964
+ try:
965
+ args = json.loads(tool_call.arguments)
966
+ except json.JSONDecodeError as e:
967
+ raise ValueError(f"Failed to parse arguments for tool call: {tool_call.arguments}") from e
968
+
969
+ result = await self.tool_runtime_api.invoke_tool(
970
+ tool_name=tool_name_str,
971
+ kwargs={
972
+ "session_id": session_id,
973
+ # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
974
+ **args,
975
+ **self.tool_name_to_args.get(tool_name_str, {}),
976
+ },
977
+ )
978
+ logger.debug(f"tool call {tool_name_str} completed with result: {result}")
979
+ return result
980
+
981
+
982
async def load_data_from_url(url: str) -> str:
    """Fetch *url* over HTTP(S) and return the response body as text.

    Args:
        url: The URL to fetch; only http/https schemes are supported.

    Returns:
        The decoded response body.

    Raises:
        ValueError: if the URL does not use an http(s) scheme.
        httpx.HTTPStatusError: if the server responds with a 4xx/5xx status.
    """
    if url.startswith("http"):
        async with httpx.AsyncClient() as client:
            r = await client.get(url)
            # ROBUSTNESS: previously a 4xx/5xx response silently returned the
            # error page body as if it were the document's text.
            r.raise_for_status()
            return r.text
    # BUG FIX: the old message interpolated type(url) — always <class 'str'> —
    # hiding the actual offending value.
    raise ValueError(f"Unexpected URL: {url}")

async def get_raw_document_text(document: Document) -> str:
    """Return the plain-text content of *document*, fetching it when it is a URL.

    Accepts text/* MIME types plus application/yaml and application/json;
    "text/yaml" is tolerated with a DeprecationWarning.

    Raises:
        ValueError: for unsupported MIME types or content containers.
    """
    mime = document.mime_type
    if mime == "text/yaml":
        # Deprecated alias — warn but still accept it.
        warnings.warn(
            "The 'text/yaml' MIME type is deprecated. Please use 'application/yaml' instead.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        textual = mime.startswith("text/")
        structured = mime in ("application/yaml", "application/json")
        if not (textual or structured):
            raise ValueError(f"Unexpected document mime type: {mime}")

    content = document.content
    if isinstance(content, URL):
        return await load_data_from_url(content.uri)
    if isinstance(content, str):
        return content
    if isinstance(content, TextContentItem):
        return content.text
    raise ValueError(f"Unexpected document content type: {type(content)}")

def _interpret_content_as_attachment(
    content: str,
) -> Attachment | None:
    """Extract an Attachment from tool output when it embeds the attachment marker.

    Returns:
        An Attachment built from the marker's JSON payload (filepath + mimetype),
        or None when *content* carries no marker.
    """
    marker = re.search(TOOLS_ATTACHMENT_KEY_REGEX, content)
    if marker is None:
        return None
    payload = json.loads(marker.group(1))
    return Attachment(
        url=URL(uri="file://" + payload["filepath"]),
        mime_type=payload["mimetype"],
    )
