llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
@@ -1,844 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the terms described in the LICENSE file in
5
- # the root directory of this source tree.
6
-
7
- import asyncio
8
- import copy
9
- import os
10
- import re
11
- import secrets
12
- import shutil
13
- import string
14
- import tempfile
15
- import uuid
16
- from datetime import datetime
17
- from typing import AsyncGenerator, List, Tuple
18
- from urllib.parse import urlparse
19
-
20
- import httpx
21
-
22
- from termcolor import cprint
23
-
24
- from llama_stack.apis.agents import * # noqa: F403
25
- from llama_stack.apis.inference import * # noqa: F403
26
- from llama_stack.apis.memory import * # noqa: F403
27
- from llama_stack.apis.memory_banks import * # noqa: F403
28
- from llama_stack.apis.safety import * # noqa: F403
29
-
30
- from llama_stack.providers.utils.kvstore import KVStore
31
- from llama_stack.providers.utils.telemetry import tracing
32
-
33
- from .persistence import AgentPersistence
34
- from .rag.context_retriever import generate_rag_query
35
- from .safety import SafetyException, ShieldRunnerMixin
36
- from .tools.base import BaseTool
37
- from .tools.builtin import (
38
- CodeInterpreterTool,
39
- interpret_content_as_attachment,
40
- PhotogenTool,
41
- SearchTool,
42
- WolframAlphaTool,
43
- )
44
- from .tools.safety import SafeTool
45
-
46
-
47
- def make_random_string(length: int = 8):
48
- return "".join(
49
- secrets.choice(string.ascii_letters + string.digits) for _ in range(length)
50
- )
51
-
52
-
53
- class ChatAgent(ShieldRunnerMixin):
54
- def __init__(
55
- self,
56
- agent_id: str,
57
- agent_config: AgentConfig,
58
- inference_api: Inference,
59
- memory_api: Memory,
60
- memory_banks_api: MemoryBanks,
61
- safety_api: Safety,
62
- persistence_store: KVStore,
63
- ):
64
- self.agent_id = agent_id
65
- self.agent_config = agent_config
66
- self.inference_api = inference_api
67
- self.memory_api = memory_api
68
- self.memory_banks_api = memory_banks_api
69
- self.safety_api = safety_api
70
- self.storage = AgentPersistence(agent_id, persistence_store)
71
-
72
- self.tempdir = tempfile.mkdtemp()
73
-
74
- builtin_tools = []
75
- for tool_defn in agent_config.tools:
76
- if isinstance(tool_defn, WolframAlphaToolDefinition):
77
- tool = WolframAlphaTool(tool_defn.api_key)
78
- elif isinstance(tool_defn, SearchToolDefinition):
79
- tool = SearchTool(tool_defn.engine, tool_defn.api_key)
80
- elif isinstance(tool_defn, CodeInterpreterToolDefinition):
81
- tool = CodeInterpreterTool()
82
- elif isinstance(tool_defn, PhotogenToolDefinition):
83
- tool = PhotogenTool(dump_dir=self.tempdir)
84
- else:
85
- continue
86
-
87
- builtin_tools.append(
88
- SafeTool(
89
- tool,
90
- safety_api,
91
- tool_defn.input_shields,
92
- tool_defn.output_shields,
93
- )
94
- )
95
- self.tools_dict = {t.get_name(): t for t in builtin_tools}
96
-
97
- ShieldRunnerMixin.__init__(
98
- self,
99
- safety_api,
100
- input_shields=agent_config.input_shields,
101
- output_shields=agent_config.output_shields,
102
- )
103
-
104
- def __del__(self):
105
- shutil.rmtree(self.tempdir)
106
-
107
- def turn_to_messages(self, turn: Turn) -> List[Message]:
108
- messages = []
109
-
110
- # We do not want to keep adding RAG context to the input messages
111
- # May be this should be a parameter of the agentic instance
112
- # that can define its behavior in a custom way
113
- for m in turn.input_messages:
114
- msg = m.copy()
115
- if isinstance(msg, UserMessage):
116
- msg.context = None
117
- messages.append(msg)
118
-
119
- for step in turn.steps:
120
- if step.step_type == StepType.inference.value:
121
- messages.append(step.model_response)
122
- elif step.step_type == StepType.tool_execution.value:
123
- for response in step.tool_responses:
124
- messages.append(
125
- ToolResponseMessage(
126
- call_id=response.call_id,
127
- tool_name=response.tool_name,
128
- content=response.content,
129
- )
130
- )
131
- elif step.step_type == StepType.shield_call.value:
132
- if step.violation:
133
- # CompletionMessage itself in the ShieldResponse
134
- messages.append(
135
- CompletionMessage(
136
- content=step.violation.user_message,
137
- stop_reason=StopReason.end_of_turn,
138
- )
139
- )
140
- # print_dialog(messages)
141
- return messages
142
-
143
- async def create_session(self, name: str) -> str:
144
- return await self.storage.create_session(name)
145
-
146
- @tracing.span("create_and_execute_turn")
147
- async def create_and_execute_turn(
148
- self, request: AgentTurnCreateRequest
149
- ) -> AsyncGenerator:
150
- assert request.stream is True, "Non-streaming not supported"
151
-
152
- session_info = await self.storage.get_session_info(request.session_id)
153
- if session_info is None:
154
- raise ValueError(f"Session {request.session_id} not found")
155
-
156
- turns = await self.storage.get_session_turns(request.session_id)
157
-
158
- messages = []
159
- if len(turns) == 0 and self.agent_config.instructions != "":
160
- messages.append(SystemMessage(content=self.agent_config.instructions))
161
-
162
- for i, turn in enumerate(turns):
163
- messages.extend(self.turn_to_messages(turn))
164
-
165
- messages.extend(request.messages)
166
-
167
- turn_id = str(uuid.uuid4())
168
- start_time = datetime.now()
169
- yield AgentTurnResponseStreamChunk(
170
- event=AgentTurnResponseEvent(
171
- payload=AgentTurnResponseTurnStartPayload(
172
- turn_id=turn_id,
173
- )
174
- )
175
- )
176
-
177
- steps = []
178
- output_message = None
179
- async for chunk in self.run(
180
- session_id=request.session_id,
181
- turn_id=turn_id,
182
- input_messages=messages,
183
- attachments=request.attachments or [],
184
- sampling_params=self.agent_config.sampling_params,
185
- stream=request.stream,
186
- ):
187
- if isinstance(chunk, CompletionMessage):
188
- cprint(
189
- f"{chunk.role.capitalize()}: {chunk.content}",
190
- "white",
191
- attrs=["bold"],
192
- )
193
- output_message = chunk
194
- continue
195
-
196
- assert isinstance(
197
- chunk, AgentTurnResponseStreamChunk
198
- ), f"Unexpected type {type(chunk)}"
199
- event = chunk.event
200
- if (
201
- event.payload.event_type
202
- == AgentTurnResponseEventType.step_complete.value
203
- ):
204
- steps.append(event.payload.step_details)
205
-
206
- yield chunk
207
-
208
- assert output_message is not None
209
-
210
- turn = Turn(
211
- turn_id=turn_id,
212
- session_id=request.session_id,
213
- input_messages=request.messages,
214
- output_message=output_message,
215
- started_at=start_time,
216
- completed_at=datetime.now(),
217
- steps=steps,
218
- )
219
- await self.storage.add_turn_to_session(request.session_id, turn)
220
-
221
- chunk = AgentTurnResponseStreamChunk(
222
- event=AgentTurnResponseEvent(
223
- payload=AgentTurnResponseTurnCompletePayload(
224
- turn=turn,
225
- )
226
- )
227
- )
228
- yield chunk
229
-
230
- async def run(
231
- self,
232
- session_id: str,
233
- turn_id: str,
234
- input_messages: List[Message],
235
- attachments: List[Attachment],
236
- sampling_params: SamplingParams,
237
- stream: bool = False,
238
- ) -> AsyncGenerator:
239
- # Doing async generators makes downstream code much simpler and everything amenable to
240
- # streaming. However, it also makes things complicated here because AsyncGenerators cannot
241
- # return a "final value" for the `yield from` statement. we simulate that by yielding a
242
- # final boolean (to see whether an exception happened) and then explicitly testing for it.
243
-
244
- async for res in self.run_multiple_shields_wrapper(
245
- turn_id, input_messages, self.input_shields, "user-input"
246
- ):
247
- if isinstance(res, bool):
248
- return
249
- else:
250
- yield res
251
-
252
- async for res in self._run(
253
- session_id, turn_id, input_messages, attachments, sampling_params, stream
254
- ):
255
- if isinstance(res, bool):
256
- return
257
- elif isinstance(res, CompletionMessage):
258
- final_response = res
259
- break
260
- else:
261
- yield res
262
-
263
- assert final_response is not None
264
- # for output shields run on the full input and output combination
265
- messages = input_messages + [final_response]
266
-
267
- async for res in self.run_multiple_shields_wrapper(
268
- turn_id, messages, self.output_shields, "assistant-output"
269
- ):
270
- if isinstance(res, bool):
271
- return
272
- else:
273
- yield res
274
-
275
- yield final_response
276
-
277
- @tracing.span("run_shields")
278
- async def run_multiple_shields_wrapper(
279
- self,
280
- turn_id: str,
281
- messages: List[Message],
282
- shields: List[str],
283
- touchpoint: str,
284
- ) -> AsyncGenerator:
285
- if len(shields) == 0:
286
- return
287
-
288
- step_id = str(uuid.uuid4())
289
- try:
290
- yield AgentTurnResponseStreamChunk(
291
- event=AgentTurnResponseEvent(
292
- payload=AgentTurnResponseStepStartPayload(
293
- step_type=StepType.shield_call.value,
294
- step_id=step_id,
295
- metadata=dict(touchpoint=touchpoint),
296
- )
297
- )
298
- )
299
- await self.run_multiple_shields(messages, shields)
300
-
301
- except SafetyException as e:
302
- yield AgentTurnResponseStreamChunk(
303
- event=AgentTurnResponseEvent(
304
- payload=AgentTurnResponseStepCompletePayload(
305
- step_type=StepType.shield_call.value,
306
- step_details=ShieldCallStep(
307
- step_id=step_id,
308
- turn_id=turn_id,
309
- violation=e.violation,
310
- ),
311
- )
312
- )
313
- )
314
-
315
- yield CompletionMessage(
316
- content=str(e),
317
- stop_reason=StopReason.end_of_turn,
318
- )
319
- yield False
320
-
321
- yield AgentTurnResponseStreamChunk(
322
- event=AgentTurnResponseEvent(
323
- payload=AgentTurnResponseStepCompletePayload(
324
- step_type=StepType.shield_call.value,
325
- step_details=ShieldCallStep(
326
- step_id=step_id,
327
- turn_id=turn_id,
328
- violation=None,
329
- ),
330
- )
331
- )
332
- )
333
-
334
- async def _run(
335
- self,
336
- session_id: str,
337
- turn_id: str,
338
- input_messages: List[Message],
339
- attachments: List[Attachment],
340
- sampling_params: SamplingParams,
341
- stream: bool = False,
342
- ) -> AsyncGenerator:
343
- enabled_tools = set(t.type for t in self.agent_config.tools)
344
- need_rag_context = await self._should_retrieve_context(
345
- input_messages, attachments
346
- )
347
- if need_rag_context:
348
- step_id = str(uuid.uuid4())
349
- yield AgentTurnResponseStreamChunk(
350
- event=AgentTurnResponseEvent(
351
- payload=AgentTurnResponseStepStartPayload(
352
- step_type=StepType.memory_retrieval.value,
353
- step_id=step_id,
354
- )
355
- )
356
- )
357
-
358
- # TODO: find older context from the session and either replace it
359
- # or append with a sliding window. this is really a very simplistic implementation
360
- with tracing.span("retrieve_rag_context"):
361
- rag_context, bank_ids = await self._retrieve_context(
362
- session_id, input_messages, attachments
363
- )
364
-
365
- step_id = str(uuid.uuid4())
366
- yield AgentTurnResponseStreamChunk(
367
- event=AgentTurnResponseEvent(
368
- payload=AgentTurnResponseStepCompletePayload(
369
- step_type=StepType.memory_retrieval.value,
370
- step_id=step_id,
371
- step_details=MemoryRetrievalStep(
372
- turn_id=turn_id,
373
- step_id=step_id,
374
- memory_bank_ids=bank_ids,
375
- inserted_context=rag_context or "",
376
- ),
377
- )
378
- )
379
- )
380
-
381
- if rag_context:
382
- last_message = input_messages[-1]
383
- last_message.context = "\n".join(rag_context)
384
-
385
- elif attachments and AgentTool.code_interpreter.value in enabled_tools:
386
- urls = [a.content for a in attachments if isinstance(a.content, URL)]
387
- # TODO: we need to migrate URL away from str type
388
- pattern = re.compile("^(https?://|file://|data:)")
389
- urls += [
390
- URL(uri=a.content) for a in attachments if pattern.match(a.content)
391
- ]
392
- msg = await attachment_message(self.tempdir, urls)
393
- input_messages.append(msg)
394
-
395
- output_attachments = []
396
-
397
- n_iter = 0
398
- while True:
399
- msg = input_messages[-1]
400
- if msg.role == Role.user.value:
401
- color = "blue"
402
- elif msg.role == Role.ipython.value:
403
- color = "yellow"
404
- else:
405
- color = None
406
- if len(str(msg)) > 1000:
407
- msg_str = f"{str(msg)[:500]}...<more>...{str(msg)[-500:]}"
408
- else:
409
- msg_str = str(msg)
410
- cprint(f"{msg_str}", color=color)
411
-
412
- step_id = str(uuid.uuid4())
413
- yield AgentTurnResponseStreamChunk(
414
- event=AgentTurnResponseEvent(
415
- payload=AgentTurnResponseStepStartPayload(
416
- step_type=StepType.inference.value,
417
- step_id=step_id,
418
- )
419
- )
420
- )
421
-
422
- tool_calls = []
423
- content = ""
424
- stop_reason = None
425
-
426
- with tracing.span("inference"):
427
- async for chunk in self.inference_api.chat_completion(
428
- self.agent_config.model,
429
- input_messages,
430
- tools=self._get_tools(),
431
- tool_prompt_format=self.agent_config.tool_prompt_format,
432
- stream=True,
433
- sampling_params=sampling_params,
434
- ):
435
- event = chunk.event
436
- if event.event_type == ChatCompletionResponseEventType.start:
437
- continue
438
- elif event.event_type == ChatCompletionResponseEventType.complete:
439
- stop_reason = StopReason.end_of_turn
440
- continue
441
-
442
- delta = event.delta
443
- if isinstance(delta, ToolCallDelta):
444
- if delta.parse_status == ToolCallParseStatus.success:
445
- tool_calls.append(delta.content)
446
-
447
- if stream:
448
- yield AgentTurnResponseStreamChunk(
449
- event=AgentTurnResponseEvent(
450
- payload=AgentTurnResponseStepProgressPayload(
451
- step_type=StepType.inference.value,
452
- step_id=step_id,
453
- model_response_text_delta="",
454
- tool_call_delta=delta,
455
- )
456
- )
457
- )
458
-
459
- elif isinstance(delta, str):
460
- content += delta
461
- if stream and event.stop_reason is None:
462
- yield AgentTurnResponseStreamChunk(
463
- event=AgentTurnResponseEvent(
464
- payload=AgentTurnResponseStepProgressPayload(
465
- step_type=StepType.inference.value,
466
- step_id=step_id,
467
- model_response_text_delta=event.delta,
468
- )
469
- )
470
- )
471
- else:
472
- raise ValueError(f"Unexpected delta type {type(delta)}")
473
-
474
- if event.stop_reason is not None:
475
- stop_reason = event.stop_reason
476
-
477
- stop_reason = stop_reason or StopReason.out_of_tokens
478
-
479
- # If tool calls are parsed successfully,
480
- # if content is not made null the tool call str will also be in the content
481
- # and tokens will have tool call syntax included twice
482
- if tool_calls:
483
- content = ""
484
-
485
- message = CompletionMessage(
486
- content=content,
487
- stop_reason=stop_reason,
488
- tool_calls=tool_calls,
489
- )
490
-
491
- yield AgentTurnResponseStreamChunk(
492
- event=AgentTurnResponseEvent(
493
- payload=AgentTurnResponseStepCompletePayload(
494
- step_type=StepType.inference.value,
495
- step_id=step_id,
496
- step_details=InferenceStep(
497
- # somewhere deep, we are re-assigning message or closing over some
498
- # variable which causes message to mutate later on. fix with a
499
- # `deepcopy` for now, but this is symptomatic of a deeper issue.
500
- step_id=step_id,
501
- turn_id=turn_id,
502
- model_response=copy.deepcopy(message),
503
- ),
504
- )
505
- )
506
- )
507
-
508
- if n_iter >= self.agent_config.max_infer_iters:
509
- cprint("Done with MAX iterations, exiting.")
510
- yield message
511
- break
512
-
513
- if stop_reason == StopReason.out_of_tokens:
514
- cprint("Out of token budget, exiting.")
515
- yield message
516
- break
517
-
518
- if len(message.tool_calls) == 0:
519
- if stop_reason == StopReason.end_of_turn:
520
- # TODO: UPDATE RETURN TYPE TO SEND A TUPLE OF (MESSAGE, ATTACHMENTS)
521
- if len(output_attachments) > 0:
522
- if isinstance(message.content, list):
523
- message.content += attachments
524
- else:
525
- message.content = [message.content] + attachments
526
- yield message
527
- else:
528
- cprint(f"Partial message: {str(message)}", color="green")
529
- input_messages = input_messages + [message]
530
- else:
531
- cprint(f"{str(message)}", color="green")
532
- try:
533
- tool_call = message.tool_calls[0]
534
-
535
- name = tool_call.tool_name
536
- if not isinstance(name, BuiltinTool):
537
- yield message
538
- return
539
-
540
- step_id = str(uuid.uuid4())
541
- yield AgentTurnResponseStreamChunk(
542
- event=AgentTurnResponseEvent(
543
- payload=AgentTurnResponseStepStartPayload(
544
- step_type=StepType.tool_execution.value,
545
- step_id=step_id,
546
- )
547
- )
548
- )
549
- yield AgentTurnResponseStreamChunk(
550
- event=AgentTurnResponseEvent(
551
- payload=AgentTurnResponseStepProgressPayload(
552
- step_type=StepType.tool_execution.value,
553
- step_id=step_id,
554
- tool_call=tool_call,
555
- )
556
- )
557
- )
558
-
559
- with tracing.span("tool_execution"):
560
- result_messages = await execute_tool_call_maybe(
561
- self.tools_dict,
562
- [message],
563
- )
564
- assert (
565
- len(result_messages) == 1
566
- ), "Currently not supporting multiple messages"
567
- result_message = result_messages[0]
568
-
569
- yield AgentTurnResponseStreamChunk(
570
- event=AgentTurnResponseEvent(
571
- payload=AgentTurnResponseStepCompletePayload(
572
- step_type=StepType.tool_execution.value,
573
- step_details=ToolExecutionStep(
574
- step_id=step_id,
575
- turn_id=turn_id,
576
- tool_calls=[tool_call],
577
- tool_responses=[
578
- ToolResponse(
579
- call_id=result_message.call_id,
580
- tool_name=result_message.tool_name,
581
- content=result_message.content,
582
- )
583
- ],
584
- ),
585
- )
586
- )
587
- )
588
-
589
- # TODO: add tool-input touchpoint and a "start" event for this step also
590
- # but that needs a lot more refactoring of Tool code potentially
591
- yield AgentTurnResponseStreamChunk(
592
- event=AgentTurnResponseEvent(
593
- payload=AgentTurnResponseStepCompletePayload(
594
- step_type=StepType.shield_call.value,
595
- step_details=ShieldCallStep(
596
- step_id=str(uuid.uuid4()),
597
- turn_id=turn_id,
598
- violation=None,
599
- ),
600
- )
601
- )
602
- )
603
-
604
- except SafetyException as e:
605
- yield AgentTurnResponseStreamChunk(
606
- event=AgentTurnResponseEvent(
607
- payload=AgentTurnResponseStepCompletePayload(
608
- step_type=StepType.shield_call.value,
609
- step_details=ShieldCallStep(
610
- step_id=str(uuid.uuid4()),
611
- turn_id=turn_id,
612
- violation=e.violation,
613
- ),
614
- )
615
- )
616
- )
617
-
618
- yield CompletionMessage(
619
- content=str(e),
620
- stop_reason=StopReason.end_of_turn,
621
- )
622
- yield False
623
- return
624
-
625
- if out_attachment := interpret_content_as_attachment(
626
- result_message.content
627
- ):
628
- # NOTE: when we push this message back to the model, the model may ignore the
629
- # attached file path etc. since the model is trained to only provide a user message
630
- # with the summary. We keep all generated attachments and then attach them to final message
631
- output_attachments.append(out_attachment)
632
-
633
- input_messages = input_messages + [message, result_message]
634
-
635
- n_iter += 1
636
-
637
async def _ensure_memory_bank(self, session_id: str) -> str:
    """Return the memory bank id for a session, creating one on first use.

    Raises:
        ValueError: if no session exists for ``session_id``.
    """
    session_info = await self.storage.get_session_info(session_id)
    if session_info is None:
        raise ValueError(f"Session {session_id} not found")

    # Reuse the bank registered on a prior turn, if any.
    existing = session_info.memory_bank_id
    if existing is not None:
        return existing

    # First attachment for this session: register a fresh per-session bank.
    new_bank_id = f"memory_bank_{session_id}"
    await self.memory_banks_api.register_memory_bank(
        VectorMemoryBankDef(
            identifier=new_bank_id,
            embedding_model="all-MiniLM-L6-v2",
            chunk_size_in_tokens=512,
        )
    )
    await self.storage.add_memory_bank_to_session(session_id, new_bank_id)
    return new_bank_id
655
-
656
async def _should_retrieve_context(
    self, messages: List[Message], attachments: List[Attachment]
) -> bool:
    """Decide whether RAG context retrieval should run for this turn.

    Attachments normally trigger retrieval, unless they are destined for a
    mandatory code-interpreter call; without attachments, retrieval runs
    only when the memory tool is enabled. ``messages`` is currently unused
    but kept for interface stability.
    """
    enabled = {t.type for t in self.agent_config.tools}
    if attachments:
        # Attachments bound for a required code-interpreter call skip RAG.
        forced_code_interpreter = (
            AgentTool.code_interpreter.value in enabled
            and self.agent_config.tool_choice == ToolChoice.required
        )
        return not forced_code_interpreter
    return AgentTool.memory.value in enabled
670
-
671
def _memory_tool_definition(self) -> Optional[MemoryToolDefinition]:
    """Return the configured memory tool definition, or None if not enabled."""
    return next(
        (t for t in self.agent_config.tools if t.type == AgentTool.memory.value),
        None,
    )
677
-
678
async def _retrieve_context(
    self, session_id: str, messages: List[Message], attachments: List[Attachment]
) -> Tuple[Optional[List[str]], List[str]]:  # (rag_context, bank_ids)
    """Gather RAG context for the current turn.

    New attachments are first indexed into the per-session memory bank; all
    configured banks (plus the session bank, if any) are then queried with a
    generated query, and the top-scoring chunks are packed into a
    token-budgeted context block.

    Returns:
        ``(rag_context, bank_ids)`` where ``rag_context`` is a list of
        context strings (or None when nothing was retrieved) and ``bank_ids``
        lists every bank id that was queried.

    Raises:
        ValueError: if no session exists for ``session_id``.
    """
    bank_ids = []

    memory = self._memory_tool_definition()
    assert memory is not None, "Memory tool not configured"
    bank_ids.extend(c.bank_id for c in memory.memory_bank_configs)

    if attachments:
        # Index the new attachments into the session bank before querying.
        bank_id = await self._ensure_memory_bank(session_id)
        bank_ids.append(bank_id)

        documents = [
            MemoryBankDocument(
                document_id=str(uuid.uuid4()),
                content=a.content,
                mime_type=a.mime_type,
                metadata={},
            )
            for a in attachments
        ]
        with tracing.span("insert_documents"):
            await self.memory_api.insert_documents(bank_id, documents)
    else:
        session_info = await self.storage.get_session_info(session_id)
        # BUGFIX: an unknown session previously crashed with AttributeError
        # on the None dereference below; fail with the same ValueError used
        # elsewhere in this class instead.
        if session_info is None:
            raise ValueError(f"Session {session_id} not found")
        if session_info.memory_bank_id:
            bank_ids.append(session_info.memory_bank_id)

    if not bank_ids:
        # this can happen if the per-session memory bank is not yet populated
        # (i.e., no prior turns uploaded an Attachment)
        return None, []

    query = await generate_rag_query(
        memory.query_generator_config, messages, inference_api=self.inference_api
    )
    # Query every bank concurrently and merge the results.
    tasks = [
        self.memory_api.query_documents(
            bank_id=bank_id,
            query=query,
            params={
                "max_chunks": 5,
            },
        )
        for bank_id in bank_ids
    ]
    results: List[QueryDocumentsResponse] = await asyncio.gather(*tasks)
    chunks = [c for r in results for c in r.chunks]
    scores = [s for r in results for s in r.scores]

    if not chunks:
        return None, bank_ids

    # Rank all chunks across banks by score, best first.
    chunks, scores = zip(
        *sorted(zip(chunks, scores), key=lambda x: x[1], reverse=True)
    )

    # Pack the best chunks until the configured token budget is exhausted.
    tokens = 0
    picked = []
    for c in chunks[: memory.max_chunks]:
        tokens += c.token_count
        if tokens > memory.max_tokens_in_context:
            cprint(
                f"Using {len(picked)} chunks; reached max tokens in context: {tokens}",
                "red",
            )
            break
        picked.append(f"id:{c.document_id}; content:{c.content}")

    return [
        "Here are the retrieved documents for relevant context:\n=== START-RETRIEVED-CONTEXT ===\n",
        *picked,
        "\n=== END-RETRIEVED-CONTEXT ===\n",
    ], bank_ids
754
-
755
def _get_tools(self) -> List[ToolDefinition]:
    """Translate the agent's configured tools into inference ToolDefinitions.

    Builtin tools map to their fixed BuiltinTool identifiers; user-defined
    function tools carry their own name, description, and parameter schema.
    Unrecognized tool types are silently skipped, as before.
    """
    builtin_by_class = (
        (SearchToolDefinition, BuiltinTool.brave_search),
        (WolframAlphaToolDefinition, BuiltinTool.wolfram_alpha),
        (PhotogenToolDefinition, BuiltinTool.photogen),
        (CodeInterpreterToolDefinition, BuiltinTool.code_interpreter),
    )
    definitions = []
    for tool in self.agent_config.tools:
        matched = False
        # Builtin classes are tried first, matching the original precedence.
        for tool_cls, builtin in builtin_by_class:
            if isinstance(tool, tool_cls):
                definitions.append(ToolDefinition(tool_name=builtin))
                matched = True
                break
        if not matched and isinstance(tool, FunctionCallToolDefinition):
            definitions.append(
                ToolDefinition(
                    tool_name=tool.function_name,
                    description=tool.description,
                    parameters=tool.parameters,
                )
            )
    return definitions
775
-
776
-
777
async def attachment_message(tempdir: str, urls: List[URL]) -> ToolResponseMessage:
    """Materialize attachment URLs as local files and describe them to the model.

    ``file://`` URIs are used as-is; ``http(s)`` URLs are downloaded into
    ``tempdir`` under a randomized filename. Returns a code-interpreter tool
    message listing the resulting local paths.

    Raises:
        ValueError: for URL schemes other than file:// or http(s).
        httpx.HTTPStatusError: if a download returns an error status.
    """
    content = []

    for url in urls:
        uri = url.uri
        if uri.startswith("file://"):
            filepath = uri[len("file://") :]
        elif uri.startswith("http"):
            path = urlparse(uri).path
            basename = os.path.basename(path)
            filepath = f"{tempdir}/{make_random_string() + basename}"
            print(f"Downloading {url} -> {filepath}")

            async with httpx.AsyncClient() as client:
                r = await client.get(uri)
                # BUGFIX: previously an HTTP error response (404 page etc.)
                # was silently saved as if it were the attachment.
                r.raise_for_status()
                resp = r.text
            # Explicit encoding so the saved text does not depend on the
            # platform default.
            with open(filepath, "w", encoding="utf-8") as fp:
                fp.write(resp)
        else:
            raise ValueError(f"Unsupported URL {url}")

        content.append(f'# There is a file accessible to you at "{filepath}"\n')

    return ToolResponseMessage(
        call_id="",
        tool_name=BuiltinTool.code_interpreter,
        content=content,
    )
805
-
806
-
807
async def execute_tool_call_maybe(
    tools_dict: Dict[str, BaseTool], messages: List[CompletionMessage]
) -> List[ToolResponseMessage]:
    """Dispatch a single message's first tool call to the matching builtin tool.

    Tools.run accepts a list of messages, but every current tool operates on
    one message at a time. Whether to call tools per message and aggregate,
    or aggregate first and call once, remains to be seen — hence the
    single-message assertion below.
    """
    assert len(messages) == 1, "Expected single message"
    only_message = messages[0]

    call = only_message.tool_calls[0]
    tool_name = call.tool_name
    assert isinstance(tool_name, BuiltinTool)
    key = tool_name.value

    assert key in tools_dict, f"Tool {key} not found"
    return await tools_dict[key].run(messages)
828
-
829
-
830
def print_dialog(messages: List[Message]):
    """Pretty-print a dialog: one color-coded, truncated line per message."""
    role_colors = {
        Role.user.value: "red",
        Role.assistant.value: "white",
        Role.ipython.value: "yellow",
        Role.system.value: "green",
    }
    for idx, msg in enumerate(messages):
        text = str(msg)
        # Unknown roles fall back to white, as in the original chain.
        cprint(f"{idx} ::: {text[:100]}...", color=role_colors.get(msg.role, "white"))