llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
@@ -4,196 +4,104 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
+ from collections.abc import AsyncIterator
7
8
  from datetime import datetime
8
- from enum import Enum
9
- from typing import (
10
- Any,
11
- Dict,
12
- List,
13
- Literal,
14
- Optional,
15
- Protocol,
16
- runtime_checkable,
17
- Union,
18
- )
19
-
20
- from llama_models.schema_utils import json_schema_type, webmethod
9
+ from enum import StrEnum
10
+ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
21
11
 
22
12
  from pydantic import BaseModel, ConfigDict, Field
23
- from typing_extensions import Annotated
24
-
25
- from llama_models.llama3.api.datatypes import * # noqa: F403
26
- from llama_stack.apis.common.deployment_types import * # noqa: F403
27
- from llama_stack.apis.inference import * # noqa: F403
28
- from llama_stack.apis.safety import * # noqa: F403
29
- from llama_stack.apis.memory import * # noqa: F403
30
-
31
-
32
- @json_schema_type
33
- class Attachment(BaseModel):
34
- content: InterleavedTextMedia | URL
35
- mime_type: str
36
-
37
-
38
- class AgentTool(Enum):
39
- brave_search = "brave_search"
40
- wolfram_alpha = "wolfram_alpha"
41
- photogen = "photogen"
42
- code_interpreter = "code_interpreter"
43
-
44
- function_call = "function_call"
45
- memory = "memory"
46
-
47
-
48
- class ToolDefinitionCommon(BaseModel):
49
- input_shields: Optional[List[str]] = Field(default_factory=list)
50
- output_shields: Optional[List[str]] = Field(default_factory=list)
51
-
52
-
53
- class SearchEngineType(Enum):
54
- bing = "bing"
55
- brave = "brave"
56
-
57
-
58
- @json_schema_type
59
- class SearchToolDefinition(ToolDefinitionCommon):
60
- # NOTE: brave_search is just a placeholder since model always uses
61
- # brave_search as tool call name
62
- type: Literal[AgentTool.brave_search.value] = AgentTool.brave_search.value
63
- api_key: str
64
- engine: SearchEngineType = SearchEngineType.brave
65
- remote_execution: Optional[RestAPIExecutionConfig] = None
66
-
67
-
68
- @json_schema_type
69
- class WolframAlphaToolDefinition(ToolDefinitionCommon):
70
- type: Literal[AgentTool.wolfram_alpha.value] = AgentTool.wolfram_alpha.value
71
- api_key: str
72
- remote_execution: Optional[RestAPIExecutionConfig] = None
73
13
 
74
-
75
- @json_schema_type
76
- class PhotogenToolDefinition(ToolDefinitionCommon):
77
- type: Literal[AgentTool.photogen.value] = AgentTool.photogen.value
78
- remote_execution: Optional[RestAPIExecutionConfig] = None
79
-
80
-
81
- @json_schema_type
82
- class CodeInterpreterToolDefinition(ToolDefinitionCommon):
83
- type: Literal[AgentTool.code_interpreter.value] = AgentTool.code_interpreter.value
84
- enable_inline_code_execution: bool = True
85
- remote_execution: Optional[RestAPIExecutionConfig] = None
14
+ from llama_stack.apis.common.content_types import URL, ContentDelta, InterleavedContent
15
+ from llama_stack.apis.common.responses import Order, PaginatedResponse
16
+ from llama_stack.apis.inference import (
17
+ CompletionMessage,
18
+ ResponseFormat,
19
+ SamplingParams,
20
+ ToolCall,
21
+ ToolChoice,
22
+ ToolConfig,
23
+ ToolPromptFormat,
24
+ ToolResponse,
25
+ ToolResponseMessage,
26
+ UserMessage,
27
+ )
28
+ from llama_stack.apis.safety import SafetyViolation
29
+ from llama_stack.apis.tools import ToolDef
30
+ from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
31
+ from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod
32
+
33
+ from .openai_responses import (
34
+ ListOpenAIResponseInputItem,
35
+ ListOpenAIResponseObject,
36
+ OpenAIDeleteResponseObject,
37
+ OpenAIResponseInput,
38
+ OpenAIResponseInputTool,
39
+ OpenAIResponseObject,
40
+ OpenAIResponseObjectStream,
41
+ OpenAIResponseText,
42
+ )
86
43
 
87
44
 
88
45
  @json_schema_type
89
- class FunctionCallToolDefinition(ToolDefinitionCommon):
90
- type: Literal[AgentTool.function_call.value] = AgentTool.function_call.value
91
- function_name: str
92
- description: str
93
- parameters: Dict[str, ToolParamDefinition]
94
- remote_execution: Optional[RestAPIExecutionConfig] = None
95
-
96
-
97
- class _MemoryBankConfigCommon(BaseModel):
98
- bank_id: str
99
-
100
-
101
- class AgentVectorMemoryBankConfig(_MemoryBankConfigCommon):
102
- type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value
46
+ class ResponseGuardrailSpec(BaseModel):
47
+ """Specification for a guardrail to apply during response generation.
103
48
 
49
+ :param type: The type/identifier of the guardrail.
50
+ """
104
51
 
105
- class AgentKeyValueMemoryBankConfig(_MemoryBankConfigCommon):
106
- type: Literal[MemoryBankType.keyvalue.value] = MemoryBankType.keyvalue.value
107
- keys: List[str] # what keys to focus on
52
+ type: str
53
+ # TODO: more fields to be added for guardrail configuration
108
54
 
109
55
 
110
- class AgentKeywordMemoryBankConfig(_MemoryBankConfigCommon):
111
- type: Literal[MemoryBankType.keyword.value] = MemoryBankType.keyword.value
56
+ ResponseGuardrail = str | ResponseGuardrailSpec
112
57
 
113
58
 
114
- class AgentGraphMemoryBankConfig(_MemoryBankConfigCommon):
115
- type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value
116
- entities: List[str] # what entities to focus on
117
-
118
-
119
- MemoryBankConfig = Annotated[
120
- Union[
121
- AgentVectorMemoryBankConfig,
122
- AgentKeyValueMemoryBankConfig,
123
- AgentKeywordMemoryBankConfig,
124
- AgentGraphMemoryBankConfig,
125
- ],
126
- Field(discriminator="type"),
127
- ]
128
-
129
-
130
- class MemoryQueryGenerator(Enum):
131
- default = "default"
132
- llm = "llm"
133
- custom = "custom"
134
-
135
-
136
- class DefaultMemoryQueryGeneratorConfig(BaseModel):
137
- type: Literal[MemoryQueryGenerator.default.value] = (
138
- MemoryQueryGenerator.default.value
139
- )
140
- sep: str = " "
59
+ class Attachment(BaseModel):
60
+ """An attachment to an agent turn.
141
61
 
62
+ :param content: The content of the attachment.
63
+ :param mime_type: The MIME type of the attachment.
64
+ """
142
65
 
143
- class LLMMemoryQueryGeneratorConfig(BaseModel):
144
- type: Literal[MemoryQueryGenerator.llm.value] = MemoryQueryGenerator.llm.value
145
- model: str
146
- template: str
66
+ content: InterleavedContent | URL
67
+ mime_type: str
147
68
 
148
69
 
149
- class CustomMemoryQueryGeneratorConfig(BaseModel):
150
- type: Literal[MemoryQueryGenerator.custom.value] = MemoryQueryGenerator.custom.value
70
+ class Document(BaseModel):
71
+ """A document to be used by an agent.
151
72
 
73
+ :param content: The content of the document.
74
+ :param mime_type: The MIME type of the document.
75
+ """
152
76
 
153
- MemoryQueryGeneratorConfig = Annotated[
154
- Union[
155
- DefaultMemoryQueryGeneratorConfig,
156
- LLMMemoryQueryGeneratorConfig,
157
- CustomMemoryQueryGeneratorConfig,
158
- ],
159
- Field(discriminator="type"),
160
- ]
77
+ content: InterleavedContent | URL
78
+ mime_type: str
161
79
 
162
80
 
163
- @json_schema_type
164
- class MemoryToolDefinition(ToolDefinitionCommon):
165
- type: Literal[AgentTool.memory.value] = AgentTool.memory.value
166
- memory_bank_configs: List[MemoryBankConfig] = Field(default_factory=list)
167
- # This config defines how a query is generated using the messages
168
- # for memory bank retrieval.
169
- query_generator_config: MemoryQueryGeneratorConfig = Field(
170
- default=DefaultMemoryQueryGeneratorConfig()
171
- )
172
- max_tokens_in_context: int = 4096
173
- max_chunks: int = 10
174
-
175
-
176
- AgentToolDefinition = Annotated[
177
- Union[
178
- SearchToolDefinition,
179
- WolframAlphaToolDefinition,
180
- PhotogenToolDefinition,
181
- CodeInterpreterToolDefinition,
182
- FunctionCallToolDefinition,
183
- MemoryToolDefinition,
184
- ],
185
- Field(discriminator="type"),
186
- ]
81
+ class StepCommon(BaseModel):
82
+ """A common step in an agent turn.
187
83
 
84
+ :param turn_id: The ID of the turn.
85
+ :param step_id: The ID of the step.
86
+ :param started_at: The time the step started.
87
+ :param completed_at: The time the step completed.
88
+ """
188
89
 
189
- class StepCommon(BaseModel):
190
90
  turn_id: str
191
91
  step_id: str
192
- started_at: Optional[datetime] = None
193
- completed_at: Optional[datetime] = None
92
+ started_at: datetime | None = None
93
+ completed_at: datetime | None = None
94
+
194
95
 
96
+ class StepType(StrEnum):
97
+ """Type of the step in an agent turn.
98
+
99
+ :cvar inference: The step is an inference step that calls an LLM.
100
+ :cvar tool_execution: The step is a tool execution step that executes a tool call.
101
+ :cvar shield_call: The step is a shield call step that checks for safety violations.
102
+ :cvar memory_retrieval: The step is a memory retrieval step that retrieves context for vector dbs.
103
+ """
195
104
 
196
- class StepType(Enum):
197
105
  inference = "inference"
198
106
  tool_execution = "tool_execution"
199
107
  shield_call = "shield_call"
@@ -202,274 +110,785 @@ class StepType(Enum):
202
110
 
203
111
  @json_schema_type
204
112
  class InferenceStep(StepCommon):
113
+ """An inference step in an agent turn.
114
+
115
+ :param model_response: The response from the LLM.
116
+ """
117
+
205
118
  model_config = ConfigDict(protected_namespaces=())
206
119
 
207
- step_type: Literal[StepType.inference.value] = StepType.inference.value
120
+ step_type: Literal[StepType.inference] = StepType.inference
208
121
  model_response: CompletionMessage
209
122
 
210
123
 
211
124
  @json_schema_type
212
125
  class ToolExecutionStep(StepCommon):
213
- step_type: Literal[StepType.tool_execution.value] = StepType.tool_execution.value
214
- tool_calls: List[ToolCall]
215
- tool_responses: List[ToolResponse]
126
+ """A tool execution step in an agent turn.
127
+
128
+ :param tool_calls: The tool calls to execute.
129
+ :param tool_responses: The tool responses from the tool calls.
130
+ """
131
+
132
+ step_type: Literal[StepType.tool_execution] = StepType.tool_execution
133
+ tool_calls: list[ToolCall]
134
+ tool_responses: list[ToolResponse]
216
135
 
217
136
 
218
137
  @json_schema_type
219
138
  class ShieldCallStep(StepCommon):
220
- step_type: Literal[StepType.shield_call.value] = StepType.shield_call.value
221
- violation: Optional[SafetyViolation]
139
+ """A shield call step in an agent turn.
140
+
141
+ :param violation: The violation from the shield call.
142
+ """
143
+
144
+ step_type: Literal[StepType.shield_call] = StepType.shield_call
145
+ violation: SafetyViolation | None
222
146
 
223
147
 
224
148
  @json_schema_type
225
149
  class MemoryRetrievalStep(StepCommon):
226
- step_type: Literal[StepType.memory_retrieval.value] = (
227
- StepType.memory_retrieval.value
228
- )
229
- memory_bank_ids: List[str]
230
- inserted_context: InterleavedTextMedia
150
+ """A memory retrieval step in an agent turn.
151
+
152
+ :param vector_db_ids: The IDs of the vector databases to retrieve context from.
153
+ :param inserted_context: The context retrieved from the vector databases.
154
+ """
155
+
156
+ step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
157
+ # TODO: should this be List[str]?
158
+ vector_db_ids: str
159
+ inserted_context: InterleavedContent
231
160
 
232
161
 
233
162
  Step = Annotated[
234
- Union[
235
- InferenceStep,
236
- ToolExecutionStep,
237
- ShieldCallStep,
238
- MemoryRetrievalStep,
239
- ],
163
+ InferenceStep | ToolExecutionStep | ShieldCallStep | MemoryRetrievalStep,
240
164
  Field(discriminator="step_type"),
241
165
  ]
242
166
 
243
167
 
244
168
  @json_schema_type
245
169
  class Turn(BaseModel):
246
- """A single turn in an interaction with an Agentic System."""
170
+ """A single turn in an interaction with an Agentic System.
171
+
172
+ :param turn_id: Unique identifier for the turn within a session
173
+ :param session_id: Unique identifier for the conversation session
174
+ :param input_messages: List of messages that initiated this turn
175
+ :param steps: Ordered list of processing steps executed during this turn
176
+ :param output_message: The model's generated response containing content and metadata
177
+ :param output_attachments: (Optional) Files or media attached to the agent's response
178
+ :param started_at: Timestamp when the turn began
179
+ :param completed_at: (Optional) Timestamp when the turn finished, if completed
180
+ """
247
181
 
248
182
  turn_id: str
249
183
  session_id: str
250
- input_messages: List[
251
- Union[
252
- UserMessage,
253
- ToolResponseMessage,
254
- ]
255
- ]
256
- steps: List[Step]
184
+ input_messages: list[UserMessage | ToolResponseMessage]
185
+ steps: list[Step]
257
186
  output_message: CompletionMessage
258
- output_attachments: List[Attachment] = Field(default_factory=list)
187
+ output_attachments: list[Attachment] | None = Field(default_factory=lambda: [])
259
188
 
260
189
  started_at: datetime
261
- completed_at: Optional[datetime] = None
190
+ completed_at: datetime | None = None
262
191
 
263
192
 
264
193
  @json_schema_type
265
194
  class Session(BaseModel):
266
- """A single session of an interaction with an Agentic System."""
195
+ """A single session of an interaction with an Agentic System.
196
+
197
+ :param session_id: Unique identifier for the conversation session
198
+ :param session_name: Human-readable name for the session
199
+ :param turns: List of all turns that have occurred in this session
200
+ :param started_at: Timestamp when the session was created
201
+ """
267
202
 
268
203
  session_id: str
269
204
  session_name: str
270
- turns: List[Turn]
205
+ turns: list[Turn]
271
206
  started_at: datetime
272
207
 
273
- memory_bank: Optional[MemoryBankDef] = None
274
208
 
209
+ class AgentToolGroupWithArgs(BaseModel):
210
+ name: str
211
+ args: dict[str, Any]
275
212
 
276
- class AgentConfigCommon(BaseModel):
277
- sampling_params: Optional[SamplingParams] = SamplingParams()
278
213
 
279
- input_shields: Optional[List[str]] = Field(default_factory=list)
280
- output_shields: Optional[List[str]] = Field(default_factory=list)
214
+ AgentToolGroup = str | AgentToolGroupWithArgs
215
+ register_schema(AgentToolGroup, name="AgentTool")
281
216
 
282
- tools: Optional[List[AgentToolDefinition]] = Field(default_factory=list)
283
- tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
284
- tool_prompt_format: Optional[ToolPromptFormat] = Field(
285
- default=ToolPromptFormat.json
286
- )
287
217
 
288
- max_infer_iters: int = 10
218
+ class AgentConfigCommon(BaseModel):
219
+ sampling_params: SamplingParams | None = Field(default_factory=SamplingParams)
220
+
221
+ input_shields: list[str] | None = Field(default_factory=lambda: [])
222
+ output_shields: list[str] | None = Field(default_factory=lambda: [])
223
+ toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])
224
+ client_tools: list[ToolDef] | None = Field(default_factory=lambda: [])
225
+ tool_choice: ToolChoice | None = Field(default=None, deprecated="use tool_config instead")
226
+ tool_prompt_format: ToolPromptFormat | None = Field(default=None, deprecated="use tool_config instead")
227
+ tool_config: ToolConfig | None = Field(default=None)
228
+
229
+ max_infer_iters: int | None = 10
230
+
231
+ def model_post_init(self, __context):
232
+ if self.tool_config:
233
+ if self.tool_choice and self.tool_config.tool_choice != self.tool_choice:
234
+ raise ValueError("tool_choice is deprecated. Use tool_choice in tool_config instead.")
235
+ if self.tool_prompt_format and self.tool_config.tool_prompt_format != self.tool_prompt_format:
236
+ raise ValueError("tool_prompt_format is deprecated. Use tool_prompt_format in tool_config instead.")
237
+ else:
238
+ params = {}
239
+ if self.tool_choice:
240
+ params["tool_choice"] = self.tool_choice
241
+ if self.tool_prompt_format:
242
+ params["tool_prompt_format"] = self.tool_prompt_format
243
+ self.tool_config = ToolConfig(**params)
289
244
 
290
245
 
291
246
  @json_schema_type
292
247
  class AgentConfig(AgentConfigCommon):
248
+ """Configuration for an agent.
249
+
250
+ :param model: The model identifier to use for the agent
251
+ :param instructions: The system instructions for the agent
252
+ :param name: Optional name for the agent, used in telemetry and identification
253
+ :param enable_session_persistence: Optional flag indicating whether session data has to be persisted
254
+ :param response_format: Optional response format configuration
255
+ """
256
+
293
257
  model: str
294
258
  instructions: str
295
- enable_session_persistence: bool
259
+ name: str | None = None
260
+ enable_session_persistence: bool | None = False
261
+ response_format: ResponseFormat | None = None
262
+
263
+
264
+ @json_schema_type
265
+ class Agent(BaseModel):
266
+ """An agent instance with configuration and metadata.
267
+
268
+ :param agent_id: Unique identifier for the agent
269
+ :param agent_config: Configuration settings for the agent
270
+ :param created_at: Timestamp when the agent was created
271
+ """
272
+
273
+ agent_id: str
274
+ agent_config: AgentConfig
275
+ created_at: datetime
296
276
 
297
277
 
298
278
  class AgentConfigOverridablePerTurn(AgentConfigCommon):
299
- instructions: Optional[str] = None
279
+ instructions: str | None = None
300
280
 
301
281
 
302
- class AgentTurnResponseEventType(Enum):
282
+ class AgentTurnResponseEventType(StrEnum):
303
283
  step_start = "step_start"
304
284
  step_complete = "step_complete"
305
285
  step_progress = "step_progress"
306
286
 
307
287
  turn_start = "turn_start"
308
288
  turn_complete = "turn_complete"
289
+ turn_awaiting_input = "turn_awaiting_input"
309
290
 
310
291
 
311
292
  @json_schema_type
312
293
  class AgentTurnResponseStepStartPayload(BaseModel):
313
- event_type: Literal[AgentTurnResponseEventType.step_start.value] = (
314
- AgentTurnResponseEventType.step_start.value
315
- )
294
+ """Payload for step start events in agent turn responses.
295
+
296
+ :param event_type: Type of event being reported
297
+ :param step_type: Type of step being executed
298
+ :param step_id: Unique identifier for the step within a turn
299
+ :param metadata: (Optional) Additional metadata for the step
300
+ """
301
+
302
+ event_type: Literal[AgentTurnResponseEventType.step_start] = AgentTurnResponseEventType.step_start
316
303
  step_type: StepType
317
304
  step_id: str
318
- metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
305
+ metadata: dict[str, Any] | None = Field(default_factory=lambda: {})
319
306
 
320
307
 
321
308
  @json_schema_type
322
309
  class AgentTurnResponseStepCompletePayload(BaseModel):
323
- event_type: Literal[AgentTurnResponseEventType.step_complete.value] = (
324
- AgentTurnResponseEventType.step_complete.value
325
- )
310
+ """Payload for step completion events in agent turn responses.
311
+
312
+ :param event_type: Type of event being reported
313
+ :param step_type: Type of step being executed
314
+ :param step_id: Unique identifier for the step within a turn
315
+ :param step_details: Complete details of the executed step
316
+ """
317
+
318
+ event_type: Literal[AgentTurnResponseEventType.step_complete] = AgentTurnResponseEventType.step_complete
326
319
  step_type: StepType
320
+ step_id: str
327
321
  step_details: Step
328
322
 
329
323
 
330
324
  @json_schema_type
331
325
  class AgentTurnResponseStepProgressPayload(BaseModel):
326
+ """Payload for step progress events in agent turn responses.
327
+
328
+ :param event_type: Type of event being reported
329
+ :param step_type: Type of step being executed
330
+ :param step_id: Unique identifier for the step within a turn
331
+ :param delta: Incremental content changes during step execution
332
+ """
333
+
332
334
  model_config = ConfigDict(protected_namespaces=())
333
335
 
334
- event_type: Literal[AgentTurnResponseEventType.step_progress.value] = (
335
- AgentTurnResponseEventType.step_progress.value
336
- )
336
+ event_type: Literal[AgentTurnResponseEventType.step_progress] = AgentTurnResponseEventType.step_progress
337
337
  step_type: StepType
338
338
  step_id: str
339
339
 
340
- model_response_text_delta: Optional[str] = None
341
- tool_call_delta: Optional[ToolCallDelta] = None
342
- tool_response_text_delta: Optional[str] = None
340
+ delta: ContentDelta
343
341
 
344
342
 
345
343
  @json_schema_type
346
344
  class AgentTurnResponseTurnStartPayload(BaseModel):
347
- event_type: Literal[AgentTurnResponseEventType.turn_start.value] = (
348
- AgentTurnResponseEventType.turn_start.value
349
- )
345
+ """Payload for turn start events in agent turn responses.
346
+
347
+ :param event_type: Type of event being reported
348
+ :param turn_id: Unique identifier for the turn within a session
349
+ """
350
+
351
+ event_type: Literal[AgentTurnResponseEventType.turn_start] = AgentTurnResponseEventType.turn_start
350
352
  turn_id: str
351
353
 
352
354
 
353
355
  @json_schema_type
354
356
  class AgentTurnResponseTurnCompletePayload(BaseModel):
355
- event_type: Literal[AgentTurnResponseEventType.turn_complete.value] = (
356
- AgentTurnResponseEventType.turn_complete.value
357
- )
357
+ """Payload for turn completion events in agent turn responses.
358
+
359
+ :param event_type: Type of event being reported
360
+ :param turn: Complete turn data including all steps and results
361
+ """
362
+
363
+ event_type: Literal[AgentTurnResponseEventType.turn_complete] = AgentTurnResponseEventType.turn_complete
358
364
  turn: Turn
359
365
 
360
366
 
367
+ @json_schema_type
368
+ class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
369
+ """Payload for turn awaiting input events in agent turn responses.
370
+
371
+ :param event_type: Type of event being reported
372
+ :param turn: Turn data when waiting for external tool responses
373
+ """
374
+
375
+ event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input] = AgentTurnResponseEventType.turn_awaiting_input
376
+ turn: Turn
377
+
378
+
379
+ AgentTurnResponseEventPayload = Annotated[
380
+ AgentTurnResponseStepStartPayload
381
+ | AgentTurnResponseStepProgressPayload
382
+ | AgentTurnResponseStepCompletePayload
383
+ | AgentTurnResponseTurnStartPayload
384
+ | AgentTurnResponseTurnCompletePayload
385
+ | AgentTurnResponseTurnAwaitingInputPayload,
386
+ Field(discriminator="event_type"),
387
+ ]
388
+ register_schema(AgentTurnResponseEventPayload, name="AgentTurnResponseEventPayload")
389
+
390
+
361
391
  @json_schema_type
362
392
  class AgentTurnResponseEvent(BaseModel):
363
- """Streamed agent execution response."""
393
+ """An event in an agent turn response stream.
394
+
395
+ :param payload: Event-specific payload containing event data
396
+ """
364
397
 
365
- payload: Annotated[
366
- Union[
367
- AgentTurnResponseStepStartPayload,
368
- AgentTurnResponseStepProgressPayload,
369
- AgentTurnResponseStepCompletePayload,
370
- AgentTurnResponseTurnStartPayload,
371
- AgentTurnResponseTurnCompletePayload,
372
- ],
373
- Field(discriminator="event_type"),
374
- ]
398
+ payload: AgentTurnResponseEventPayload
375
399
 
376
400
 
377
401
  @json_schema_type
378
402
  class AgentCreateResponse(BaseModel):
403
+ """Response returned when creating a new agent.
404
+
405
+ :param agent_id: Unique identifier for the created agent
406
+ """
407
+
379
408
  agent_id: str
380
409
 
381
410
 
382
411
  @json_schema_type
383
412
  class AgentSessionCreateResponse(BaseModel):
413
+ """Response returned when creating a new agent session.
414
+
415
+ :param session_id: Unique identifier for the created session
416
+ """
417
+
384
418
  session_id: str
385
419
 
386
420
 
387
421
  @json_schema_type
388
422
  class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
423
+ """Request to create a new turn for an agent.
424
+
425
+ :param agent_id: Unique identifier for the agent
426
+ :param session_id: Unique identifier for the conversation session
427
+ :param messages: List of messages to start the turn with
428
+ :param documents: (Optional) List of documents to provide to the agent
429
+ :param toolgroups: (Optional) List of tool groups to make available for this turn
430
+ :param stream: (Optional) Whether to stream the response
431
+ :param tool_config: (Optional) Tool configuration to override agent defaults
432
+ """
433
+
389
434
  agent_id: str
390
435
  session_id: str
391
436
 
392
437
  # TODO: figure out how we can simplify this and make why
393
438
  # ToolResponseMessage needs to be here (it is function call
394
439
  # execution from outside the system)
395
- messages: List[
396
- Union[
397
- UserMessage,
398
- ToolResponseMessage,
399
- ]
400
- ]
401
- attachments: Optional[List[Attachment]] = None
440
+ messages: list[UserMessage | ToolResponseMessage]
441
+
442
+ documents: list[Document] | None = None
443
+ toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])
444
+
445
+ stream: bool | None = False
446
+ tool_config: ToolConfig | None = None
447
+
448
+
449
+ @json_schema_type
450
+ class AgentTurnResumeRequest(BaseModel):
451
+ """Request to resume an agent turn with tool responses.
452
+
453
+ :param agent_id: Unique identifier for the agent
454
+ :param session_id: Unique identifier for the conversation session
455
+ :param turn_id: Unique identifier for the turn within a session
456
+ :param tool_responses: List of tool responses to submit to continue the turn
457
+ :param stream: (Optional) Whether to stream the response
458
+ """
402
459
 
403
- stream: Optional[bool] = False
460
+ agent_id: str
461
+ session_id: str
462
+ turn_id: str
463
+ tool_responses: list[ToolResponse]
464
+ stream: bool | None = False
404
465
 
405
466
 
406
467
  @json_schema_type
407
468
  class AgentTurnResponseStreamChunk(BaseModel):
469
+ """Streamed agent turn completion response.
470
+
471
+ :param event: Individual event in the agent turn response stream
472
+ """
473
+
408
474
  event: AgentTurnResponseEvent
409
475
 
410
476
 
411
477
  @json_schema_type
412
478
  class AgentStepResponse(BaseModel):
479
+ """Response containing details of a specific agent step.
480
+
481
+ :param step: The complete step data and execution details
482
+ """
483
+
413
484
  step: Step
414
485
 
415
486
 
416
487
  @runtime_checkable
417
488
  class Agents(Protocol):
418
- @webmethod(route="/agents/create")
489
+ """Agents
490
+
491
+ APIs for creating and interacting with agentic systems."""
492
+
493
+ @webmethod(
494
+ route="/agents",
495
+ method="POST",
496
+ descriptive_name="create_agent",
497
+ deprecated=True,
498
+ level=LLAMA_STACK_API_V1,
499
+ )
500
+ @webmethod(
501
+ route="/agents",
502
+ method="POST",
503
+ descriptive_name="create_agent",
504
+ level=LLAMA_STACK_API_V1ALPHA,
505
+ )
419
506
  async def create_agent(
420
507
  self,
421
508
  agent_config: AgentConfig,
422
- ) -> AgentCreateResponse: ...
423
-
424
- # This method is not `async def` because it can result in either an
425
- # `AsyncGenerator` or a `AgentTurnCreateResponse` depending on the value of `stream`.
426
- @webmethod(route="/agents/turn/create")
427
- def create_agent_turn(
509
+ ) -> AgentCreateResponse:
510
+ """Create an agent with the given configuration.
511
+
512
+ :param agent_config: The configuration for the agent.
513
+ :returns: An AgentCreateResponse with the agent ID.
514
+ """
515
+ ...
516
+
517
+ @webmethod(
518
+ route="/agents/{agent_id}/session/{session_id}/turn",
519
+ method="POST",
520
+ descriptive_name="create_agent_turn",
521
+ deprecated=True,
522
+ level=LLAMA_STACK_API_V1,
523
+ )
524
+ @webmethod(
525
+ route="/agents/{agent_id}/session/{session_id}/turn",
526
+ method="POST",
527
+ descriptive_name="create_agent_turn",
528
+ level=LLAMA_STACK_API_V1ALPHA,
529
+ )
530
+ async def create_agent_turn(
531
+ self,
532
+ agent_id: str,
533
+ session_id: str,
534
+ messages: list[UserMessage | ToolResponseMessage],
535
+ stream: bool | None = False,
536
+ documents: list[Document] | None = None,
537
+ toolgroups: list[AgentToolGroup] | None = None,
538
+ tool_config: ToolConfig | None = None,
539
+ ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
540
+ """Create a new turn for an agent.
541
+
542
+ :param agent_id: The ID of the agent to create the turn for.
543
+ :param session_id: The ID of the session to create the turn for.
544
+ :param messages: List of messages to start the turn with.
545
+ :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
546
+ :param documents: (Optional) List of documents to create the turn with.
547
+ :param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request.
548
+ :param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config.
549
+ :returns: If stream=False, returns a Turn object.
550
+ If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.
551
+ """
552
+ ...
553
+
554
+ @webmethod(
555
+ route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
556
+ method="POST",
557
+ descriptive_name="resume_agent_turn",
558
+ deprecated=True,
559
+ level=LLAMA_STACK_API_V1,
560
+ )
561
+ @webmethod(
562
+ route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
563
+ method="POST",
564
+ descriptive_name="resume_agent_turn",
565
+ level=LLAMA_STACK_API_V1ALPHA,
566
+ )
567
+ async def resume_agent_turn(
428
568
  self,
429
569
  agent_id: str,
430
570
  session_id: str,
431
- messages: List[
432
- Union[
433
- UserMessage,
434
- ToolResponseMessage,
435
- ]
436
- ],
437
- attachments: Optional[List[Attachment]] = None,
438
- stream: Optional[bool] = False,
439
- ) -> AgentTurnResponseStreamChunk: ...
440
-
441
- @webmethod(route="/agents/turn/get")
571
+ turn_id: str,
572
+ tool_responses: list[ToolResponse],
573
+ stream: bool | None = False,
574
+ ) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
575
+ """Resume an agent turn with executed tool call responses.
576
+
577
+ When a Turn has the status `awaiting_input` due to pending input from client side tool calls, this endpoint can be used to submit the outputs from the tool calls once they are ready.
578
+
579
+ :param agent_id: The ID of the agent to resume.
580
+ :param session_id: The ID of the session to resume.
581
+ :param turn_id: The ID of the turn to resume.
582
+ :param tool_responses: The tool call responses to resume the turn with.
583
+ :param stream: Whether to stream the response.
584
+ :returns: A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk objects.
585
+ """
586
+ ...
587
+
588
+ @webmethod(
589
+ route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
590
+ method="GET",
591
+ deprecated=True,
592
+ level=LLAMA_STACK_API_V1,
593
+ )
594
+ @webmethod(
595
+ route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
596
+ method="GET",
597
+ level=LLAMA_STACK_API_V1ALPHA,
598
+ )
442
599
  async def get_agents_turn(
443
600
  self,
444
601
  agent_id: str,
602
+ session_id: str,
445
603
  turn_id: str,
446
- ) -> Turn: ...
447
-
448
- @webmethod(route="/agents/step/get")
604
+ ) -> Turn:
605
+ """Retrieve an agent turn by its ID.
606
+
607
+ :param agent_id: The ID of the agent to get the turn for.
608
+ :param session_id: The ID of the session to get the turn for.
609
+ :param turn_id: The ID of the turn to get.
610
+ :returns: A Turn.
611
+ """
612
+ ...
613
+
614
+ @webmethod(
615
+ route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
616
+ method="GET",
617
+ deprecated=True,
618
+ level=LLAMA_STACK_API_V1,
619
+ )
620
+ @webmethod(
621
+ route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
622
+ method="GET",
623
+ level=LLAMA_STACK_API_V1ALPHA,
624
+ )
449
625
  async def get_agents_step(
450
- self, agent_id: str, turn_id: str, step_id: str
451
- ) -> AgentStepResponse: ...
452
-
453
- @webmethod(route="/agents/session/create")
626
+ self,
627
+ agent_id: str,
628
+ session_id: str,
629
+ turn_id: str,
630
+ step_id: str,
631
+ ) -> AgentStepResponse:
632
+ """Retrieve an agent step by its ID.
633
+
634
+ :param agent_id: The ID of the agent to get the step for.
635
+ :param session_id: The ID of the session to get the step for.
636
+ :param turn_id: The ID of the turn to get the step for.
637
+ :param step_id: The ID of the step to get.
638
+ :returns: An AgentStepResponse.
639
+ """
640
+ ...
641
+
642
+ @webmethod(
643
+ route="/agents/{agent_id}/session",
644
+ method="POST",
645
+ descriptive_name="create_agent_session",
646
+ deprecated=True,
647
+ level=LLAMA_STACK_API_V1,
648
+ )
649
+ @webmethod(
650
+ route="/agents/{agent_id}/session",
651
+ method="POST",
652
+ descriptive_name="create_agent_session",
653
+ level=LLAMA_STACK_API_V1ALPHA,
654
+ )
454
655
  async def create_agent_session(
455
656
  self,
456
657
  agent_id: str,
457
658
  session_name: str,
458
- ) -> AgentSessionCreateResponse: ...
459
-
460
- @webmethod(route="/agents/session/get")
659
+ ) -> AgentSessionCreateResponse:
660
+ """Create a new session for an agent.
661
+
662
+ :param agent_id: The ID of the agent to create the session for.
663
+ :param session_name: The name of the session to create.
664
+ :returns: An AgentSessionCreateResponse.
665
+ """
666
+ ...
667
+
668
+ @webmethod(
669
+ route="/agents/{agent_id}/session/{session_id}",
670
+ method="GET",
671
+ deprecated=True,
672
+ level=LLAMA_STACK_API_V1,
673
+ )
674
+ @webmethod(
675
+ route="/agents/{agent_id}/session/{session_id}",
676
+ method="GET",
677
+ level=LLAMA_STACK_API_V1ALPHA,
678
+ )
461
679
  async def get_agents_session(
462
680
  self,
681
+ session_id: str,
463
682
  agent_id: str,
683
+ turn_ids: list[str] | None = None,
684
+ ) -> Session:
685
+ """Retrieve an agent session by its ID.
686
+
687
+ :param session_id: The ID of the session to get.
688
+ :param agent_id: The ID of the agent to get the session for.
689
+ :param turn_ids: (Optional) List of turn IDs to filter the session by.
690
+ :returns: A Session.
691
+ """
692
+ ...
693
+
694
+ @webmethod(
695
+ route="/agents/{agent_id}/session/{session_id}",
696
+ method="DELETE",
697
+ deprecated=True,
698
+ level=LLAMA_STACK_API_V1,
699
+ )
700
+ @webmethod(
701
+ route="/agents/{agent_id}/session/{session_id}",
702
+ method="DELETE",
703
+ level=LLAMA_STACK_API_V1ALPHA,
704
+ )
705
+ async def delete_agents_session(
706
+ self,
464
707
  session_id: str,
465
- turn_ids: Optional[List[str]] = None,
466
- ) -> Session: ...
467
-
468
- @webmethod(route="/agents/session/delete")
469
- async def delete_agents_session(self, agent_id: str, session_id: str) -> None: ...
470
-
471
- @webmethod(route="/agents/delete")
472
- async def delete_agents(
708
+ agent_id: str,
709
+ ) -> None:
710
+ """Delete an agent session by its ID and its associated turns.
711
+
712
+ :param session_id: The ID of the session to delete.
713
+ :param agent_id: The ID of the agent to delete the session for.
714
+ """
715
+ ...
716
+
717
+ @webmethod(
718
+ route="/agents/{agent_id}",
719
+ method="DELETE",
720
+ deprecated=True,
721
+ level=LLAMA_STACK_API_V1,
722
+ )
723
+ @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
724
+ async def delete_agent(
473
725
  self,
474
726
  agent_id: str,
475
- ) -> None: ...
727
+ ) -> None:
728
+ """Delete an agent by its ID and its associated sessions and turns.
729
+
730
+ :param agent_id: The ID of the agent to delete.
731
+ """
732
+ ...
733
+
734
+ @webmethod(route="/agents", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
735
+ @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA)
736
+ async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
737
+ """List all agents.
738
+
739
+ :param start_index: The index to start the pagination from.
740
+ :param limit: The number of agents to return.
741
+ :returns: A PaginatedResponse.
742
+ """
743
+ ...
744
+
745
+ @webmethod(
746
+ route="/agents/{agent_id}",
747
+ method="GET",
748
+ deprecated=True,
749
+ level=LLAMA_STACK_API_V1,
750
+ )
751
+ @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
752
+ async def get_agent(self, agent_id: str) -> Agent:
753
+ """Describe an agent by its ID.
754
+
755
+ :param agent_id: ID of the agent.
756
+ :returns: An Agent of the agent.
757
+ """
758
+ ...
759
+
760
+ @webmethod(
761
+ route="/agents/{agent_id}/sessions",
762
+ method="GET",
763
+ deprecated=True,
764
+ level=LLAMA_STACK_API_V1,
765
+ )
766
+ @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA)
767
+ async def list_agent_sessions(
768
+ self,
769
+ agent_id: str,
770
+ start_index: int | None = None,
771
+ limit: int | None = None,
772
+ ) -> PaginatedResponse:
773
+ """List all session(s) of a given agent.
774
+
775
+ :param agent_id: The ID of the agent to list sessions for.
776
+ :param start_index: The index to start the pagination from.
777
+ :param limit: The number of sessions to return.
778
+ :returns: A PaginatedResponse.
779
+ """
780
+ ...
781
+
782
+ # We situate the OpenAI Responses API in the Agents API just like we did things
783
+ # for Inference. The Responses API, in its intent, serves the same purpose as
784
+ # the Agents API above -- it is essentially a lightweight "agentic loop" with
785
+ # integrated tool calling.
786
+ #
787
+ # Both of these APIs are inherently stateful.
788
+
789
+ @webmethod(
790
+ route="/openai/v1/responses/{response_id}",
791
+ method="GET",
792
+ level=LLAMA_STACK_API_V1,
793
+ deprecated=True,
794
+ )
795
+ @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
796
+ async def get_openai_response(
797
+ self,
798
+ response_id: str,
799
+ ) -> OpenAIResponseObject:
800
+ """Get a model response.
801
+
802
+ :param response_id: The ID of the OpenAI response to retrieve.
803
+ :returns: An OpenAIResponseObject.
804
+ """
805
+ ...
806
+
807
+ @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
808
+ @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
809
+ async def create_openai_response(
810
+ self,
811
+ input: str | list[OpenAIResponseInput],
812
+ model: str,
813
+ instructions: str | None = None,
814
+ previous_response_id: str | None = None,
815
+ conversation: str | None = None,
816
+ store: bool | None = True,
817
+ stream: bool | None = False,
818
+ temperature: float | None = None,
819
+ text: OpenAIResponseText | None = None,
820
+ tools: list[OpenAIResponseInputTool] | None = None,
821
+ include: list[str] | None = None,
822
+ max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
823
+ guardrails: Annotated[
824
+ list[ResponseGuardrail] | None,
825
+ ExtraBodyField(
826
+ "List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
827
+ ),
828
+ ] = None,
829
+ ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
830
+ """Create a model response.
831
+
832
+ :param input: Input message(s) to create the response.
833
+ :param model: The underlying LLM used for completions.
834
+ :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
835
+ :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
836
+ :param include: (Optional) Additional fields to include in the response.
837
+ :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
838
+ :returns: An OpenAIResponseObject.
839
+ """
840
+ ...
841
+
842
+ @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
843
+ @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
844
+ async def list_openai_responses(
845
+ self,
846
+ after: str | None = None,
847
+ limit: int | None = 50,
848
+ model: str | None = None,
849
+ order: Order | None = Order.desc,
850
+ ) -> ListOpenAIResponseObject:
851
+ """List all responses.
852
+
853
+ :param after: The ID of the last response to return.
854
+ :param limit: The number of responses to return.
855
+ :param model: The model to filter responses by.
856
+ :param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
857
+ :returns: A ListOpenAIResponseObject.
858
+ """
859
+ ...
860
+
861
+ @webmethod(
862
+ route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
863
+ )
864
+ @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
865
+ async def list_openai_response_input_items(
866
+ self,
867
+ response_id: str,
868
+ after: str | None = None,
869
+ before: str | None = None,
870
+ include: list[str] | None = None,
871
+ limit: int | None = 20,
872
+ order: Order | None = Order.desc,
873
+ ) -> ListOpenAIResponseInputItem:
874
+ """List input items.
875
+
876
+ :param response_id: The ID of the response to retrieve input items for.
877
+ :param after: An item ID to list items after, used for pagination.
878
+ :param before: An item ID to list items before, used for pagination.
879
+ :param include: Additional fields to include in the response.
880
+ :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
881
+ :param order: The order to return the input items in. Default is desc.
882
+ :returns: An ListOpenAIResponseInputItem.
883
+ """
884
+ ...
885
+
886
+ @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
887
+ @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
888
+ async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
889
+ """Delete a response.
890
+
891
+ :param response_id: The ID of the OpenAI response to delete.
892
+ :returns: An OpenAIDeleteResponseObject
893
+ """
894
+ ...