llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458) hide show
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -4,53 +4,35 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- import uuid
8
- from collections.abc import AsyncGenerator
9
- from datetime import UTC, datetime
10
7
 
11
- from llama_stack.apis.agents import (
12
- Agent,
13
- AgentConfig,
14
- AgentCreateResponse,
8
+ from llama_stack.core.datatypes import AccessRule
9
+ from llama_stack.core.storage.kvstore import InmemoryKVStoreImpl, kvstore_impl
10
+ from llama_stack.log import get_logger
11
+ from llama_stack.providers.utils.responses.responses_store import ResponsesStore
12
+ from llama_stack_api import (
15
13
  Agents,
16
- AgentSessionCreateResponse,
17
- AgentStepResponse,
18
- AgentToolGroup,
19
- AgentTurnCreateRequest,
20
- AgentTurnResumeRequest,
21
- Document,
14
+ Conversations,
15
+ Files,
16
+ Inference,
22
17
  ListOpenAIResponseInputItem,
23
18
  ListOpenAIResponseObject,
19
+ OpenAIDeleteResponseObject,
24
20
  OpenAIResponseInput,
25
21
  OpenAIResponseInputTool,
22
+ OpenAIResponseInputToolChoice,
26
23
  OpenAIResponseObject,
24
+ OpenAIResponsePrompt,
25
+ OpenAIResponseText,
27
26
  Order,
28
- Session,
29
- Turn,
27
+ Prompts,
28
+ ResponseGuardrail,
29
+ Safety,
30
+ ToolGroups,
31
+ ToolRuntime,
32
+ VectorIO,
30
33
  )
31
- from llama_stack.apis.agents.agents import ResponseGuardrail
32
- from llama_stack.apis.agents.openai_responses import OpenAIResponseText
33
- from llama_stack.apis.common.responses import PaginatedResponse
34
- from llama_stack.apis.conversations import Conversations
35
- from llama_stack.apis.inference import (
36
- Inference,
37
- ToolConfig,
38
- ToolResponse,
39
- ToolResponseMessage,
40
- UserMessage,
41
- )
42
- from llama_stack.apis.safety import Safety
43
- from llama_stack.apis.tools import ToolGroups, ToolRuntime
44
- from llama_stack.apis.vector_io import VectorIO
45
- from llama_stack.core.datatypes import AccessRule
46
- from llama_stack.log import get_logger
47
- from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
48
- from llama_stack.providers.utils.pagination import paginate_records
49
- from llama_stack.providers.utils.responses.responses_store import ResponsesStore
50
34
 
51
- from .agent_instance import ChatAgent
52
35
  from .config import MetaReferenceAgentsImplConfig
53
- from .persistence import AgentInfo
54
36
  from .responses.openai_responses import OpenAIResponsesImpl
55
37
 
56
38
  logger = get_logger(name=__name__, category="agents::meta_reference")
@@ -62,12 +44,13 @@ class MetaReferenceAgentsImpl(Agents):
62
44
  config: MetaReferenceAgentsImplConfig,
63
45
  inference_api: Inference,
64
46
  vector_io_api: VectorIO,
65
- safety_api: Safety,
47
+ safety_api: Safety | None,
66
48
  tool_runtime_api: ToolRuntime,
67
49
  tool_groups_api: ToolGroups,
68
50
  conversations_api: Conversations,
51
+ prompts_api: Prompts,
52
+ files_api: Files,
69
53
  policy: list[AccessRule],
70
- telemetry_enabled: bool = False,
71
54
  ):
72
55
  self.config = config
73
56
  self.inference_api = inference_api
@@ -76,8 +59,8 @@ class MetaReferenceAgentsImpl(Agents):
76
59
  self.tool_runtime_api = tool_runtime_api
77
60
  self.tool_groups_api = tool_groups_api
78
61
  self.conversations_api = conversations_api
79
- self.telemetry_enabled = telemetry_enabled
80
-
62
+ self.prompts_api = prompts_api
63
+ self.files_api = files_api
81
64
  self.in_memory_store = InmemoryKVStoreImpl()
82
65
  self.openai_responses_impl: OpenAIResponsesImpl | None = None
83
66
  self.policy = policy
@@ -94,227 +77,11 @@ class MetaReferenceAgentsImpl(Agents):
94
77
  vector_io_api=self.vector_io_api,
95
78
  safety_api=self.safety_api,
96
79
  conversations_api=self.conversations_api,
80
+ prompts_api=self.prompts_api,
81
+ files_api=self.files_api,
82
+ vector_stores_config=self.config.vector_stores_config,
97
83
  )
98
84
 
99
- async def create_agent(
100
- self,
101
- agent_config: AgentConfig,
102
- ) -> AgentCreateResponse:
103
- agent_id = str(uuid.uuid4())
104
- created_at = datetime.now(UTC)
105
-
106
- agent_info = AgentInfo(
107
- **agent_config.model_dump(),
108
- created_at=created_at,
109
- )
110
-
111
- # Store the agent info
112
- await self.persistence_store.set(
113
- key=f"agent:{agent_id}",
114
- value=agent_info.model_dump_json(),
115
- )
116
-
117
- return AgentCreateResponse(
118
- agent_id=agent_id,
119
- )
120
-
121
- async def _get_agent_impl(self, agent_id: str) -> ChatAgent:
122
- agent_info_json = await self.persistence_store.get(
123
- key=f"agent:{agent_id}",
124
- )
125
- if not agent_info_json:
126
- raise ValueError(f"Could not find agent info for {agent_id}")
127
-
128
- try:
129
- agent_info = AgentInfo.model_validate_json(agent_info_json)
130
- except Exception as e:
131
- raise ValueError(f"Could not validate agent info for {agent_id}") from e
132
-
133
- return ChatAgent(
134
- agent_id=agent_id,
135
- agent_config=agent_info,
136
- inference_api=self.inference_api,
137
- safety_api=self.safety_api,
138
- vector_io_api=self.vector_io_api,
139
- tool_runtime_api=self.tool_runtime_api,
140
- tool_groups_api=self.tool_groups_api,
141
- persistence_store=(
142
- self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
143
- ),
144
- created_at=agent_info.created_at,
145
- policy=self.policy,
146
- telemetry_enabled=self.telemetry_enabled,
147
- )
148
-
149
- async def create_agent_session(
150
- self,
151
- agent_id: str,
152
- session_name: str,
153
- ) -> AgentSessionCreateResponse:
154
- agent = await self._get_agent_impl(agent_id)
155
-
156
- session_id = await agent.create_session(session_name)
157
- return AgentSessionCreateResponse(
158
- session_id=session_id,
159
- )
160
-
161
- async def create_agent_turn(
162
- self,
163
- agent_id: str,
164
- session_id: str,
165
- messages: list[UserMessage | ToolResponseMessage],
166
- toolgroups: list[AgentToolGroup] | None = None,
167
- documents: list[Document] | None = None,
168
- stream: bool | None = False,
169
- tool_config: ToolConfig | None = None,
170
- ) -> AsyncGenerator:
171
- request = AgentTurnCreateRequest(
172
- agent_id=agent_id,
173
- session_id=session_id,
174
- messages=messages,
175
- stream=True,
176
- toolgroups=toolgroups,
177
- documents=documents,
178
- tool_config=tool_config,
179
- )
180
- if stream:
181
- return self._create_agent_turn_streaming(request)
182
- else:
183
- raise NotImplementedError("Non-streaming agent turns not yet implemented")
184
-
185
- async def _create_agent_turn_streaming(
186
- self,
187
- request: AgentTurnCreateRequest,
188
- ) -> AsyncGenerator:
189
- agent = await self._get_agent_impl(request.agent_id)
190
- async for event in agent.create_and_execute_turn(request):
191
- yield event
192
-
193
- async def resume_agent_turn(
194
- self,
195
- agent_id: str,
196
- session_id: str,
197
- turn_id: str,
198
- tool_responses: list[ToolResponse],
199
- stream: bool | None = False,
200
- ) -> AsyncGenerator:
201
- request = AgentTurnResumeRequest(
202
- agent_id=agent_id,
203
- session_id=session_id,
204
- turn_id=turn_id,
205
- tool_responses=tool_responses,
206
- stream=stream,
207
- )
208
- if stream:
209
- return self._continue_agent_turn_streaming(request)
210
- else:
211
- raise NotImplementedError("Non-streaming agent turns not yet implemented")
212
-
213
- async def _continue_agent_turn_streaming(
214
- self,
215
- request: AgentTurnResumeRequest,
216
- ) -> AsyncGenerator:
217
- agent = await self._get_agent_impl(request.agent_id)
218
- async for event in agent.resume_turn(request):
219
- yield event
220
-
221
- async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn:
222
- agent = await self._get_agent_impl(agent_id)
223
- turn = await agent.storage.get_session_turn(session_id, turn_id)
224
- return turn
225
-
226
- async def get_agents_step(self, agent_id: str, session_id: str, turn_id: str, step_id: str) -> AgentStepResponse:
227
- turn = await self.get_agents_turn(agent_id, session_id, turn_id)
228
- for step in turn.steps:
229
- if step.step_id == step_id:
230
- return AgentStepResponse(step=step)
231
- raise ValueError(f"Provided step_id {step_id} could not be found")
232
-
233
- async def get_agents_session(
234
- self,
235
- agent_id: str,
236
- session_id: str,
237
- turn_ids: list[str] | None = None,
238
- ) -> Session:
239
- agent = await self._get_agent_impl(agent_id)
240
-
241
- session_info = await agent.storage.get_session_info(session_id)
242
- turns = await agent.storage.get_session_turns(session_id)
243
- if turn_ids:
244
- turns = [turn for turn in turns if turn.turn_id in turn_ids]
245
- return Session(
246
- session_name=session_info.session_name,
247
- session_id=session_id,
248
- turns=turns,
249
- started_at=session_info.started_at,
250
- )
251
-
252
- async def delete_agents_session(self, agent_id: str, session_id: str) -> None:
253
- agent = await self._get_agent_impl(agent_id)
254
-
255
- # Delete turns first, then the session
256
- await agent.storage.delete_session_turns(session_id)
257
- await agent.storage.delete_session(session_id)
258
-
259
- async def delete_agent(self, agent_id: str) -> None:
260
- # First get all sessions for this agent
261
- agent = await self._get_agent_impl(agent_id)
262
- sessions = await agent.storage.list_sessions()
263
-
264
- # Delete all sessions
265
- for session in sessions:
266
- await self.delete_agents_session(agent_id, session.session_id)
267
-
268
- # Finally delete the agent itself
269
- await self.persistence_store.delete(f"agent:{agent_id}")
270
-
271
- async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
272
- agent_keys = await self.persistence_store.keys_in_range("agent:", "agent:\xff")
273
- agent_list: list[Agent] = []
274
- for agent_key in agent_keys:
275
- agent_id = agent_key.split(":")[1]
276
-
277
- # Get the agent info using the key
278
- agent_info_json = await self.persistence_store.get(agent_key)
279
- if not agent_info_json:
280
- logger.error(f"Could not find agent info for key {agent_key}")
281
- continue
282
-
283
- try:
284
- agent_info = AgentInfo.model_validate_json(agent_info_json)
285
- agent_list.append(
286
- Agent(
287
- agent_id=agent_id,
288
- agent_config=agent_info,
289
- created_at=agent_info.created_at,
290
- )
291
- )
292
- except Exception as e:
293
- logger.error(f"Error parsing agent info for {agent_id}: {e}")
294
- continue
295
-
296
- # Convert Agent objects to dictionaries
297
- agent_dicts = [agent.model_dump() for agent in agent_list]
298
- return paginate_records(agent_dicts, start_index, limit)
299
-
300
- async def get_agent(self, agent_id: str) -> Agent:
301
- chat_agent = await self._get_agent_impl(agent_id)
302
- agent = Agent(
303
- agent_id=agent_id,
304
- agent_config=chat_agent.agent_config,
305
- created_at=chat_agent.created_at,
306
- )
307
- return agent
308
-
309
- async def list_agent_sessions(
310
- self, agent_id: str, start_index: int | None = None, limit: int | None = None
311
- ) -> PaginatedResponse:
312
- agent = await self._get_agent_impl(agent_id)
313
- sessions = await agent.storage.list_sessions()
314
- # Convert Session objects to dictionaries
315
- session_dicts = [session.model_dump() for session in sessions]
316
- return paginate_records(session_dicts, start_index, limit)
317
-
318
85
  async def shutdown(self) -> None:
319
86
  pass
320
87
 
@@ -323,27 +90,35 @@ class MetaReferenceAgentsImpl(Agents):
323
90
  self,
324
91
  response_id: str,
325
92
  ) -> OpenAIResponseObject:
93
+ assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
326
94
  return await self.openai_responses_impl.get_openai_response(response_id)
327
95
 
328
96
  async def create_openai_response(
329
97
  self,
330
98
  input: str | list[OpenAIResponseInput],
331
99
  model: str,
100
+ prompt: OpenAIResponsePrompt | None = None,
332
101
  instructions: str | None = None,
102
+ parallel_tool_calls: bool | None = True,
333
103
  previous_response_id: str | None = None,
334
104
  conversation: str | None = None,
335
105
  store: bool | None = True,
336
106
  stream: bool | None = False,
337
107
  temperature: float | None = None,
338
108
  text: OpenAIResponseText | None = None,
109
+ tool_choice: OpenAIResponseInputToolChoice | None = None,
339
110
  tools: list[OpenAIResponseInputTool] | None = None,
340
111
  include: list[str] | None = None,
341
112
  max_infer_iters: int | None = 10,
342
113
  guardrails: list[ResponseGuardrail] | None = None,
114
+ max_tool_calls: int | None = None,
115
+ metadata: dict[str, str] | None = None,
343
116
  ) -> OpenAIResponseObject:
344
- return await self.openai_responses_impl.create_openai_response(
117
+ assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
118
+ result = await self.openai_responses_impl.create_openai_response(
345
119
  input,
346
120
  model,
121
+ prompt,
347
122
  instructions,
348
123
  previous_response_id,
349
124
  conversation,
@@ -351,11 +126,16 @@ class MetaReferenceAgentsImpl(Agents):
351
126
  stream,
352
127
  temperature,
353
128
  text,
129
+ tool_choice,
354
130
  tools,
355
131
  include,
356
132
  max_infer_iters,
357
133
  guardrails,
134
+ parallel_tool_calls,
135
+ max_tool_calls,
136
+ metadata,
358
137
  )
138
+ return result # type: ignore[no-any-return]
359
139
 
360
140
  async def list_openai_responses(
361
141
  self,
@@ -364,6 +144,7 @@ class MetaReferenceAgentsImpl(Agents):
364
144
  model: str | None = None,
365
145
  order: Order | None = Order.desc,
366
146
  ) -> ListOpenAIResponseObject:
147
+ assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
367
148
  return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)
368
149
 
369
150
  async def list_openai_response_input_items(
@@ -375,9 +156,11 @@ class MetaReferenceAgentsImpl(Agents):
375
156
  limit: int | None = 20,
376
157
  order: Order | None = Order.desc,
377
158
  ) -> ListOpenAIResponseInputItem:
159
+ assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
378
160
  return await self.openai_responses_impl.list_openai_response_input_items(
379
161
  response_id, after, before, include, limit, order
380
162
  )
381
163
 
382
- async def delete_openai_response(self, response_id: str) -> None:
164
+ async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
165
+ assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
383
166
  return await self.openai_responses_impl.delete_openai_response(response_id)
@@ -6,8 +6,9 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from pydantic import BaseModel
9
+ from pydantic import BaseModel, Field
10
10
 
11
+ from llama_stack.core.datatypes import VectorStoresConfig
11
12
  from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
12
13
 
13
14
 
@@ -20,6 +21,10 @@ class AgentPersistenceConfig(BaseModel):
20
21
 
21
22
  class MetaReferenceAgentsImplConfig(BaseModel):
22
23
  persistence: AgentPersistenceConfig
24
+ vector_stores_config: VectorStoresConfig | None = Field(
25
+ default=None,
26
+ description="Configuration for vector store prompt templates and behavior",
27
+ )
23
28
 
24
29
  @classmethod
25
30
  def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: