llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -4,50 +4,58 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import re
 import time
 import uuid
 from collections.abc import AsyncIterator
 
 from pydantic import BaseModel, TypeAdapter
 
-from llama_stack.apis.agents import Order
-from llama_stack.apis.agents.agents import ResponseGuardrailSpec
-from llama_stack.apis.agents.openai_responses import (
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.responses.responses_store import (
+    ResponsesStore,
+    _OpenAIResponseObjectWithInputAndMessages,
+)
+from llama_stack_api import (
+    ConversationItem,
+    Conversations,
+    Files,
+    Inference,
+    InvalidConversationIdError,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
+    OpenAIChatCompletionContentPartParam,
     OpenAIDeleteResponseObject,
+    OpenAIMessageParam,
     OpenAIResponseInput,
+    OpenAIResponseInputMessageContentFile,
+    OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
+    OpenAIResponseInputToolChoice,
     OpenAIResponseMessage,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
+    OpenAIResponsePrompt,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
-)
-from llama_stack.apis.common.errors import (
-    InvalidConversationIdError,
-)
-from llama_stack.apis.conversations import Conversations
-from llama_stack.apis.conversations.conversations import ConversationItem
-from llama_stack.apis.inference import (
-    Inference,
-    OpenAIMessageParam,
     OpenAISystemMessageParam,
-)
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.tools import ToolGroups, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.responses.responses_store import (
-    ResponsesStore,
-    _OpenAIResponseObjectWithInputAndMessages,
+    OpenAIUserMessageParam,
+    Order,
+    Prompts,
+    ResponseGuardrailSpec,
+    ResponseItemInclude,
+    Safety,
+    ToolGroups,
+    ToolRuntime,
+    VectorIO,
 )
 
 from .streaming import StreamingResponseOrchestrator
 from .tool_executor import ToolExecutor
 from .types import ChatCompletionContext, ToolContext
 from .utils import (
+    convert_response_content_to_chat_content,
     convert_response_input_to_chat_messages,
     convert_response_text_to_chat_response_format,
     extract_guardrail_ids,
@@ -69,8 +77,11 @@ class OpenAIResponsesImpl:
         tool_runtime_api: ToolRuntime,
         responses_store: ResponsesStore,
         vector_io_api: VectorIO,  # VectorIO
-        safety_api: Safety,
+        safety_api: Safety | None,
         conversations_api: Conversations,
+        prompts_api: Prompts,
+        files_api: Files,
+        vector_stores_config=None,
     ):
         self.inference_api = inference_api
         self.tool_groups_api = tool_groups_api
@@ -83,14 +94,18 @@ class OpenAIResponsesImpl:
             tool_groups_api=tool_groups_api,
             tool_runtime_api=tool_runtime_api,
             vector_io_api=vector_io_api,
+            vector_stores_config=vector_stores_config,
         )
+        self.prompts_api = prompts_api
+        self.files_api = files_api
 
     async def _prepend_previous_response(
         self,
         input: str | list[OpenAIResponseInput],
         previous_response: _OpenAIResponseObjectWithInputAndMessages,
     ):
-        new_input_items = previous_response.input.copy()
+        # Convert Sequence to list for mutation
+        new_input_items = list(previous_response.input)
         new_input_items.extend(previous_response.output)
 
         if isinstance(input, str):
@@ -106,7 +121,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None,
         previous_response_id: str | None,
         conversation: str | None,
-    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam]]:
+    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam], ToolContext]:
         """Process input with optional previous response context.
 
         Returns:
@@ -123,15 +138,17 @@ class OpenAIResponsesImpl:
                 # Use stored messages directly and convert only new input
                 message_adapter = TypeAdapter(list[OpenAIMessageParam])
                 messages = message_adapter.validate_python(previous_response.messages)
-                new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
+                new_messages = await convert_response_input_to_chat_messages(
+                    input, previous_messages=messages, files_api=self.files_api
+                )
                 messages.extend(new_messages)
             else:
                 # Backward compatibility: reconstruct from inputs
-                messages = await convert_response_input_to_chat_messages(all_input)
+                messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
 
             tool_context.recover_tools_from_previous_response(previous_response)
         elif conversation is not None:
-            conversation_items = await self.conversations_api.list(conversation, order="asc")
+            conversation_items = await self.conversations_api.list_items(conversation, order="asc")
 
             # Use stored messages as source of truth (like previous_response.messages)
             stored_messages = await self.responses_store.get_conversation_messages(conversation)
@@ -139,7 +156,7 @@ class OpenAIResponsesImpl:
             all_input = input
             if not conversation_items.data:
                 # First turn - just convert the new input
-                messages = await convert_response_input_to_chat_messages(input)
+                messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
             else:
                 if not stored_messages:
                     all_input = conversation_items.data
@@ -155,14 +172,82 @@ class OpenAIResponsesImpl:
                     all_input = input
 
                 messages = stored_messages or []
-                new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages)
+                new_messages = await convert_response_input_to_chat_messages(
+                    all_input, previous_messages=messages, files_api=self.files_api
+                )
                 messages.extend(new_messages)
         else:
             all_input = input
-            messages = await convert_response_input_to_chat_messages(all_input)
+            messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
 
         return all_input, messages, tool_context
 
+    async def _prepend_prompt(
+        self,
+        messages: list[OpenAIMessageParam],
+        openai_response_prompt: OpenAIResponsePrompt | None,
+    ) -> None:
+        """Prepend prompt template to messages, resolving text/image/file variables.
+
+        :param messages: List of OpenAIMessageParam objects
+        :param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables
+        :returns: string of utf-8 characters
+        """
+        if not openai_response_prompt or not openai_response_prompt.id:
+            return
+
+        prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None
+        cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version)
+
+        if not cur_prompt or not cur_prompt.prompt:
+            return
+
+        cur_prompt_text = cur_prompt.prompt
+        cur_prompt_variables = cur_prompt.variables
+
+        if not openai_response_prompt.variables:
+            messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
+            return
+
+        # Validate that all provided variables exist in the prompt
+        for name in openai_response_prompt.variables.keys():
+            if name not in cur_prompt_variables:
+                raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")
+
+        # Separate text and media variables
+        text_substitutions = {}
+        media_content_parts: list[OpenAIChatCompletionContentPartParam] = []
+
+        for name, value in openai_response_prompt.variables.items():
+            # Text variable found
+            if isinstance(value, OpenAIResponseInputMessageContentText):
+                text_substitutions[name] = value.text
+
+            # Media variable found
+            elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
+                converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
+                if isinstance(converted_parts, list):
+                    media_content_parts.extend(converted_parts)
+
+                # Eg: {{product_photo}} becomes "[Image: product_photo]"
+                # This gives the model textual context about what media exists in the prompt
+                var_type = value.type.replace("input_", "").replace("_", " ").title()
+                text_substitutions[name] = f"[{var_type}: {name}]"
+
+        def replace_variable(match: re.Match[str]) -> str:
+            var_name = match.group(1).strip()
+            return str(text_substitutions.get(var_name, match.group(0)))
+
+        pattern = r"\{\{\s*(\w+)\s*\}\}"
+        processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)
+
+        # Insert system message with resolved text
+        messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text))
+
+        # If we have media, create a new user message because allows to ingest images and files
+        if media_content_parts:
+            messages.append(OpenAIUserMessageParam(content=media_content_parts))
+
     async def get_openai_response(
         self,
         response_id: str,
@@ -184,7 +269,7 @@ class OpenAIResponsesImpl:
         response_id: str,
         after: str | None = None,
         before: str | None = None,
-        include: list[str] | None = None,
+        include: list[ResponseItemInclude] | None = None,
         limit: int | None = 20,
         order: Order | None = Order.desc,
     ) -> ListOpenAIResponseInputItem:
@@ -207,6 +292,9 @@ class OpenAIResponsesImpl:
         messages: list[OpenAIMessageParam],
     ) -> None:
         new_input_id = f"msg_{uuid.uuid4()}"
+        # Type input_items_data as the full OpenAIResponseInput union to avoid list invariance issues
+        input_items_data: list[OpenAIResponseInput] = []
+
         if isinstance(input, str):
             # synthesize a message from the input string
             input_content = OpenAIResponseInputMessageContentText(text=input)
@@ -218,7 +306,6 @@ class OpenAIResponsesImpl:
             input_items_data = [input_content_item]
         else:
             # we already have a list of messages
-            input_items_data = []
             for input_item in input:
                 if isinstance(input_item, OpenAIResponseMessage):
                     # These may or may not already have an id, so dump to dict, check for id, and add if missing
@@ -239,6 +326,7 @@ class OpenAIResponsesImpl:
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
@@ -246,16 +334,41 @@ class OpenAIResponsesImpl:
         stream: bool | None = False,
         temperature: float | None = None,
         text: OpenAIResponseText | None = None,
+        tool_choice: OpenAIResponseInputToolChoice | None = None,
         tools: list[OpenAIResponseInputTool] | None = None,
-        include: list[str] | None = None,
+        include: list[ResponseItemInclude] | None = None,
         max_infer_iters: int | None = 10,
-        guardrails: list[ResponseGuardrailSpec] | None = None,
+        guardrails: list[str | ResponseGuardrailSpec] | None = None,
+        parallel_tool_calls: bool | None = None,
+        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
     ):
         stream = bool(stream)
         text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
 
+        # Validate MCP tools: ensure Authorization header is not passed via headers dict
+        if tools:
+            from llama_stack_api.openai_responses import OpenAIResponseInputToolMCP
+
+            for tool in tools:
+                if isinstance(tool, OpenAIResponseInputToolMCP) and tool.headers:
+                    for key in tool.headers.keys():
+                        if key.lower() == "authorization":
+                            raise ValueError(
+                                "Authorization header cannot be passed via 'headers'. "
+                                "Please use the 'authorization' parameter instead."
                            )
+
         guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else []
 
+        # Validate that Safety API is available if guardrails are requested
+        if guardrail_ids and self.safety_api is None:
+            raise ValueError(
+                "Cannot process guardrails: Safety API is not configured.\n\n"
+                "To use guardrails, ensure the Safety API is configured in your stack, or remove "
+                "the 'guardrails' parameter from your request."
+            )
+
         if conversation is not None:
             if previous_response_id is not None:
                 raise ValueError(
@@ -265,18 +378,27 @@ class OpenAIResponsesImpl:
             if not conversation.startswith("conv_"):
                 raise InvalidConversationIdError(conversation)
 
+        if max_tool_calls is not None and max_tool_calls < 1:
+            raise ValueError(f"Invalid {max_tool_calls=}; should be >= 1")
+
         stream_gen = self._create_streaming_response(
             input=input,
             conversation=conversation,
             model=model,
+            prompt=prompt,
             instructions=instructions,
            previous_response_id=previous_response_id,
             store=store,
             temperature=temperature,
             text=text,
             tools=tools,
+            tool_choice=tool_choice,
             max_infer_iters=max_infer_iters,
             guardrail_ids=guardrail_ids,
+            parallel_tool_calls=parallel_tool_calls,
+            max_tool_calls=max_tool_calls,
+            metadata=metadata,
+            include=include,
         )
 
         if stream:
@@ -287,16 +409,19 @@ class OpenAIResponsesImpl:
         failed_response = None
 
         async for stream_chunk in stream_gen:
-            if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                if final_response is not None:
-                    raise ValueError(
-                        "The response stream produced multiple terminal responses! "
-                        f"Earlier response from {final_event_type}"
-                    )
-                final_response = stream_chunk.response
-                final_event_type = stream_chunk.type
-            elif stream_chunk.type == "response.failed":
-                failed_response = stream_chunk.response
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    if final_response is not None:
+                        raise ValueError(
+                            "The response stream produced multiple terminal responses! "
+                            f"Earlier response from {final_event_type}"
+                        )
+                    final_response = stream_chunk.response
+                    final_event_type = stream_chunk.type
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case _:
+                    pass  # Other event types don't have .response
 
         if failed_response is not None:
             error_message = (
@@ -317,13 +442,24 @@ class OpenAIResponsesImpl:
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
+        prompt: OpenAIResponsePrompt | None = None,
         store: bool | None = True,
         temperature: float | None = None,
         text: OpenAIResponseText | None = None,
         tools: list[OpenAIResponseInputTool] | None = None,
+        tool_choice: OpenAIResponseInputToolChoice | None = None,
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
+        parallel_tool_calls: bool | None = True,
+        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
+        include: list[ResponseItemInclude] | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
+        # These should never be None when called from create_openai_response (which sets defaults)
+        # but we assert here to help mypy understand the types
+        assert text is not None, "text must not be None"
+        assert max_infer_iters is not None, "max_infer_iters must not be None"
+
         # Input preprocessing
         all_input, messages, tool_context = await self._process_input_with_previous_response(
             input, tools, previous_response_id, conversation
@@ -332,6 +468,9 @@ class OpenAIResponsesImpl:
         if instructions:
             messages.insert(0, OpenAISystemMessageParam(content=instructions))
 
+        # Prepend reusable prompt (if provided)
+        await self._prepend_prompt(messages, prompt)
+
         # Structured outputs
         response_format = await convert_response_text_to_chat_response_format(text)
 
@@ -339,6 +478,7 @@ class OpenAIResponsesImpl:
             model=model,
             messages=messages,
             response_tools=tools,
+            tool_choice=tool_choice,
             temperature=temperature,
             response_format=response_format,
             tool_context=tool_context,
@@ -354,32 +494,39 @@ class OpenAIResponsesImpl:
             ctx=ctx,
             response_id=response_id,
             created_at=created_at,
+            prompt=prompt,
             text=text,
             max_infer_iters=max_infer_iters,
+            parallel_tool_calls=parallel_tool_calls,
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
             instructions=instructions,
+            max_tool_calls=max_tool_calls,
+            metadata=metadata,
+            include=include,
         )
 
         # Stream the response
         final_response = None
         failed_response = None
 
-        output_items = []
+        # Type as ConversationItem to avoid list invariance issues
+        output_items: list[ConversationItem] = []
         async for stream_chunk in orchestrator.create_response():
-            if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                final_response = stream_chunk.response
-            elif stream_chunk.type == "response.failed":
-                failed_response = stream_chunk.response
-            yield stream_chunk
-
-            if stream_chunk.type == "response.output_item.done":
-                item = stream_chunk.item
-                output_items.append(item)
-
-            # Store and sync immediately after yielding terminal events
-            # This ensures the storage/syncing happens even if the consumer breaks early
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    final_response = stream_chunk.response
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case "response.output_item.done":
+                    item = stream_chunk.item
+                    output_items.append(item)
+                case _:
+                    pass  # Other event types
+
+            # Store and sync before yielding terminal events
+            # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
            if (
                stream_chunk.type in {"response.completed", "response.incomplete"}
                and final_response
@@ -400,6 +547,8 @@ class OpenAIResponsesImpl:
                 await self._sync_response_to_conversation(conversation, input, output_items)
                 await self.responses_store.store_conversation_messages(conversation, messages_to_store)
 
+            yield stream_chunk
+
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         return await self.responses_store.delete_response_object(response_id)
 
@@ -407,7 +556,8 @@ class OpenAIResponsesImpl:
         self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
     ) -> None:
         """Sync content and response messages to the conversation."""
-        conversation_items = []
+        # Type as ConversationItem union to avoid list invariance issues
+        conversation_items: list[ConversationItem] = []
 
         if isinstance(input, str):
             conversation_items.append(
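
A note on the prompt-template resolution introduced above: _prepend_prompt substitutes {{ variable }} placeholders in a stored prompt with caller-supplied values. Text variables are inlined directly; image and file variables are replaced with a textual placeholder (e.g. "[Image: product_photo]") while the media content itself is appended as a separate user message. Below is a minimal standalone sketch of just the substitution step, reusing the regex from the diff; the template and variable values are illustrative, not taken from the package.

import re

# Hypothetical template and substitutions; in the package these come from the
# Prompts API and from OpenAIResponsePrompt.variables.
prompt_text = "Describe {{ product_photo }} for a {{ audience }} audience."
text_substitutions = {
    "audience": "technical",
    # Media variables become textual placeholders in the resolved system message.
    "product_photo": "[Image: product_photo]",
}

def replace_variable(match: re.Match[str]) -> str:
    # Placeholders with no supplied value are left verbatim (match.group(0)).
    var_name = match.group(1).strip()
    return str(text_substitutions.get(var_name, match.group(0)))

# Same pattern as in _prepend_prompt: "{{", optional whitespace, a word, "}}".
pattern = r"\{\{\s*(\w+)\s*\}\}"
print(re.sub(pattern, replace_variable, prompt_text))
# Describe [Image: product_photo] for a technical audience.

Note the asymmetry in validation: the method raises ValueError only for supplied variables that the stored prompt does not declare, while template placeholders that receive no value simply pass through unresolved.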