llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458) hide show
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -5,15 +5,36 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
  import asyncio
8
+ import base64
9
+ import mimetypes
8
10
  import re
9
11
  import uuid
12
+ from collections.abc import Sequence
10
13
 
11
- from llama_stack.apis.agents.agents import ResponseGuardrailSpec
12
- from llama_stack.apis.agents.openai_responses import (
14
+ from llama_stack_api import (
15
+ Files,
16
+ OpenAIAssistantMessageParam,
17
+ OpenAIChatCompletionContentPartImageParam,
18
+ OpenAIChatCompletionContentPartParam,
19
+ OpenAIChatCompletionContentPartTextParam,
20
+ OpenAIChatCompletionToolCall,
21
+ OpenAIChatCompletionToolCallFunction,
22
+ OpenAIChoice,
23
+ OpenAIDeveloperMessageParam,
24
+ OpenAIFile,
25
+ OpenAIFileFile,
26
+ OpenAIImageURL,
27
+ OpenAIJSONSchema,
28
+ OpenAIMessageParam,
13
29
  OpenAIResponseAnnotationFileCitation,
30
+ OpenAIResponseFormatJSONObject,
31
+ OpenAIResponseFormatJSONSchema,
32
+ OpenAIResponseFormatParam,
33
+ OpenAIResponseFormatText,
14
34
  OpenAIResponseInput,
15
35
  OpenAIResponseInputFunctionToolCallOutput,
16
36
  OpenAIResponseInputMessageContent,
37
+ OpenAIResponseInputMessageContentFile,
17
38
  OpenAIResponseInputMessageContentImage,
18
39
  OpenAIResponseInputMessageContentText,
19
40
  OpenAIResponseInputTool,
@@ -22,32 +43,58 @@ from llama_stack.apis.agents.openai_responses import (
22
43
  OpenAIResponseMessage,
23
44
  OpenAIResponseOutputMessageContent,
24
45
  OpenAIResponseOutputMessageContentOutputText,
46
+ OpenAIResponseOutputMessageFileSearchToolCall,
25
47
  OpenAIResponseOutputMessageFunctionToolCall,
26
48
  OpenAIResponseOutputMessageMCPCall,
27
49
  OpenAIResponseOutputMessageMCPListTools,
50
+ OpenAIResponseOutputMessageWebSearchToolCall,
28
51
  OpenAIResponseText,
29
- )
30
- from llama_stack.apis.inference import (
31
- OpenAIAssistantMessageParam,
32
- OpenAIChatCompletionContentPartImageParam,
33
- OpenAIChatCompletionContentPartParam,
34
- OpenAIChatCompletionContentPartTextParam,
35
- OpenAIChatCompletionToolCall,
36
- OpenAIChatCompletionToolCallFunction,
37
- OpenAIChoice,
38
- OpenAIDeveloperMessageParam,
39
- OpenAIImageURL,
40
- OpenAIJSONSchema,
41
- OpenAIMessageParam,
42
- OpenAIResponseFormatJSONObject,
43
- OpenAIResponseFormatJSONSchema,
44
- OpenAIResponseFormatParam,
45
- OpenAIResponseFormatText,
46
52
  OpenAISystemMessageParam,
47
53
  OpenAIToolMessageParam,
48
54
  OpenAIUserMessageParam,
55
+ ResponseGuardrailSpec,
56
+ Safety,
49
57
  )
50
- from llama_stack.apis.safety import Safety
58
+
59
+
60
async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes:
    """
    Fetch the content of a stored file through the Files API and return it as bytes.

    :param file_id: The file identifier (e.g., "file-abc123")
    :param files_api: Files API instance used to retrieve the content
    :returns: The ``body`` of the retrieval response, converted to ``bytes``
    :raises ValueError: if retrieving the content or converting it to bytes fails
    """
    try:
        content_response = await files_api.openai_retrieve_file_content(file_id)
        raw_content = bytes(content_response.body)
    except Exception as err:
        # Every failure is surfaced as a ValueError; the original error stays chained as the cause.
        raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(err)}") from err
    return raw_content
74
+
75
+
76
def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str:
    """
    Encode raw binary content as base64 text suitable for embedding in a URL.

    :param raw_bytes: the bytes that make up the file content
    :returns: the base64 encoding of ``raw_bytes`` as a string
    """
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
84
+
85
+
86
+ def construct_data_url(ascii_text: str, mime_type: str | None) -> str:
87
+ """
88
+ Construct data url with decoded data inside
89
+
90
+ :param ascii_text: ASCII content
91
+ :param mime_type: MIME type of file
92
+ :returns: data url string (eg. data:image/png,base64,%3Ch1%3EHello%2C%20World%21%3C%2Fh1%3E)
93
+ """
94
+ if not mime_type:
95
+ mime_type = "application/octet-stream"
96
+
97
+ return f"data:{mime_type};base64,{ascii_text}"
51
98
 
52
99
 
53
100
  async def convert_chat_choice_to_response_message(
@@ -68,36 +115,107 @@ async def convert_chat_choice_to_response_message(
68
115
  )
69
116
 
70
117
  annotations, clean_text = _extract_citations_from_text(output_content, citation_files or {})
118
+ logprobs = choice.logprobs.content if choice.logprobs and choice.logprobs.content else None
71
119
 
72
120
  return OpenAIResponseMessage(
73
121
  id=message_id or f"msg_{uuid.uuid4()}",
74
- content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
122
+ content=[
123
+ OpenAIResponseOutputMessageContentOutputText(
124
+ text=clean_text,
125
+ annotations=list(annotations),
126
+ logprobs=logprobs,
127
+ )
128
+ ],
75
129
  status="completed",
76
130
  role="assistant",
77
131
  )
78
132
 
79
133
 
80
134
  async def convert_response_content_to_chat_content(
81
- content: (str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]),
135
+ content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
136
+ files_api: Files | None,
82
137
  ) -> str | list[OpenAIChatCompletionContentPartParam]:
83
138
  """
84
139
  Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
85
140
 
86
141
  The content schemas of each API look similar, but are not exactly the same.
142
+
143
+ :param content: The content to convert
144
+ :param files_api: Files API for resolving file_id to raw file content (required if content contains files/images)
87
145
  """
88
146
  if isinstance(content, str):
89
147
  return content
90
148
 
91
- converted_parts = []
149
+ # Type with union to avoid list invariance issues
150
+ converted_parts: list[OpenAIChatCompletionContentPartParam] = []
92
151
  for content_part in content:
93
152
  if isinstance(content_part, OpenAIResponseInputMessageContentText):
94
153
  converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
95
154
  elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
96
155
  converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
97
156
  elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
157
+ detail = content_part.detail
158
+ image_mime_type = None
98
159
  if content_part.image_url:
99
- image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail)
160
+ image_url = OpenAIImageURL(url=content_part.image_url, detail=detail)
100
161
  converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
162
+ elif content_part.file_id:
163
+ if files_api is None:
164
+ raise ValueError("file_ids are not supported by this implementation of the Stack")
165
+ image_file_response = await files_api.openai_retrieve_file(content_part.file_id)
166
+ if image_file_response.filename:
167
+ image_mime_type, _ = mimetypes.guess_type(image_file_response.filename)
168
+ raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api)
169
+ ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes)
170
+ image_data_url = construct_data_url(ascii_text, image_mime_type)
171
+ image_url = OpenAIImageURL(url=image_data_url, detail=detail)
172
+ converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
173
+ else:
174
+ raise ValueError(
175
+ f"Image content must have either 'image_url' or 'file_id'. "
176
+ f"Got image_url={content_part.image_url}, file_id={content_part.file_id}"
177
+ )
178
+ elif isinstance(content_part, OpenAIResponseInputMessageContentFile):
179
+ resolved_file_data = None
180
+ file_data = content_part.file_data
181
+ file_id = content_part.file_id
182
+ file_url = content_part.file_url
183
+ filename = content_part.filename
184
+ file_mime_type = None
185
+ if not any([file_data, file_id, file_url]):
186
+ raise ValueError(
187
+ f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. "
188
+ f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}"
189
+ )
190
+ if file_id:
191
+ if files_api is None:
192
+ raise ValueError("file_ids are not supported by this implementation of the Stack")
193
+
194
+ file_response = await files_api.openai_retrieve_file(file_id)
195
+ if not filename:
196
+ filename = file_response.filename
197
+ file_mime_type, _ = mimetypes.guess_type(file_response.filename)
198
+ raw_file_bytes = await extract_bytes_from_file(file_id, files_api)
199
+ ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes)
200
+ resolved_file_data = construct_data_url(ascii_text, file_mime_type)
201
+ elif file_data:
202
+ if file_data.startswith("data:"):
203
+ resolved_file_data = file_data
204
+ else:
205
+ # Raw base64 data, wrap in data URL format
206
+ if filename:
207
+ file_mime_type, _ = mimetypes.guess_type(filename)
208
+ resolved_file_data = construct_data_url(file_data, file_mime_type)
209
+ elif file_url:
210
+ resolved_file_data = file_url
211
+ converted_parts.append(
212
+ OpenAIFile(
213
+ file=OpenAIFileFile(
214
+ file_data=resolved_file_data,
215
+ filename=filename,
216
+ )
217
+ )
218
+ )
101
219
  elif isinstance(content_part, str):
102
220
  converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
103
221
  else:
@@ -110,12 +228,14 @@ async def convert_response_content_to_chat_content(
110
228
  async def convert_response_input_to_chat_messages(
111
229
  input: str | list[OpenAIResponseInput],
112
230
  previous_messages: list[OpenAIMessageParam] | None = None,
231
+ files_api: Files | None = None,
113
232
  ) -> list[OpenAIMessageParam]:
114
233
  """
115
234
  Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
116
235
 
117
236
  :param input: The input to convert
118
237
  :param previous_messages: Optional previous messages to check for function_call references
238
+ :param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content)
119
239
  """
120
240
  messages: list[OpenAIMessageParam] = []
121
241
  if isinstance(input, list):
@@ -158,22 +278,31 @@ async def convert_response_input_to_chat_messages(
158
278
  ),
159
279
  )
160
280
  messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
281
+ # Output can be None, use empty string as fallback
282
+ output_content = input_item.output if input_item.output is not None else ""
161
283
  messages.append(
162
284
  OpenAIToolMessageParam(
163
- content=input_item.output,
285
+ content=output_content,
164
286
  tool_call_id=input_item.id,
165
287
  )
166
288
  )
167
289
  elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
168
290
  # the tool list will be handled separately
169
291
  pass
292
+ elif isinstance(
293
+ input_item,
294
+ OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall,
295
+ ):
296
+ # these tool calls are tracked internally but not converted to chat messages
297
+ pass
170
298
  elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance(
171
299
  input_item, OpenAIResponseMCPApprovalResponse
172
300
  ):
173
301
  # these are handled by the responses impl itself and not pass through to chat completions
174
302
  pass
175
- else:
176
- content = await convert_response_content_to_chat_content(input_item.content)
303
+ elif isinstance(input_item, OpenAIResponseMessage):
304
+ # Narrow type to OpenAIResponseMessage which has content and role attributes
305
+ content = await convert_response_content_to_chat_content(input_item.content, files_api)
177
306
  message_type = await get_message_type_by_role(input_item.role)
178
307
  if message_type is None:
179
308
  raise ValueError(
@@ -191,7 +320,8 @@ async def convert_response_input_to_chat_messages(
191
320
  last_user_content = getattr(last_user_msg, "content", None)
192
321
  if last_user_content == content:
193
322
  continue # Skip duplicate user message
194
- messages.append(message_type(content=content))
323
+ # Dynamic message type call - different message types have different content expectations
324
+ messages.append(message_type(content=content)) # type: ignore[call-arg,arg-type]
195
325
  if len(tool_call_results):
196
326
  # Check if unpaired function_call_outputs reference function_calls from previous messages
197
327
  if previous_messages:
@@ -237,8 +367,11 @@ async def convert_response_text_to_chat_response_format(
237
367
  if text.format["type"] == "json_object":
238
368
  return OpenAIResponseFormatJSONObject()
239
369
  if text.format["type"] == "json_schema":
370
+ # Assert name exists for json_schema format
371
+ assert text.format.get("name"), "json_schema format requires a name"
372
+ schema_name: str = text.format["name"] # type: ignore[assignment]
240
373
  return OpenAIResponseFormatJSONSchema(
241
- json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"])
374
+ json_schema=OpenAIJSONSchema(name=schema_name, schema=text.format["schema"])
242
375
  )
243
376
  raise ValueError(f"Unsupported text format: {text.format}")
244
377
 
@@ -251,7 +384,7 @@ async def get_message_type_by_role(role: str) -> type[OpenAIMessageParam] | None
251
384
  "assistant": OpenAIAssistantMessageParam,
252
385
  "developer": OpenAIDeveloperMessageParam,
253
386
  }
254
- return role_to_type.get(role)
387
+ return role_to_type.get(role) # type: ignore[return-value] # Pydantic models use ModelMetaclass
255
388
 
256
389
 
257
390
  def _extract_citations_from_text(
@@ -313,14 +446,19 @@ def is_function_tool_call(
313
446
  return False
314
447
 
315
448
 
316
- async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[str]) -> str | None:
449
+ async def run_guardrails(safety_api: Safety | None, messages: str, guardrail_ids: list[str]) -> str | None:
317
450
  """Run guardrails against messages and return violation message if blocked."""
318
451
  if not messages:
319
452
  return None
320
453
 
454
+ # If safety API is not available, skip guardrails
455
+ if safety_api is None:
456
+ return None
457
+
321
458
  # Look up shields to get their provider_resource_id (actual model ID)
322
459
  model_ids = []
323
- shields_list = await safety_api.routing_table.list_shields()
460
+ # TODO: list_shields not in Safety interface but available at runtime via API routing
461
+ shields_list = await safety_api.routing_table.list_shields() # type: ignore[attr-defined]
324
462
 
325
463
  for guardrail_id in guardrail_ids:
326
464
  matching_shields = [shield for shield in shields_list.data if shield.identifier == guardrail_id]
@@ -337,7 +475,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
337
475
  for result in response.results:
338
476
  if result.flagged:
339
477
  message = result.user_message or "Content blocked by safety guardrails"
340
- flagged_categories = [cat for cat, flagged in result.categories.items() if flagged]
478
+ flagged_categories = (
479
+ [cat for cat, flagged in result.categories.items() if flagged] if result.categories else []
480
+ )
341
481
  violation_type = result.metadata.get("violation_type", []) if result.metadata else []
342
482
 
343
483
  if flagged_categories:
@@ -347,6 +487,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
347
487
 
348
488
  return message
349
489
 
490
+ # No violations found
491
+ return None
492
+
350
493
 
351
494
  def extract_guardrail_ids(guardrails: list | None) -> list[str]:
352
495
  """Extract guardrail IDs from guardrails parameter, handling both string IDs and ResponseGuardrailSpec objects."""
@@ -363,3 +506,28 @@ def extract_guardrail_ids(guardrails: list | None) -> list[str]:
363
506
  raise ValueError(f"Unknown guardrail format: {guardrail}, expected str or ResponseGuardrailSpec")
364
507
 
365
508
  return guardrail_ids
509
+
510
+
511
+ def convert_mcp_tool_choice(
512
+ chat_tool_names: list[str],
513
+ server_label: str | None = None,
514
+ server_label_to_tools: dict[str, list[str]] | None = None,
515
+ tool_name: str | None = None,
516
+ ) -> dict[str, str] | list[dict[str, str]]:
517
+ """Convert a responses tool choice of type mcp to a chat completions compatible function tool choice."""
518
+
519
+ if tool_name:
520
+ if tool_name not in chat_tool_names:
521
+ return None
522
+ return {"type": "function", "function": {"name": tool_name}}
523
+
524
+ elif server_label and server_label_to_tools:
525
+ # no tool name specified, so we need to enforce an allowed_tools with the function tools derived only from the given server label
526
+ # Use reverse mapping for lookup by server_label
527
+ # This already accounts for allowed_tools restrictions applied during _process_mcp_tool
528
+ tool_names = server_label_to_tools.get(server_label, [])
529
+ if not tool_names:
530
+ return None
531
+ matching_tools = [{"type": "function", "function": {"name": tool_name}} for tool_name in tool_names]
532
+ return matching_tools
533
+ return []
@@ -6,10 +6,8 @@
6
6
 
7
7
  import asyncio
8
8
 
9
- from llama_stack.apis.inference import Message
10
- from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
11
9
  from llama_stack.log import get_logger
12
- from llama_stack.providers.utils.telemetry import tracing
10
+ from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
13
11
 
14
12
  log = get_logger(name=__name__, category="agents::meta_reference")
15
13
 
@@ -31,16 +29,13 @@ class ShieldRunnerMixin:
31
29
  self.input_shields = input_shields
32
30
  self.output_shields = output_shields
33
31
 
34
- async def run_multiple_shields(self, messages: list[Message], identifiers: list[str]) -> None:
35
- async def run_shield_with_span(identifier: str):
36
- async with tracing.span(f"run_shield_{identifier}"):
37
- return await self.safety_api.run_shield(
38
- shield_id=identifier,
39
- messages=messages,
40
- params={},
41
- )
42
-
43
- responses = await asyncio.gather(*[run_shield_with_span(identifier) for identifier in identifiers])
32
+ async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
33
+ responses = await asyncio.gather(
34
+ *[
35
+ self.safety_api.run_shield(shield_id=identifier, messages=messages, params={})
36
+ for identifier in identifiers
37
+ ]
38
+ )
44
39
  for identifier, response in zip(identifiers, responses, strict=False):
45
40
  if not response.violation:
46
41
  continue
@@ -6,11 +6,9 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.apis.files import Files
10
- from llama_stack.apis.inference import Inference
11
- from llama_stack.apis.models import Models
12
9
  from llama_stack.core.datatypes import AccessRule, Api
13
- from llama_stack.providers.utils.kvstore import kvstore_impl
10
+ from llama_stack.core.storage.kvstore import kvstore_impl
11
+ from llama_stack_api import Files, Inference, Models
14
12
 
15
13
  from .batches import ReferenceBatchesImpl
16
14
  from .config import ReferenceBatchesImplConfig