llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -7,8 +7,18 @@
7
7
  import asyncio
8
8
  import json
9
9
  from collections.abc import AsyncIterator
10
+ from typing import Any
10
11
 
11
- from llama_stack.apis.agents.openai_responses import (
12
+ from opentelemetry import trace
13
+
14
+ from llama_stack.core.datatypes import VectorStoresConfig
15
+ from llama_stack.log import get_logger
16
+ from llama_stack_api import (
17
+ ImageContentItem,
18
+ OpenAIChatCompletionContentPartImageParam,
19
+ OpenAIChatCompletionContentPartTextParam,
20
+ OpenAIChatCompletionToolCall,
21
+ OpenAIImageURL,
12
22
  OpenAIResponseInputToolFileSearch,
13
23
  OpenAIResponseInputToolMCP,
14
24
  OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
@@ -23,26 +33,18 @@ from llama_stack.apis.agents.openai_responses import (
23
33
  OpenAIResponseOutputMessageFileSearchToolCall,
24
34
  OpenAIResponseOutputMessageFileSearchToolCallResults,
25
35
  OpenAIResponseOutputMessageWebSearchToolCall,
26
- )
27
- from llama_stack.apis.common.content_types import (
28
- ImageContentItem,
29
- TextContentItem,
30
- )
31
- from llama_stack.apis.inference import (
32
- OpenAIChatCompletionContentPartImageParam,
33
- OpenAIChatCompletionContentPartTextParam,
34
- OpenAIChatCompletionToolCall,
35
- OpenAIImageURL,
36
36
  OpenAIToolMessageParam,
37
+ TextContentItem,
38
+ ToolGroups,
39
+ ToolInvocationResult,
40
+ ToolRuntime,
41
+ VectorIO,
37
42
  )
38
- from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
39
- from llama_stack.apis.vector_io import VectorIO
40
- from llama_stack.log import get_logger
41
- from llama_stack.providers.utils.telemetry import tracing
42
43
 
43
44
  from .types import ChatCompletionContext, ToolExecutionResult
44
45
 
45
46
  logger = get_logger(name=__name__, category="agents::meta_reference")
47
+ tracer = trace.get_tracer(__name__)
46
48
 
47
49
 
48
50
  class ToolExecutor:
@@ -51,10 +53,12 @@ class ToolExecutor:
51
53
  tool_groups_api: ToolGroups,
52
54
  tool_runtime_api: ToolRuntime,
53
55
  vector_io_api: VectorIO,
56
+ vector_stores_config=None,
54
57
  ):
55
58
  self.tool_groups_api = tool_groups_api
56
59
  self.tool_runtime_api = tool_runtime_api
57
60
  self.vector_io_api = vector_io_api
61
+ self.vector_stores_config = vector_stores_config
58
62
 
59
63
  async def execute_tool_call(
60
64
  self,
@@ -67,7 +71,7 @@ class ToolExecutor:
67
71
  ) -> AsyncIterator[ToolExecutionResult]:
68
72
  tool_call_id = tool_call.id
69
73
  function = tool_call.function
70
- tool_kwargs = json.loads(function.arguments) if function.arguments else {}
74
+ tool_kwargs = json.loads(function.arguments) if function and function.arguments else {}
71
75
 
72
76
  if not function or not tool_call_id or not function.name:
73
77
  yield ToolExecutionResult(sequence_number=sequence_number)
@@ -84,7 +88,16 @@ class ToolExecutor:
84
88
  error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server)
85
89
 
86
90
  # Emit completion events for tool execution
87
- has_error = error_exc or (result and ((result.error_code and result.error_code > 0) or result.error_message))
91
+ has_error = bool(
92
+ error_exc
93
+ or (
94
+ result
95
+ and (
96
+ ((error_code := getattr(result, "error_code", None)) and error_code > 0)
97
+ or getattr(result, "error_message", None)
98
+ )
99
+ )
100
+ )
88
101
  async for event_result in self._emit_completion_events(
89
102
  function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server
90
103
  ):
@@ -101,7 +114,9 @@ class ToolExecutor:
101
114
  sequence_number=sequence_number,
102
115
  final_output_message=output_message,
103
116
  final_input_message=input_message,
104
- citation_files=result.metadata.get("citation_files") if result and result.metadata else None,
117
+ citation_files=(
118
+ metadata.get("citation_files") if result and (metadata := getattr(result, "metadata", None)) else None
119
+ ),
105
120
  )
106
121
 
107
122
  async def _execute_knowledge_search_via_vector_store(
@@ -136,13 +151,35 @@ class ToolExecutor:
136
151
  for results in all_results:
137
152
  search_results.extend(results)
138
153
 
139
- content_items = []
140
- content_items.append(
141
- TextContentItem(
142
- text=f"knowledge_search tool found {len(search_results)} chunks:\nBEGIN of knowledge_search tool results.\n"
143
- )
154
+ # Get templates from vector stores config, fallback to constants
155
+
156
+ # Check if annotations are enabled
157
+ enable_annotations = (
158
+ self.vector_stores_config
159
+ and self.vector_stores_config.annotation_prompt_params
160
+ and self.vector_stores_config.annotation_prompt_params.enable_annotations
144
161
  )
145
162
 
163
+ # Get templates
164
+ header_template = self.vector_stores_config.file_search_params.header_template
165
+ footer_template = self.vector_stores_config.file_search_params.footer_template
166
+ context_template = self.vector_stores_config.context_prompt_params.context_template
167
+
168
+ # Get annotation templates (use defaults if annotations disabled)
169
+ if enable_annotations:
170
+ chunk_annotation_template = self.vector_stores_config.annotation_prompt_params.chunk_annotation_template
171
+ annotation_instruction_template = (
172
+ self.vector_stores_config.annotation_prompt_params.annotation_instruction_template
173
+ )
174
+ else:
175
+ # Use defaults from VectorStoresConfig when annotations disabled
176
+ default_config = VectorStoresConfig()
177
+ chunk_annotation_template = default_config.annotation_prompt_params.chunk_annotation_template
178
+ annotation_instruction_template = default_config.annotation_prompt_params.annotation_instruction_template
179
+
180
+ content_items = []
181
+ content_items.append(TextContentItem(text=header_template.format(num_chunks=len(search_results))))
182
+
146
183
  unique_files = set()
147
184
  for i, result_item in enumerate(search_results):
148
185
  chunk_text = result_item.content[0].text if result_item.content else ""
@@ -154,22 +191,23 @@ class ToolExecutor:
154
191
  if result_item.attributes:
155
192
  metadata_text += f", attributes: {result_item.attributes}"
156
193
 
157
- text_content = f"[{i + 1}] {metadata_text} (cite as <|{file_id}|>)\n{chunk_text}\n"
194
+ text_content = chunk_annotation_template.format(
195
+ index=i + 1, metadata_text=metadata_text, file_id=file_id, chunk_text=chunk_text
196
+ )
158
197
  content_items.append(TextContentItem(text=text_content))
159
198
  unique_files.add(file_id)
160
199
 
161
- content_items.append(TextContentItem(text="END of knowledge_search tool results.\n"))
200
+ content_items.append(TextContentItem(text=footer_template))
162
201
 
163
- citation_instruction = ""
202
+ annotation_instruction = ""
164
203
  if unique_files:
165
- citation_instruction = (
166
- " Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). "
167
- "Do not add extra punctuation. Use only the file IDs provided (do not invent new ones)."
168
- )
204
+ annotation_instruction = annotation_instruction_template
169
205
 
170
206
  content_items.append(
171
207
  TextContentItem(
172
- text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{citation_instruction}\n',
208
+ text=context_template.format(
209
+ query=query, num_chunks=len(search_results), annotation_instruction=annotation_instruction
210
+ )
173
211
  )
174
212
  )
175
213
 
@@ -188,8 +226,9 @@ class ToolExecutor:
188
226
 
189
227
  citation_files[file_id] = filename
190
228
 
229
+ # Cast to proper InterleavedContent type (list invariance)
191
230
  return ToolInvocationResult(
192
- content=content_items,
231
+ content=content_items, # type: ignore[arg-type]
193
232
  metadata={
194
233
  "document_ids": [r.file_id for r in search_results],
195
234
  "chunks": [r.content[0].text if r.content else "" for r in search_results],
@@ -209,51 +248,60 @@ class ToolExecutor:
209
248
  ) -> AsyncIterator[ToolExecutionResult]:
210
249
  """Emit progress events for tool execution start."""
211
250
  # Emit in_progress event based on tool type (only for tools with specific streaming events)
212
- progress_event = None
213
251
  if mcp_tool_to_server and function_name in mcp_tool_to_server:
214
252
  sequence_number += 1
215
- progress_event = OpenAIResponseObjectStreamResponseMcpCallInProgress(
216
- item_id=item_id,
217
- output_index=output_index,
253
+ yield ToolExecutionResult(
254
+ stream_event=OpenAIResponseObjectStreamResponseMcpCallInProgress(
255
+ item_id=item_id,
256
+ output_index=output_index,
257
+ sequence_number=sequence_number,
258
+ ),
218
259
  sequence_number=sequence_number,
219
260
  )
220
261
  elif function_name == "web_search":
221
262
  sequence_number += 1
222
- progress_event = OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
223
- item_id=item_id,
224
- output_index=output_index,
263
+ yield ToolExecutionResult(
264
+ stream_event=OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
265
+ item_id=item_id,
266
+ output_index=output_index,
267
+ sequence_number=sequence_number,
268
+ ),
225
269
  sequence_number=sequence_number,
226
270
  )
227
271
  elif function_name == "knowledge_search":
228
272
  sequence_number += 1
229
- progress_event = OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
230
- item_id=item_id,
231
- output_index=output_index,
273
+ yield ToolExecutionResult(
274
+ stream_event=OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
275
+ item_id=item_id,
276
+ output_index=output_index,
277
+ sequence_number=sequence_number,
278
+ ),
232
279
  sequence_number=sequence_number,
233
280
  )
234
281
 
235
- if progress_event:
236
- yield ToolExecutionResult(stream_event=progress_event, sequence_number=sequence_number)
237
-
238
282
  # For web search, emit searching event
239
283
  if function_name == "web_search":
240
284
  sequence_number += 1
241
- searching_event = OpenAIResponseObjectStreamResponseWebSearchCallSearching(
242
- item_id=item_id,
243
- output_index=output_index,
285
+ yield ToolExecutionResult(
286
+ stream_event=OpenAIResponseObjectStreamResponseWebSearchCallSearching(
287
+ item_id=item_id,
288
+ output_index=output_index,
289
+ sequence_number=sequence_number,
290
+ ),
244
291
  sequence_number=sequence_number,
245
292
  )
246
- yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)
247
293
 
248
294
  # For file search, emit searching event
249
295
  if function_name == "knowledge_search":
250
296
  sequence_number += 1
251
- searching_event = OpenAIResponseObjectStreamResponseFileSearchCallSearching(
252
- item_id=item_id,
253
- output_index=output_index,
297
+ yield ToolExecutionResult(
298
+ stream_event=OpenAIResponseObjectStreamResponseFileSearchCallSearching(
299
+ item_id=item_id,
300
+ output_index=output_index,
301
+ sequence_number=sequence_number,
302
+ ),
254
303
  sequence_number=sequence_number,
255
304
  )
256
- yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)
257
305
 
258
306
  async def _execute_tool(
259
307
  self,
@@ -261,7 +309,7 @@ class ToolExecutor:
261
309
  tool_kwargs: dict,
262
310
  ctx: ChatCompletionContext,
263
311
  mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
264
- ) -> tuple[Exception | None, any]:
312
+ ) -> tuple[Exception | None, Any]:
265
313
  """Execute the tool and return error exception and result."""
266
314
  error_exc = None
267
315
  result = None
@@ -276,23 +324,30 @@ class ToolExecutor:
276
324
  "server_url": mcp_tool.server_url,
277
325
  "tool_name": function_name,
278
326
  }
279
- async with tracing.span("invoke_mcp_tool", attributes):
327
+ # TODO: follow semantic conventions for Open Telemetry tool spans
328
+ # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
329
+ with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes):
280
330
  result = await invoke_mcp_tool(
281
331
  endpoint=mcp_tool.server_url,
282
- headers=mcp_tool.headers or {},
283
332
  tool_name=function_name,
284
333
  kwargs=tool_kwargs,
334
+ headers=mcp_tool.headers,
335
+ authorization=mcp_tool.authorization,
285
336
  )
286
337
  elif function_name == "knowledge_search":
287
- response_file_search_tool = next(
288
- (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
289
- None,
338
+ response_file_search_tool = (
339
+ next(
340
+ (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
341
+ None,
342
+ )
343
+ if ctx.response_tools
344
+ else None
290
345
  )
291
346
  if response_file_search_tool:
292
347
  # Use vector_stores.search API instead of knowledge_search tool
293
348
  # to support filters and ranking_options
294
349
  query = tool_kwargs.get("query", "")
295
- async with tracing.span("knowledge_search", {}):
350
+ with tracer.start_as_current_span("knowledge_search"):
296
351
  result = await self._execute_knowledge_search_via_vector_store(
297
352
  query=query,
298
353
  response_file_search_tool=response_file_search_tool,
@@ -301,7 +356,9 @@ class ToolExecutor:
301
356
  attributes = {
302
357
  "tool_name": function_name,
303
358
  }
304
- async with tracing.span("invoke_tool", attributes):
359
+ # TODO: follow semantic conventions for Open Telemetry tool spans
360
+ # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
361
+ with tracer.start_as_current_span("invoke_tool", attributes=attributes):
305
362
  result = await self.tool_runtime_api.invoke_tool(
306
363
  tool_name=function_name,
307
364
  kwargs=tool_kwargs,
@@ -322,35 +379,34 @@ class ToolExecutor:
322
379
  mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
323
380
  ) -> AsyncIterator[ToolExecutionResult]:
324
381
  """Emit completion or failure events for tool execution."""
325
- completion_event = None
326
-
327
382
  if mcp_tool_to_server and function_name in mcp_tool_to_server:
328
383
  sequence_number += 1
329
384
  if has_error:
330
- completion_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
385
+ mcp_failed_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
331
386
  sequence_number=sequence_number,
332
387
  )
388
+ yield ToolExecutionResult(stream_event=mcp_failed_event, sequence_number=sequence_number)
333
389
  else:
334
- completion_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
390
+ mcp_completed_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
335
391
  sequence_number=sequence_number,
336
392
  )
393
+ yield ToolExecutionResult(stream_event=mcp_completed_event, sequence_number=sequence_number)
337
394
  elif function_name == "web_search":
338
395
  sequence_number += 1
339
- completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
396
+ web_completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
340
397
  item_id=item_id,
341
398
  output_index=output_index,
342
399
  sequence_number=sequence_number,
343
400
  )
401
+ yield ToolExecutionResult(stream_event=web_completion_event, sequence_number=sequence_number)
344
402
  elif function_name == "knowledge_search":
345
403
  sequence_number += 1
346
- completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
404
+ file_completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
347
405
  item_id=item_id,
348
406
  output_index=output_index,
349
407
  sequence_number=sequence_number,
350
408
  )
351
-
352
- if completion_event:
353
- yield ToolExecutionResult(stream_event=completion_event, sequence_number=sequence_number)
409
+ yield ToolExecutionResult(stream_event=file_completion_event, sequence_number=sequence_number)
354
410
 
355
411
  async def _build_result_messages(
356
412
  self,
@@ -360,18 +416,19 @@ class ToolExecutor:
360
416
  tool_kwargs: dict,
361
417
  ctx: ChatCompletionContext,
362
418
  error_exc: Exception | None,
363
- result: any,
419
+ result: Any,
364
420
  has_error: bool,
365
421
  mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
366
- ) -> tuple[any, any]:
422
+ ) -> tuple[Any, Any]:
367
423
  """Build output and input messages from tool execution results."""
368
424
  from llama_stack.providers.utils.inference.prompt_adapter import (
369
425
  interleaved_content_as_str,
370
426
  )
371
427
 
372
428
  # Build output message
429
+ message: Any
373
430
  if mcp_tool_to_server and function.name in mcp_tool_to_server:
374
- from llama_stack.apis.agents.openai_responses import (
431
+ from llama_stack_api import (
375
432
  OpenAIResponseOutputMessageMCPCall,
376
433
  )
377
434
 
@@ -383,10 +440,14 @@ class ToolExecutor:
383
440
  )
384
441
  if error_exc:
385
442
  message.error = str(error_exc)
386
- elif (result and result.error_code and result.error_code > 0) or (result and result.error_message):
387
- message.error = f"Error (code {result.error_code}): {result.error_message}"
388
- elif result and result.content:
389
- message.output = interleaved_content_as_str(result.content)
443
+ elif (result and (error_code := getattr(result, "error_code", None)) and error_code > 0) or (
444
+ result and getattr(result, "error_message", None)
445
+ ):
446
+ ec = getattr(result, "error_code", "unknown")
447
+ em = getattr(result, "error_message", "")
448
+ message.error = f"Error (code {ec}): {em}"
449
+ elif result and (content := getattr(result, "content", None)):
450
+ message.output = interleaved_content_as_str(content)
390
451
  else:
391
452
  if function.name == "web_search":
392
453
  message = OpenAIResponseOutputMessageWebSearchToolCall(
@@ -401,17 +462,17 @@ class ToolExecutor:
401
462
  queries=[tool_kwargs.get("query", "")],
402
463
  status="completed",
403
464
  )
404
- if result and "document_ids" in result.metadata:
465
+ if result and (metadata := getattr(result, "metadata", None)) and "document_ids" in metadata:
405
466
  message.results = []
406
- for i, doc_id in enumerate(result.metadata["document_ids"]):
407
- text = result.metadata["chunks"][i] if "chunks" in result.metadata else None
408
- score = result.metadata["scores"][i] if "scores" in result.metadata else None
467
+ for i, doc_id in enumerate(metadata["document_ids"]):
468
+ text = metadata["chunks"][i] if "chunks" in metadata else None
469
+ score = metadata["scores"][i] if "scores" in metadata else None
409
470
  message.results.append(
410
471
  OpenAIResponseOutputMessageFileSearchToolCallResults(
411
472
  file_id=doc_id,
412
473
  filename=doc_id,
413
- text=text,
414
- score=score,
474
+ text=text if text is not None else "",
475
+ score=score if score is not None else 0.0,
415
476
  attributes={},
416
477
  )
417
478
  )
@@ -421,27 +482,32 @@ class ToolExecutor:
421
482
  raise ValueError(f"Unknown tool {function.name} called")
422
483
 
423
484
  # Build input message
424
- input_message = None
425
- if result and result.content:
426
- if isinstance(result.content, str):
427
- content = result.content
428
- elif isinstance(result.content, list):
429
- content = []
430
- for item in result.content:
485
+ input_message: OpenAIToolMessageParam | None = None
486
+ if result and (result_content := getattr(result, "content", None)):
487
+ # all the mypy contortions here are still unsatisfactory with random Any typing
488
+ if isinstance(result_content, str):
489
+ msg_content: str | list[Any] = result_content
490
+ elif isinstance(result_content, list):
491
+ content_list: list[Any] = []
492
+ for item in result_content:
493
+ part: Any
431
494
  if isinstance(item, TextContentItem):
432
495
  part = OpenAIChatCompletionContentPartTextParam(text=item.text)
433
496
  elif isinstance(item, ImageContentItem):
434
497
  if item.image.data:
435
- url = f"data:image;base64,{item.image.data}"
498
+ url_value = f"data:image;base64,{item.image.data}"
436
499
  else:
437
- url = item.image.url
438
- part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url))
500
+ url_value = str(item.image.url) if item.image.url else ""
501
+ part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url_value))
439
502
  else:
440
503
  raise ValueError(f"Unknown result content type: {type(item)}")
441
- content.append(part)
504
+ content_list.append(part)
505
+ msg_content = content_list
442
506
  else:
443
- raise ValueError(f"Unknown result content type: {type(result.content)}")
444
- input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id)
507
+ raise ValueError(f"Unknown result content type: {type(result_content)}")
508
+ # OpenAIToolMessageParam accepts str | list[TextParam] but we may have images
509
+ # This is runtime-safe as the API accepts it, but mypy complains
510
+ input_message = OpenAIToolMessageParam(content=msg_content, tool_call_id=tool_call_id) # type: ignore[arg-type]
445
511
  else:
446
512
  text = str(error_exc) if error_exc else "Tool execution failed"
447
513
  input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
@@ -5,13 +5,18 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
  from dataclasses import dataclass
8
+ from typing import cast
8
9
 
9
10
  from openai.types.chat import ChatCompletionToolParam
10
11
  from pydantic import BaseModel
11
12
 
12
- from llama_stack.apis.agents.openai_responses import (
13
+ from llama_stack_api import (
14
+ OpenAIChatCompletionToolCall,
15
+ OpenAIMessageParam,
16
+ OpenAIResponseFormatParam,
13
17
  OpenAIResponseInput,
14
18
  OpenAIResponseInputTool,
19
+ OpenAIResponseInputToolChoice,
15
20
  OpenAIResponseInputToolFileSearch,
16
21
  OpenAIResponseInputToolFunction,
17
22
  OpenAIResponseInputToolMCP,
@@ -24,8 +29,8 @@ from llama_stack.apis.agents.openai_responses import (
24
29
  OpenAIResponseOutputMessageMCPListTools,
25
30
  OpenAIResponseTool,
26
31
  OpenAIResponseToolMCP,
32
+ OpenAITokenLogProb,
27
33
  )
28
- from llama_stack.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam
29
34
 
30
35
 
31
36
  class ToolExecutionResult(BaseModel):
@@ -51,6 +56,7 @@ class ChatCompletionResult:
51
56
  message_item_id: str # For streaming events
52
57
  tool_call_item_ids: dict[int, str] # For streaming events
53
58
  content_part_emitted: bool # Tracking state
59
+ logprobs: list[OpenAITokenLogProb] | None = None
54
60
 
55
61
  @property
56
62
  def content_text(self) -> str:
@@ -100,17 +106,19 @@ class ToolContext(BaseModel):
100
106
  if isinstance(tool, OpenAIResponseToolMCP):
101
107
  previous_tools_by_label[tool.server_label] = tool
102
108
  # collect tool definitions which are the same in current and previous requests:
103
- tools_to_process = []
109
+ tools_to_process: list[OpenAIResponseInputTool] = []
104
110
  matched: dict[str, OpenAIResponseInputToolMCP] = {}
105
- for tool in self.current_tools:
111
+ # Mypy confuses OpenAIResponseInputTool (Input union) with OpenAIResponseTool (output union)
112
+ # which differ only in MCP type (InputToolMCP vs ToolMCP). Code is correct.
113
+ for tool in cast(list[OpenAIResponseInputTool], self.current_tools): # type: ignore[assignment]
106
114
  if isinstance(tool, OpenAIResponseInputToolMCP) and tool.server_label in previous_tools_by_label:
107
115
  previous_tool = previous_tools_by_label[tool.server_label]
108
116
  if previous_tool.allowed_tools == tool.allowed_tools:
109
117
  matched[tool.server_label] = tool
110
118
  else:
111
- tools_to_process.append(tool)
119
+ tools_to_process.append(tool) # type: ignore[arg-type]
112
120
  else:
113
- tools_to_process.append(tool)
121
+ tools_to_process.append(tool) # type: ignore[arg-type]
114
122
  # tools that are not the same or were not previously defined need to be processed:
115
123
  self.tools_to_process = tools_to_process
116
124
  # for all matched definitions, get the mcp_list_tools objects from the previous output:
@@ -119,9 +127,11 @@ class ToolContext(BaseModel):
119
127
  ]
120
128
  # reconstruct the tool to server mappings that can be reused:
121
129
  for listing in self.previous_tool_listings:
130
+ # listing is OpenAIResponseOutputMessageMCPListTools which has tools: list[MCPListToolsTool]
122
131
  definition = matched[listing.server_label]
123
- for tool in listing.tools:
124
- self.previous_tools[tool.name] = definition
132
+ for mcp_tool in listing.tools:
133
+ # mcp_tool is MCPListToolsTool which has a name: str field
134
+ self.previous_tools[mcp_tool.name] = definition
125
135
 
126
136
  def available_tools(self) -> list[OpenAIResponseTool]:
127
137
  if not self.current_tools:
@@ -139,6 +149,8 @@ class ToolContext(BaseModel):
139
149
  server_label=tool.server_label,
140
150
  allowed_tools=tool.allowed_tools,
141
151
  )
152
+ # Exhaustive check - all tool types should be handled above
153
+ raise AssertionError(f"Unexpected tool type: {type(tool)}")
142
154
 
143
155
  return [convert_tool(tool) for tool in self.current_tools]
144
156
 
@@ -151,6 +163,7 @@ class ChatCompletionContext(BaseModel):
151
163
  temperature: float | None
152
164
  response_format: OpenAIResponseFormatParam
153
165
  tool_context: ToolContext | None
166
+ tool_choice: OpenAIResponseInputToolChoice | None = None
154
167
  approval_requests: list[OpenAIResponseMCPApprovalRequest] = []
155
168
  approval_responses: dict[str, OpenAIResponseMCPApprovalResponse] = {}
156
169
 
@@ -163,6 +176,7 @@ class ChatCompletionContext(BaseModel):
163
176
  response_format: OpenAIResponseFormatParam,
164
177
  tool_context: ToolContext,
165
178
  inputs: list[OpenAIResponseInput] | str,
179
+ tool_choice: OpenAIResponseInputToolChoice | None = None,
166
180
  ):
167
181
  super().__init__(
168
182
  model=model,
@@ -171,6 +185,7 @@ class ChatCompletionContext(BaseModel):
171
185
  temperature=temperature,
172
186
  response_format=response_format,
173
187
  tool_context=tool_context,
188
+ tool_choice=tool_choice,
174
189
  )
175
190
  if not isinstance(inputs, str):
176
191
  self.approval_requests = [input for input in inputs if input.type == "mcp_approval_request"]