llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/utils/responses/responses_store.py
@@ -4,25 +4,22 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.agents import (
-    Order,
-)
-from llama_stack.apis.agents.openai_responses import (
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
+from llama_stack.log import get_logger
+from llama_stack_api import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
+    OpenAIMessageParam,
     OpenAIResponseInput,
     OpenAIResponseObject,
     OpenAIResponseObjectWithInput,
+    Order,
 )
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
-from llama_stack.log import get_logger
-
-from ..sqlstore.api import ColumnDefinition, ColumnType
-from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import sqlstore_impl
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
 
 logger = get_logger(name=__name__, category="openai_responses")
 
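The recurring pattern in this hunk, and across the 0.4.0 release, is that API symbols move out of llama_stack.apis.* into the flat llama_stack_api package. A minimal before/after sketch of the import migration, using only names visible in the hunk above:

    # Before (0.3.4)
    from llama_stack.apis.agents import Order
    from llama_stack.apis.inference import OpenAIMessageParam

    # After (0.4.0)
    from llama_stack_api import OpenAIMessageParam, Order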
@@ -252,19 +249,12 @@ class ResponsesStore:
         # Serialize messages to dict format for JSON storage
         messages_data = [msg.model_dump() for msg in messages]
 
-        # Upsert: try insert first, update if exists
-        try:
-            await self.sql_store.insert(
-                table="conversation_messages",
-                data={"conversation_id": conversation_id, "messages": messages_data},
-            )
-        except Exception:
-            # If insert fails due to ID conflict, update existing record
-            await self.sql_store.update(
-                table="conversation_messages",
-                data={"messages": messages_data},
-                where={"conversation_id": conversation_id},
-            )
+        await self.sql_store.upsert(
+            table="conversation_messages",
+            data={"conversation_id": conversation_id, "messages": messages_data},
+            conflict_columns=["conversation_id"],
+            update_columns=["messages"],
+        )
 
         logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}")
 
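The new sql_store.upsert() call replaces the insert-then-update fallback. A minimal sketch of the semantics its conflict_columns/update_columns arguments express, written against plain sqlite3; the table and column names come from the hunk, while the SQL mapping is an assumption about what a backend would do, not the actual SqlStore implementation:

    import json
    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE conversation_messages (conversation_id TEXT PRIMARY KEY, messages TEXT)")

    def upsert_messages(conversation_id: str, messages: list[dict]) -> None:
        # conflict_columns=["conversation_id"], update_columns=["messages"] roughly maps to:
        conn.execute(
            "INSERT INTO conversation_messages (conversation_id, messages) VALUES (?, ?) "
            "ON CONFLICT(conversation_id) DO UPDATE SET messages = excluded.messages",
            (conversation_id, json.dumps(messages)),
        )

    upsert_messages("conv-1", [{"role": "user", "content": "hi"}])
    upsert_messages("conv-1", [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}])
    assert conn.execute("SELECT COUNT(*) FROM conversation_messages").fetchone()[0] == 1

Unlike the old try/except, a single atomic statement cannot race between the failed insert and the update.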
llama_stack/providers/utils/scoring/aggregation_utils.py
@@ -6,8 +6,7 @@
 import statistics
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import AggregationFunctionType
+from llama_stack_api import AggregationFunctionType, ScoringResultRow
 
 
 def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
llama_stack/providers/utils/scoring/base_scoring_fn.py
@@ -6,9 +6,8 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFn
 from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
+from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
 
 
 class BaseScoringFn(ABC):
llama_stack/providers/utils/tools/mcp.py
@@ -15,18 +15,55 @@ from mcp import types as mcp_types
 from mcp.client.sse import sse_client
 from mcp.client.streamable_http import streamablehttp_client
 
-from llama_stack.apis.common.content_types import ImageContentItem, InterleavedContentItem, TextContentItem
-from llama_stack.apis.tools import (
+from llama_stack.core.datatypes import AuthenticationRequiredError
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.ttl_dict import TTLDict
+from llama_stack_api import (
+    ImageContentItem,
+    InterleavedContentItem,
     ListToolDefsResponse,
+    TextContentItem,
     ToolDef,
     ToolInvocationResult,
+    _URLOrData,
 )
-from llama_stack.core.datatypes import AuthenticationRequiredError
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.tools.ttl_dict import TTLDict
 
 logger = get_logger(__name__, category="tools")
 
+
+def prepare_mcp_headers(base_headers: dict[str, str] | None, authorization: str | None) -> dict[str, str]:
+    """
+    Prepare headers for MCP requests with authorization support.
+
+    Args:
+        base_headers: Base headers dictionary (can be None)
+        authorization: OAuth access token (without "Bearer " prefix)
+
+    Returns:
+        Headers dictionary with Authorization header if token provided
+
+    Raises:
+        ValueError: If Authorization header is specified in the headers dict (security risk)
+    """
+    headers = dict(base_headers or {})
+
+    # Security check: reject any Authorization header in the headers dict
+    # Users must use the authorization parameter instead to avoid security risks
+    existing_keys_lower = {k.lower() for k in headers.keys()}
+    if "authorization" in existing_keys_lower:
+        raise ValueError(
+            "For security reasons, Authorization header cannot be passed via 'headers'. "
+            "Please use the 'authorization' parameter instead."
+        )
+
+    # Add Authorization header if token provided
+    if authorization:
+        # OAuth access token - add "Bearer " prefix
+        headers["Authorization"] = f"Bearer {authorization}"
+
+    return headers
+
+
 protocol_cache = TTLDict(ttl_seconds=3600)
 
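Since prepare_mcp_headers is fully shown above, a short usage sketch of both paths:

    headers = prepare_mcp_headers({"X-Custom": "abc"}, authorization="tok-123")
    assert headers == {"X-Custom": "abc", "Authorization": "Bearer tok-123"}

    # An Authorization key in the headers dict is rejected, regardless of case:
    try:
        prepare_mcp_headers({"authorization": "Bearer tok-123"}, None)
    except ValueError:
        pass  # callers must use the authorization parameter instead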
@@ -49,7 +86,10 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
     try:
         client = streamablehttp_client
         if strategy == MCPProtol.SSE:
-            client = sse_client
+            # sse_client and streamablehttp_client have different signatures, but both
+            # are called the same way here, so we cast to Any to avoid type errors
+            client = cast(Any, sse_client)
+
         async with client(endpoint, headers=headers) as client_streams:
             async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
                 await session.initialize()
@@ -107,9 +147,29 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
             raise
 
 
-async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse:
+async def list_mcp_tools(
+    endpoint: str,
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
+) -> ListToolDefsResponse:
+    """List tools available from an MCP server.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        List of tool definitions from the MCP server
+
+    Raises:
+        ValueError: If Authorization is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
     tools = []
-    async with client_wrapper(endpoint, headers) as session:
+    async with client_wrapper(endpoint, final_headers) as session:
         tools_result = await session.list_tools()
         for tool in tools_result.tools:
             tools.append(
@@ -127,9 +187,31 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs
 
 
 async def invoke_mcp_tool(
-    endpoint: str, headers: dict[str, str], tool_name: str, kwargs: dict[str, Any]
+    endpoint: str,
+    tool_name: str,
+    kwargs: dict[str, Any],
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
 ) -> ToolInvocationResult:
-    async with client_wrapper(endpoint, headers) as session:
+    """Invoke an MCP tool with the given arguments.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        tool_name: Name of the tool to invoke
+        kwargs: Tool invocation arguments
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        Tool invocation result with content and error information
+
+    Raises:
+        ValueError: If Authorization header is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
+    async with client_wrapper(endpoint, final_headers) as session:
         result = await session.call_tool(tool_name, kwargs)
 
         content: list[InterleavedContentItem] = []
@@ -137,7 +219,7 @@ async def invoke_mcp_tool(
             if isinstance(item, mcp_types.TextContent):
                 content.append(TextContentItem(text=item.text))
             elif isinstance(item, mcp_types.ImageContent):
-                content.append(ImageContentItem(image=item.data))
+                content.append(ImageContentItem(image=_URLOrData(data=item.data)))
             elif isinstance(item, mcp_types.EmbeddedResource):
                 logger.warning(f"EmbeddedResource is not supported: {item}")
             else:
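A hedged usage sketch of the new list_mcp_tools/invoke_mcp_tool signatures. The endpoint URL and token are placeholders, and tools.data[0].name assumes the data field that llama-stack list responses conventionally carry:

    import asyncio

    async def main() -> None:
        endpoint = "http://localhost:8321/mcp"  # placeholder MCP server URL
        token = "tok-123"  # placeholder OAuth access token, no "Bearer " prefix
        tools = await list_mcp_tools(endpoint, authorization=token)
        result = await invoke_mcp_tool(
            endpoint,
            tool_name=tools.data[0].name,
            kwargs={"query": "llama"},
            authorization=token,
        )
        print(result.content)

    asyncio.run(main())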
llama_stack/telemetry/constants.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+This file contains constants used for naming data captured for telemetry.
+
+This is used to ensure that the data captured for telemetry is consistent and can be used to
+identify and correlate data. If custom telemetry data is added to llama stack, please add
+constants for it here.
+"""
+
+llama_stack_prefix = "llama_stack"
+
+# Safety Attributes
+RUN_SHIELD_OPERATION_NAME = "run_shield"
+
+SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request"
+SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id"
+SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages"
+
+SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response"
+SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata"
+SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level"
+SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message"
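For reference, the attribute names these constants compose to (import path per entry 283 in the file list):

    from llama_stack.telemetry.constants import (
        SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
        SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
    )

    assert SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE == "llama_stack.safety.request.shield_id"
    assert SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE == "llama_stack.safety.response.violation.level"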
llama_stack/telemetry/helpers.py
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+from opentelemetry import trace
+
+from llama_stack_api import OpenAIMessageParam, RunShieldResponse
+
+from .constants import (
+    RUN_SHIELD_OPERATION_NAME,
+    SAFETY_REQUEST_MESSAGES_ATTRIBUTE,
+    SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
+    SAFETY_RESPONSE_METADATA_ATTRIBUTE,
+    SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE,
+    SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
+)
+
+
+def safety_span_name(shield_id: str) -> str:
+    return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}"
+
+
+# TODO: Consider using Wrapt to automatically instrument code
+# This is the industry standard way to package automatically instrumentation in python.
+def safety_request_span_attributes(
+    shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse
+) -> None:
+    span = trace.get_current_span()
+    span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id)
+    messages_json = json.dumps([msg.model_dump() for msg in messages])
+    span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json)
+
+    if response.violation:
+        if response.violation.metadata:
+            metadata_json = json.dumps(response.violation.metadata)
+            span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json)
+        if response.violation.user_message:
+            span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message)
+        span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value)
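A minimal sketch of how these helpers might wrap a shield call, assuming an OpenTelemetry tracer is already configured; run_shield() is a hypothetical stand-in for a safety provider call, not an API shown in this diff:

    from opentelemetry import trace

    tracer = trace.get_tracer(__name__)

    async def run_shield_with_telemetry(shield_id, messages):
        # Span name follows the "run_shield <shield_id>" convention defined above
        with tracer.start_as_current_span(safety_span_name(shield_id)):
            response = await run_shield(shield_id, messages)  # hypothetical provider call
            safety_request_span_attributes(shield_id, messages, response)
            return response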
llama_stack/testing/api_recorder.py
@@ -40,10 +40,12 @@ from openai.types.completion_choice import CompletionChoice
 from llama_stack.core.testing_context import get_test_context, is_debug_mode
 
 # update the "finish_reason" field, since its type definition is wrong (no None is accepted)
-CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
+CompletionChoice.model_fields["finish_reason"].annotation = cast(
+    type[Any] | None, Literal["stop", "length", "content_filter"] | None
+)
 CompletionChoice.model_rebuild()
 
-REPO_ROOT = Path(__file__).parent.parent.parent
+REPO_ROOT = Path(__file__).parent.parent.parent.parent
 DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/common"
 
@@ -154,7 +156,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
     }
 
     # Include test_id for isolation, except for shared infrastructure endpoints
-    if parsed.path not in ("/api/tags", "/v1/models"):
+    if parsed.path not in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
         normalized["test_id"] = test_id
 
     normalized_json = json.dumps(normalized, sort_keys=True)
@@ -428,7 +430,7 @@ class ResponseStorage:
 
         # For model-list endpoints, include digest in filename to distinguish different model sets
         endpoint = request.get("endpoint")
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
            digest = _model_identifiers_digest(endpoint, response)
            response_file = f"models-{request_hash}-{digest}.json"
 
@@ -552,13 +554,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
     Supported endpoints:
     - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
     - '/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
+    - '/v1/openai/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
     Returns a list of unique identifiers or None if structure doesn't match.
     """
     if "models" in response["body"]:
         # ollama
         items = response["body"]["models"]
     else:
-        # openai
+        # openai or openai-style endpoints
         items = response["body"]
     idents = [m.model if endpoint == "/api/tags" else m.id for m in items]
     return sorted(set(idents))
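A small worked example of the identifier extraction above; recorded bodies hold model objects, mimicked here with SimpleNamespace:

    from types import SimpleNamespace

    ollama_items = [SimpleNamespace(model="llama3:8b"), SimpleNamespace(model="llama3:70b")]
    openai_items = [SimpleNamespace(id="gpt-4o"), SimpleNamespace(id="gpt-4o-mini")]

    # "/api/tags" selects m.model; "/v1/models" and "/v1/openai/v1/models" select m.id
    assert sorted({m.model for m in ollama_items}) == ["llama3:70b", "llama3:8b"]
    assert sorted({m.id for m in openai_items}) == ["gpt-4o", "gpt-4o-mini"]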
@@ -579,7 +582,7 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     seen: dict[str, dict[str, Any]] = {}
     for rec in records:
         body = rec["response"]["body"]
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             for m in body:
                 key = m.id
                 seen[key] = m
@@ -597,19 +600,23 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     if endpoint == "/api/tags":
         from ollama import ListResponse
 
-        body = ListResponse(models=ordered)
+        # Both cast(Any, ...) and type: ignore are needed here:
+        # - cast(Any, ...) attempts to bypass type checking on the argument
+        # - type: ignore is still needed because mypy checks the call site independently
+        #   and reports arg-type mismatch even after casting
+        body = ListResponse(models=cast(Any, ordered))  # type: ignore[arg-type]
     return {"request": canonical_req, "response": {"body": body, "is_streaming": False}}
 
 
 async def _patched_tool_invoke_method(
-    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any]
+    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
 ):
     """Patched version of tool runtime invoke_tool method for recording/replay."""
     global _current_mode, _current_storage
 
     if _current_mode == APIRecordingMode.LIVE or _current_storage is None:
         # Normal operation
-        return await original_method(self, tool_name, kwargs)
+        return await original_method(self, tool_name, kwargs, authorization=authorization)
 
     request_hash = normalize_tool_request(provider_name, tool_name, kwargs)
 
@@ -627,7 +634,7 @@ async def _patched_tool_invoke_method(
 
     if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING):
         # Make the tool call and record it
-        result = await original_method(self, tool_name, kwargs)
+        result = await original_method(self, tool_name, kwargs, authorization=authorization)
 
         request_data = {
             "test_id": get_test_context(),
@@ -659,7 +666,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
         logger.info(f" Test context: {get_test_context()}")
 
     if mode == APIRecordingMode.LIVE or storage is None:
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             return original_method(self, *args, **kwargs)
         else:
             return await original_method(self, *args, **kwargs)
@@ -693,7 +700,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     recording = None
     if mode == APIRecordingMode.REPLAY or mode == APIRecordingMode.RECORD_IF_MISSING:
         # Special handling for model-list endpoints: merge all recordings with this hash
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
             records = storage._model_list_responses(request_hash)
             recording = _combine_model_list_responses(endpoint, records)
         else:
@@ -733,13 +740,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     )
 
     if mode == APIRecordingMode.RECORD or (mode == APIRecordingMode.RECORD_IF_MISSING and not recording):
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = original_method(self, *args, **kwargs)
         else:
             response = await original_method(self, *args, **kwargs)
 
         # we want to store the result of the iterator, not the iterator itself
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = [m async for m in response]
 
         request_data = {
@@ -878,9 +885,11 @@ def patch_inference_clients():
     OllamaAsyncClient.list = patched_ollama_list
 
     # Create patched methods for tool runtimes
-    async def patched_tavily_invoke_tool(self, tool_name: str, kwargs: dict[str, Any]):
+    async def patched_tavily_invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ):
         return await _patched_tool_invoke_method(
-            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs
+            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs, authorization=authorization
         )
 
     # Apply tool runtime patches