llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -8,20 +8,19 @@ from typing import Any
8
8
 
9
9
  from sqlalchemy.exc import IntegrityError
10
10
 
11
- from llama_stack.apis.inference import (
11
+ from llama_stack.core.datatypes import AccessRule
12
+ from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
13
+ from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
14
+ from llama_stack.core.storage.sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
15
+ from llama_stack.log import get_logger
16
+ from llama_stack_api import (
12
17
  ListOpenAIChatCompletionResponse,
13
18
  OpenAIChatCompletion,
14
19
  OpenAICompletionWithInputMessages,
15
20
  OpenAIMessageParam,
16
21
  Order,
17
22
  )
18
- from llama_stack.core.datatypes import AccessRule
19
- from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
20
- from llama_stack.log import get_logger
21
-
22
- from ..sqlstore.api import ColumnDefinition, ColumnType
23
- from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
24
- from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
23
+ from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
25
24
 
26
25
  logger = get_logger(name=__name__, category="inference")
27
26
 
@@ -56,7 +55,7 @@ class InferenceStore:
56
55
  logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
57
56
 
58
57
  await self.sql_store.create_table(
59
- "chat_completions",
58
+ self.reference.table_name,
60
59
  {
61
60
  "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
62
61
  "created": ColumnType.INTEGER,
@@ -66,14 +65,6 @@ class InferenceStore:
66
65
  },
67
66
  )
68
67
 
69
- if self.enable_write_queue:
70
- self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
71
- for _ in range(self._num_writers):
72
- self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
73
- logger.debug(
74
- f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
75
- )
76
-
77
68
  async def shutdown(self) -> None:
78
69
  if not self._worker_tasks:
79
70
  return
@@ -161,7 +152,7 @@ class InferenceStore:
161
152
 
162
153
  try:
163
154
  await self.sql_store.insert(
164
- table="chat_completions",
155
+ table=self.reference.table_name,
165
156
  data=record_data,
166
157
  )
167
158
  except IntegrityError as e:
@@ -173,7 +164,7 @@ class InferenceStore:
173
164
  error_message = str(e.orig) if e.orig else str(e)
174
165
  if self._is_unique_constraint_error(error_message):
175
166
  # Update the existing record instead
176
- await self.sql_store.update(table="chat_completions", data=record_data, where={"id": data["id"]})
167
+ await self.sql_store.update(table=self.reference.table_name, data=record_data, where={"id": data["id"]})
177
168
  else:
178
169
  # Re-raise if it's not a unique constraint error
179
170
  raise
@@ -217,7 +208,7 @@ class InferenceStore:
217
208
  where_conditions["model"] = model
218
209
 
219
210
  paginated_result = await self.sql_store.fetch_all(
220
- table="chat_completions",
211
+ table=self.reference.table_name,
221
212
  where=where_conditions if where_conditions else None,
222
213
  order_by=[("created", order.value)],
223
214
  cursor=("id", after) if after else None,
@@ -246,7 +237,7 @@ class InferenceStore:
246
237
  raise ValueError("Inference store is not initialized")
247
238
 
248
239
  row = await self.sql_store.fetch_one(
249
- table="chat_completions",
240
+ table=self.reference.table_name,
250
241
  where={"id": completion_id},
251
242
  )
252
243
 
@@ -7,13 +7,20 @@
7
7
  import base64
8
8
  import struct
9
9
  from collections.abc import AsyncIterator
10
+ from typing import Any
10
11
 
11
12
  import litellm
12
13
 
13
- from llama_stack.apis.inference import (
14
- ChatCompletionRequest,
14
+ from llama_stack.core.request_headers import NeedsRequestProviderData
15
+ from llama_stack.log import get_logger
16
+ from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
17
+ from llama_stack.providers.utils.inference.openai_compat import (
18
+ get_stream_options_for_telemetry,
19
+ prepare_openai_completion_params,
20
+ )
21
+ from llama_stack.providers.utils.inference.stream_utils import wrap_async_stream
22
+ from llama_stack_api import (
15
23
  InferenceProvider,
16
- JsonSchemaResponseFormat,
17
24
  OpenAIChatCompletion,
18
25
  OpenAIChatCompletionChunk,
19
26
  OpenAIChatCompletionRequestWithExtraBody,
@@ -23,16 +30,6 @@ from llama_stack.apis.inference import (
23
30
  OpenAIEmbeddingsRequestWithExtraBody,
24
31
  OpenAIEmbeddingsResponse,
25
32
  OpenAIEmbeddingUsage,
26
- ToolChoice,
27
- )
28
- from llama_stack.core.request_headers import NeedsRequestProviderData
29
- from llama_stack.log import get_logger
30
- from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
31
- from llama_stack.providers.utils.inference.openai_compat import (
32
- convert_message_to_openai_dict_new,
33
- convert_tooldef_to_openai_tool,
34
- get_sampling_options,
35
- prepare_openai_completion_params,
36
33
  )
37
34
 
38
35
  logger = get_logger(name=__name__, category="providers::utils")
@@ -55,6 +52,7 @@ class LiteLLMOpenAIMixin(
55
52
  openai_compat_api_base: str | None = None,
56
53
  download_images: bool = False,
57
54
  json_schema_strict: bool = True,
55
+ supports_stream_options: bool = True,
58
56
  ):
59
57
  """
60
58
  Initialize the LiteLLMOpenAIMixin.
@@ -66,6 +64,7 @@ class LiteLLMOpenAIMixin(
66
64
  :param openai_compat_api_base: The base URL for OpenAI compatibility, or None if not using OpenAI compatibility.
67
65
  :param download_images: Whether to download images and convert to base64 for message conversion.
68
66
  :param json_schema_strict: Whether to use strict mode for JSON schema validation.
67
+ :param supports_stream_options: Whether the provider supports stream_options parameter.
69
68
  """
70
69
  ModelRegistryHelper.__init__(self, model_entries=model_entries)
71
70
 
@@ -75,6 +74,7 @@ class LiteLLMOpenAIMixin(
75
74
  self.api_base = openai_compat_api_base
76
75
  self.download_images = download_images
77
76
  self.json_schema_strict = json_schema_strict
77
+ self.supports_stream_options = supports_stream_options
78
78
 
79
79
  if openai_compat_api_base:
80
80
  self.is_openai_compat = True
@@ -127,59 +127,13 @@ class LiteLLMOpenAIMixin(
127
127
 
128
128
  return schema
129
129
 
130
- async def _get_params(self, request: ChatCompletionRequest) -> dict:
131
- input_dict = {}
132
-
133
- input_dict["messages"] = [
134
- await convert_message_to_openai_dict_new(m, download_images=self.download_images) for m in request.messages
135
- ]
136
- if fmt := request.response_format:
137
- if not isinstance(fmt, JsonSchemaResponseFormat):
138
- raise ValueError(
139
- f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported."
140
- )
141
-
142
- fmt = fmt.json_schema
143
- name = fmt["title"]
144
- del fmt["title"]
145
- fmt["additionalProperties"] = False
146
-
147
- # Apply additionalProperties: False recursively to all objects
148
- fmt = self._add_additional_properties_recursive(fmt)
149
-
150
- input_dict["response_format"] = {
151
- "type": "json_schema",
152
- "json_schema": {
153
- "name": name,
154
- "schema": fmt,
155
- "strict": self.json_schema_strict,
156
- },
157
- }
158
- if request.tools:
159
- input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
160
- if request.tool_config.tool_choice:
161
- input_dict["tool_choice"] = (
162
- request.tool_config.tool_choice.value
163
- if isinstance(request.tool_config.tool_choice, ToolChoice)
164
- else request.tool_config.tool_choice
165
- )
166
-
167
- return {
168
- "model": request.model,
169
- "api_key": self.get_api_key(),
170
- "api_base": self.api_base,
171
- **input_dict,
172
- "stream": request.stream,
173
- **get_sampling_options(request.sampling_params),
174
- }
175
-
176
130
  def get_api_key(self) -> str:
177
131
  provider_data = self.get_request_provider_data()
178
132
  key_field = self.provider_data_api_key_field
179
- if provider_data and getattr(provider_data, key_field, None):
180
- api_key = getattr(provider_data, key_field)
181
- else:
182
- api_key = self.api_key_from_config
133
+ if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)):
134
+ return str(api_key) # type: ignore[no-any-return] # getattr returns Any, can't narrow without runtime type inspection
135
+
136
+ api_key = self.api_key_from_config
183
137
  if not api_key:
184
138
  raise ValueError(
185
139
  "API key is not set. Please provide a valid API key in the "
@@ -192,7 +146,13 @@ class LiteLLMOpenAIMixin(
192
146
  self,
193
147
  params: OpenAIEmbeddingsRequestWithExtraBody,
194
148
  ) -> OpenAIEmbeddingsResponse:
149
+ if not self.model_store:
150
+ raise ValueError("Model store is not initialized")
151
+
195
152
  model_obj = await self.model_store.get_model(params.model)
153
+ if model_obj.provider_resource_id is None:
154
+ raise ValueError(f"Model {params.model} has no provider_resource_id")
155
+ provider_resource_id = model_obj.provider_resource_id
196
156
 
197
157
  # Convert input to list if it's a string
198
158
  input_list = [params.input] if isinstance(params.input, str) else params.input
@@ -200,7 +160,7 @@ class LiteLLMOpenAIMixin(
200
160
  # Call litellm embedding function
201
161
  # litellm.drop_params = True
202
162
  response = litellm.embedding(
203
- model=self.get_litellm_model_name(model_obj.provider_resource_id),
163
+ model=self.get_litellm_model_name(provider_resource_id),
204
164
  input=input_list,
205
165
  api_key=self.get_api_key(),
206
166
  api_base=self.api_base,
@@ -217,18 +177,29 @@ class LiteLLMOpenAIMixin(
217
177
 
218
178
  return OpenAIEmbeddingsResponse(
219
179
  data=data,
220
- model=model_obj.provider_resource_id,
180
+ model=provider_resource_id,
221
181
  usage=usage,
222
182
  )
223
183
 
224
184
  async def openai_completion(
225
185
  self,
226
186
  params: OpenAICompletionRequestWithExtraBody,
227
- ) -> OpenAICompletion:
187
+ ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
188
+ # Inject stream_options when streaming and telemetry is active
189
+ stream_options = get_stream_options_for_telemetry(
190
+ params.stream_options, params.stream, self.supports_stream_options
191
+ )
192
+
193
+ if not self.model_store:
194
+ raise ValueError("Model store is not initialized")
195
+
228
196
  model_obj = await self.model_store.get_model(params.model)
197
+ if model_obj.provider_resource_id is None:
198
+ raise ValueError(f"Model {params.model} has no provider_resource_id")
199
+ provider_resource_id = model_obj.provider_resource_id
229
200
 
230
201
  request_params = await prepare_openai_completion_params(
231
- model=self.get_litellm_model_name(model_obj.provider_resource_id),
202
+ model=self.get_litellm_model_name(provider_resource_id),
232
203
  prompt=params.prompt,
233
204
  best_of=params.best_of,
234
205
  echo=params.echo,
@@ -241,34 +212,42 @@ class LiteLLMOpenAIMixin(
241
212
  seed=params.seed,
242
213
  stop=params.stop,
243
214
  stream=params.stream,
244
- stream_options=params.stream_options,
215
+ stream_options=stream_options,
245
216
  temperature=params.temperature,
246
217
  top_p=params.top_p,
247
218
  user=params.user,
248
219
  suffix=params.suffix,
249
220
  api_key=self.get_api_key(),
250
221
  api_base=self.api_base,
222
+ **self._litellm_extra_request_params(params),
251
223
  )
252
- return await litellm.atext_completion(**request_params)
224
+ # LiteLLM returns compatible type but mypy can't verify external library
225
+ result = await litellm.atext_completion(**request_params)
226
+
227
+ if params.stream:
228
+ return wrap_async_stream(result) # type: ignore[arg-type] # LiteLLM streaming types
229
+
230
+ return result # type: ignore[return-value] # external lib lacks type stubs
253
231
 
254
232
  async def openai_chat_completion(
255
233
  self,
256
234
  params: OpenAIChatCompletionRequestWithExtraBody,
257
235
  ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
258
- # Add usage tracking for streaming when telemetry is active
259
- from llama_stack.providers.utils.telemetry.tracing import get_current_span
236
+ # Inject stream_options when streaming and telemetry is active
237
+ stream_options = get_stream_options_for_telemetry(
238
+ params.stream_options, params.stream, self.supports_stream_options
239
+ )
260
240
 
261
- stream_options = params.stream_options
262
- if params.stream and get_current_span() is not None:
263
- if stream_options is None:
264
- stream_options = {"include_usage": True}
265
- elif "include_usage" not in stream_options:
266
- stream_options = {**stream_options, "include_usage": True}
241
+ if not self.model_store:
242
+ raise ValueError("Model store is not initialized")
267
243
 
268
244
  model_obj = await self.model_store.get_model(params.model)
245
+ if model_obj.provider_resource_id is None:
246
+ raise ValueError(f"Model {params.model} has no provider_resource_id")
247
+ provider_resource_id = model_obj.provider_resource_id
269
248
 
270
249
  request_params = await prepare_openai_completion_params(
271
- model=self.get_litellm_model_name(model_obj.provider_resource_id),
250
+ model=self.get_litellm_model_name(provider_resource_id),
272
251
  messages=params.messages,
273
252
  frequency_penalty=params.frequency_penalty,
274
253
  function_call=params.function_call,
@@ -293,8 +272,15 @@ class LiteLLMOpenAIMixin(
293
272
  user=params.user,
294
273
  api_key=self.get_api_key(),
295
274
  api_base=self.api_base,
275
+ **self._litellm_extra_request_params(params),
296
276
  )
297
- return await litellm.acompletion(**request_params)
277
+ # LiteLLM returns compatible type but mypy can't verify external library
278
+ result = await litellm.acompletion(**request_params)
279
+
280
+ if params.stream:
281
+ return wrap_async_stream(result) # type: ignore[arg-type] # LiteLLM streaming types
282
+
283
+ return result # type: ignore[return-value] # external lib lacks type stubs
298
284
 
299
285
  async def check_model_availability(self, model: str) -> bool:
300
286
  """
@@ -310,6 +296,20 @@ class LiteLLMOpenAIMixin(
310
296
 
311
297
  return model in litellm.models_by_provider[self.litellm_provider_name]
312
298
 
299
+ def _litellm_extra_request_params(
300
+ self,
301
+ params: OpenAIChatCompletionRequestWithExtraBody | OpenAICompletionRequestWithExtraBody,
302
+ ) -> dict[str, Any]:
303
+ """
304
+ Provider hook for extra LiteLLM/OpenAI-compat request params.
305
+
306
+ This is intentionally a narrow hook so provider adapters (e.g. WatsonX)
307
+ can add provider-specific kwargs (timeouts, project IDs, etc.) while the
308
+ mixin remains the single source of truth for telemetry-driven
309
+ stream_options injection.
310
+ """
311
+ return {}
312
+
313
313
 
314
314
  def b64_encode_openai_embeddings_response(
315
315
  response_data: list[dict], encoding_format: str | None = "float"
@@ -8,13 +8,11 @@ from typing import Any
8
8
 
9
9
  from pydantic import BaseModel, Field, SecretStr
10
10
 
11
- from llama_stack.apis.common.errors import UnsupportedModelError
12
- from llama_stack.apis.models import ModelType
13
11
  from llama_stack.log import get_logger
14
- from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
15
12
  from llama_stack.providers.utils.inference import (
16
13
  ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
17
14
  )
15
+ from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError
18
16
 
19
17
  logger = get_logger(name=__name__, category="providers::utils")
20
18