llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -12,17 +12,9 @@ from numpy.typing import NDArray
 from weaviate.classes.init import Auth
 from weaviate.classes.query import Filter, HybridFusion
 
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import (
     RERANKER_TYPE_RRF,
@@ -30,7 +22,20 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
+from llama_stack_api import (
+    EmbeddedChunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
+from llama_stack_api.internal.kvstore import KVStore
 
 from .config import WeaviateVectorIOConfig
 
@@ -53,20 +58,19 @@ class WeaviateIndex(EmbeddingIndex):
     async def initialize(self):
         pass
 
-    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
-        assert len(chunks) == len(embeddings), (
-            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
-        )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         data_objects = []
-        for chunk, embedding in zip(chunks, embeddings, strict=False):
+        for chunk in chunks:
             data_objects.append(
                 wvc.data.DataObject(
                     properties={
                         "chunk_id": chunk.chunk_id,
                         "chunk_content": chunk.model_dump_json(),
                     },
-                    vector=embedding.tolist(),
+                    vector=chunk.embedding,  # Already a list[float]
                 )
             )
 
@@ -112,7 +116,7 @@
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = Chunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
@@ -172,7 +176,7 @@
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = Chunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
@@ -241,7 +245,7 @@
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk = Chunk(**chunk_dict)
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
@@ -259,9 +263,8 @@
 
 class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate):
     def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
-        super().__init__(files_api=files_api, kvstore=None)
+        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
         self.config = config
-        self.inference_api = inference_api
         self.client_cache = {}
         self.cache = {}
         self.vector_store_table = None
@@ -369,19 +372,21 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
         self.cache[vector_store_id] = index
         return index
 
-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(
+        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+    ) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
 
         await index.insert_chunks(chunks)
 
     async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
 
         return await index.query_chunks(query, params)
 
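Taken together, the Weaviate hunks above replace the old (list[Chunk], NDArray) pair with a single list[EmbeddedChunk] whose elements carry their own embedding as a list[float], and rename the vector_db_id parameter to vector_store_id. A minimal caller-side sketch under those assumptions follows; any EmbeddedChunk field other than chunk_id and embedding (here, content) is an illustrative guess, not taken from the diff:

# Hypothetical caller of the 0.4.1 surface; `content` is an assumed field name.
from llama_stack_api import EmbeddedChunk

async def index_texts(adapter, vector_store_id: str, texts: list[str], embed) -> None:
    chunks = [
        EmbeddedChunk(
            chunk_id=f"chunk-{i}",        # stored as the Weaviate "chunk_id" property
            content=text,                 # assumed field; serialized via model_dump_json()
            embedding=await embed(text),  # already list[float]; no NDArray/.tolist() step
        )
        for i, text in enumerate(texts)
    ]
    # insert_chunks now takes vector_store_id (renamed from vector_db_id)
    await adapter.insert_chunks(vector_store_id, chunks)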
llama_stack/providers/utils/common/data_schema_validator.py
@@ -7,12 +7,8 @@
 from enum import Enum
 from typing import Any
 
-from llama_stack.apis.common.type_system import (
-    ChatCompletionInputType,
-    CompletionInputType,
-    StringType,
-)
 from llama_stack.core.datatypes import Api
+from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType
 
 
 class ColumnName(Enum):
llama_stack/providers/utils/files/form_data.py
@@ -9,7 +9,7 @@ import json
 from fastapi import Request
 from pydantic import BaseModel, ValidationError
 
-from llama_stack.apis.files import ExpiresAfter
+from llama_stack_api import ExpiresAfter
 
 
 async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None:
llama_stack/providers/utils/inference/embedding_mixin.py
@@ -17,7 +17,7 @@ from llama_stack.log import get_logger
 if TYPE_CHECKING:
     from sentence_transformers import SentenceTransformer
 
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     ModelStore,
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
llama_stack/providers/utils/inference/inference_store.py
@@ -8,20 +8,19 @@ from typing import Any
 
 from sqlalchemy.exc import IntegrityError
 
-from llama_stack.apis.inference import (
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
+from llama_stack.log import get_logger
+from llama_stack_api import (
     ListOpenAIChatCompletionResponse,
     OpenAIChatCompletion,
     OpenAICompletionWithInputMessages,
     OpenAIMessageParam,
     Order,
 )
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
-from llama_stack.log import get_logger
-
-from ..sqlstore.api import ColumnDefinition, ColumnType
-from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
 
 logger = get_logger(name=__name__, category="inference")
 
llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -7,13 +7,20 @@
 import base64
 import struct
 from collections.abc import AsyncIterator
+from typing import Any
 
 import litellm
 
-from llama_stack.apis.inference import (
-    ChatCompletionRequest,
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
+from llama_stack.providers.utils.inference.openai_compat import (
+    get_stream_options_for_telemetry,
+    prepare_openai_completion_params,
+)
+from llama_stack.providers.utils.inference.stream_utils import wrap_async_stream
+from llama_stack_api import (
     InferenceProvider,
-    JsonSchemaResponseFormat,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -23,16 +30,6 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
-    ToolChoice,
-)
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
-from llama_stack.providers.utils.inference.openai_compat import (
-    convert_message_to_openai_dict_new,
-    convert_tooldef_to_openai_tool,
-    get_sampling_options,
-    prepare_openai_completion_params,
 )
 
 logger = get_logger(name=__name__, category="providers::utils")
@@ -55,6 +52,7 @@ class LiteLLMOpenAIMixin(
         openai_compat_api_base: str | None = None,
         download_images: bool = False,
         json_schema_strict: bool = True,
+        supports_stream_options: bool = True,
     ):
         """
         Initialize the LiteLLMOpenAIMixin.
@@ -66,6 +64,7 @@ class LiteLLMOpenAIMixin(
         :param openai_compat_api_base: The base URL for OpenAI compatibility, or None if not using OpenAI compatibility.
         :param download_images: Whether to download images and convert to base64 for message conversion.
         :param json_schema_strict: Whether to use strict mode for JSON schema validation.
+        :param supports_stream_options: Whether the provider supports stream_options parameter.
         """
         ModelRegistryHelper.__init__(self, model_entries=model_entries)
 
@@ -75,6 +74,7 @@ class LiteLLMOpenAIMixin(
         self.api_base = openai_compat_api_base
         self.download_images = download_images
         self.json_schema_strict = json_schema_strict
+        self.supports_stream_options = supports_stream_options
 
         if openai_compat_api_base:
             self.is_openai_compat = True
@@ -127,59 +127,13 @@ class LiteLLMOpenAIMixin(
 
         return schema
 
-    async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        input_dict = {}
-
-        input_dict["messages"] = [
-            await convert_message_to_openai_dict_new(m, download_images=self.download_images) for m in request.messages
-        ]
-        if fmt := request.response_format:
-            if not isinstance(fmt, JsonSchemaResponseFormat):
-                raise ValueError(
-                    f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported."
-                )
-
-            fmt = fmt.json_schema
-            name = fmt["title"]
-            del fmt["title"]
-            fmt["additionalProperties"] = False
-
-            # Apply additionalProperties: False recursively to all objects
-            fmt = self._add_additional_properties_recursive(fmt)
-
-            input_dict["response_format"] = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": name,
-                    "schema": fmt,
-                    "strict": self.json_schema_strict,
-                },
-            }
-        if request.tools:
-            input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
-            if request.tool_config.tool_choice:
-                input_dict["tool_choice"] = (
-                    request.tool_config.tool_choice.value
-                    if isinstance(request.tool_config.tool_choice, ToolChoice)
-                    else request.tool_config.tool_choice
-                )
-
-        return {
-            "model": request.model,
-            "api_key": self.get_api_key(),
-            "api_base": self.api_base,
-            **input_dict,
-            "stream": request.stream,
-            **get_sampling_options(request.sampling_params),
-        }
-
     def get_api_key(self) -> str:
         provider_data = self.get_request_provider_data()
         key_field = self.provider_data_api_key_field
-        if provider_data and getattr(provider_data, key_field, None):
-            api_key = getattr(provider_data, key_field)
-        else:
-            api_key = self.api_key_from_config
+        if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)):
+            return str(api_key)  # type: ignore[no-any-return]  # getattr returns Any, can't narrow without runtime type inspection
+
+        api_key = self.api_key_from_config
         if not api_key:
             raise ValueError(
                 "API key is not set. Please provide a valid API key in the "
@@ -192,7 +146,13 @@
         self,
         params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
+
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id
 
         # Convert input to list if it's a string
         input_list = [params.input] if isinstance(params.input, str) else params.input
@@ -200,7 +160,7 @@
         # Call litellm embedding function
         # litellm.drop_params = True
         response = litellm.embedding(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
+            model=self.get_litellm_model_name(provider_resource_id),
             input=input_list,
             api_key=self.get_api_key(),
             api_base=self.api_base,
@@ -217,18 +177,29 @@
 
         return OpenAIEmbeddingsResponse(
             data=data,
-            model=model_obj.provider_resource_id,
+            model=provider_resource_id,
             usage=usage,
         )
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream, self.supports_stream_options
+        )
+
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
+
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id
 
         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
+            model=self.get_litellm_model_name(provider_resource_id),
             prompt=params.prompt,
             best_of=params.best_of,
             echo=params.echo,
@@ -241,34 +212,42 @@
             seed=params.seed,
             stop=params.stop,
             stream=params.stream,
-            stream_options=params.stream_options,
+            stream_options=stream_options,
             temperature=params.temperature,
             top_p=params.top_p,
             user=params.user,
             suffix=params.suffix,
             api_key=self.get_api_key(),
             api_base=self.api_base,
+            **self._litellm_extra_request_params(params),
         )
-        return await litellm.atext_completion(**request_params)
+        # LiteLLM returns compatible type but mypy can't verify external library
+        result = await litellm.atext_completion(**request_params)
+
+        if params.stream:
+            return wrap_async_stream(result)  # type: ignore[arg-type]  # LiteLLM streaming types
+
+        return result  # type: ignore[return-value]  # external lib lacks type stubs
 
     async def openai_chat_completion(
         self,
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        # Add usage tracking for streaming when telemetry is active
-        from llama_stack.providers.utils.telemetry.tracing import get_current_span
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream, self.supports_stream_options
+        )
 
-        stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
-            if stream_options is None:
-                stream_options = {"include_usage": True}
-            elif "include_usage" not in stream_options:
-                stream_options = {**stream_options, "include_usage": True}
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
 
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id
 
         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
+            model=self.get_litellm_model_name(provider_resource_id),
             messages=params.messages,
             frequency_penalty=params.frequency_penalty,
             function_call=params.function_call,
@@ -293,8 +272,15 @@
             user=params.user,
             api_key=self.get_api_key(),
             api_base=self.api_base,
+            **self._litellm_extra_request_params(params),
         )
-        return await litellm.acompletion(**request_params)
+        # LiteLLM returns compatible type but mypy can't verify external library
+        result = await litellm.acompletion(**request_params)
+
+        if params.stream:
+            return wrap_async_stream(result)  # type: ignore[arg-type]  # LiteLLM streaming types
+
+        return result  # type: ignore[return-value]  # external lib lacks type stubs
 
     async def check_model_availability(self, model: str) -> bool:
         """
@@ -310,6 +296,20 @@
 
         return model in litellm.models_by_provider[self.litellm_provider_name]
 
+    def _litellm_extra_request_params(
+        self,
+        params: OpenAIChatCompletionRequestWithExtraBody | OpenAICompletionRequestWithExtraBody,
+    ) -> dict[str, Any]:
+        """
+        Provider hook for extra LiteLLM/OpenAI-compat request params.
+
+        This is intentionally a narrow hook so provider adapters (e.g. WatsonX)
+        can add provider-specific kwargs (timeouts, project IDs, etc.) while the
+        mixin remains the single source of truth for telemetry-driven
+        stream_options injection.
+        """
+        return {}
+
 
 def b64_encode_openai_embeddings_response(
     response_data: list[dict], encoding_format: str | None = "float"
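The _litellm_extra_request_params hook added at the end of the mixin above is the intended seam for provider adapters: the mixin keeps ownership of telemetry-driven stream_options injection (via get_stream_options_for_telemetry and the new supports_stream_options flag), while subclasses contribute provider-specific kwargs. A hedged sketch of an adapter override follows; the config fields and kwarg names (project_id, timeout) are illustrative assumptions, not taken from the diff:

from typing import Any

from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin


class ExampleWatsonXStyleAdapter(LiteLLMOpenAIMixin):
    # Illustrative adapter: self.config and its fields are assumptions.
    def _litellm_extra_request_params(self, params: Any) -> dict[str, Any]:
        # The returned kwargs are merged into every litellm.atext_completion /
        # litellm.acompletion call via **self._litellm_extra_request_params(params).
        return {"project_id": self.config.project_id, "timeout": self.config.timeout}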
llama_stack/providers/utils/inference/model_registry.py
@@ -8,13 +8,11 @@ from typing import Any
 
 from pydantic import BaseModel, Field, SecretStr
 
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 from llama_stack.providers.utils.inference import (
     ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
 )
+from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError
 
 logger = get_logger(name=__name__, category="providers::utils")
 
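The pattern recurring across every hunk above is the consolidation of the nested llama_stack.apis.* modules and llama_stack.providers.datatypes into the new flat llama_stack_api package, with storage primitives split between llama_stack.core.storage and llama_stack_api.internal. As a before/after sketch, using only imports that appear in the hunks above:

# 0.3.5 -- nested per-API modules:
# from llama_stack.apis.common.errors import UnsupportedModelError
# from llama_stack.apis.models import ModelType
# from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
# from llama_stack.providers.utils.kvstore import kvstore_impl

# 0.4.1 -- one flat public package, plus relocated storage internals:
from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError
from llama_stack.core.storage.kvstore import kvstore_impl
from llama_stack_api.internal.kvstore import KVStore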