llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
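Most of the per-file changes below share one mechanical migration: public symbols move out of llama_stack.apis.* and llama_stack.schema_utils into the new top-level llama_stack_api package (see the new llama_stack_api/ tree above, and top_level.txt growing from one entry to two). A minimal before/after sketch for a hypothetical downstream consumer of these symbols:

# 0.3.4
from llama_stack.apis.safety import RunShieldResponse, Safety
from llama_stack.schema_utils import json_schema_type

# 0.4.0
from llama_stack_api import RunShieldResponse, Safety, json_schema_type

The other recurring pattern is a rename of provider config fields from url: str to base_url: HttpUrl | None, moving endpoint validation from request time to config-load time. Representative hunks from the remote inference, post-training, and safety providers follow.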
llama_stack/providers/remote/inference/tgi/config.py

@@ -5,28 +5,29 @@
 # the root directory of this source tree.
 
 
-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
 class TGIImplConfig(RemoteInferenceProviderConfig):
     auth_credential: SecretStr | None = Field(default=None, exclude=True)
 
-    url: str = Field(
-        description="The URL for the TGI serving endpoint",
+    base_url: HttpUrl | None = Field(
+        default=None,
+        description="The URL for the TGI serving endpoint (should include /v1 path)",
     )
 
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.TGI_URL:=}",
+        base_url: str = "${env.TGI_URL:=}",
         **kwargs,
     ):
         return {
-            "url": url,
+            "base_url": base_url,
         }
 
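The url to base_url rename above is not just cosmetic: typed as HttpUrl, a malformed endpoint is now rejected when the config is parsed rather than on the first request. A minimal sketch of the new behavior, assuming pydantic v2; the class is a stand-in for TGIImplConfig and the example values are hypothetical:

from pydantic import BaseModel, Field, HttpUrl

class TGIConfigSketch(BaseModel):  # stand-in for the real RemoteInferenceProviderConfig subclass
    base_url: HttpUrl | None = Field(default=None)

TGIConfigSketch(base_url="http://localhost:8080/v1")  # string is coerced to HttpUrl
TGIConfigSketch()                                     # allowed; TGIAdapter.initialize() rejects it later
# TGIConfigSketch(base_url="not a url") raises pydantic.ValidationError at load time,
# where the old `url: str` field accepted any string.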
llama_stack/providers/remote/inference/tgi/tgi.py

@@ -8,14 +8,14 @@
 from collections.abc import Iterable
 
 from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
+from pydantic import HttpUrl, SecretStr
 
-from llama_stack.apis.inference import (
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
 
@@ -23,7 +23,7 @@ log = get_logger(name=__name__, category="inference::tgi")
 
 
 class _HfAdapter(OpenAIMixin):
-    url: str
+    base_url: HttpUrl
     api_key: SecretStr
 
     hf_client: AsyncInferenceClient
@@ -36,7 +36,7 @@ class _HfAdapter(OpenAIMixin):
         return "NO KEY REQUIRED"
 
     def get_base_url(self):
-        return self.url
+        return self.base_url
 
     async def list_provider_model_ids(self) -> Iterable[str]:
         return [self.model_id]
@@ -50,14 +50,22 @@ class _HfAdapter(OpenAIMixin):
 
 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
-        if not config.url:
-            raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
-        log.info(f"Initializing TGI client with url={config.url}")
-        self.hf_client = AsyncInferenceClient(model=config.url, provider="hf-inference")
+        if not config.base_url:
+            raise ValueError(
+                "You must provide a URL in config.yaml (or via the TGI_URL environment variable) to use TGI."
+            )
+        log.info(f"Initializing TGI client with url={config.base_url}")
+        # Extract base URL without /v1 for HF client initialization
+        base_url_str = str(config.base_url).rstrip("/")
+        if base_url_str.endswith("/v1"):
+            base_url_for_client = base_url_str[:-3]
+        else:
+            base_url_for_client = base_url_str
+        self.hf_client = AsyncInferenceClient(model=base_url_for_client, provider="hf-inference")
         endpoint_info = await self.hf_client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]
         self.model_id = endpoint_info["model_id"]
-        self.url = f"{config.url.rstrip('/')}/v1"
+        self.base_url = config.base_url
         self.api_key = SecretStr("NO_KEY")
 
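The initialize() change carries the key compatibility detail of this migration: base_url now includes the /v1 suffix used by the OpenAI-compatible client, while huggingface_hub's AsyncInferenceClient wants the bare endpoint, so the suffix is stripped before constructing it. The same logic as a standalone helper, for illustration only (the helper name is ours, not the package's):

def strip_v1_suffix(base_url: str) -> str:
    """Return the endpoint without a trailing /v1, for clients that add their own path."""
    url = base_url.rstrip("/")
    return url[: -len("/v1")] if url.endswith("/v1") else url

assert strip_v1_suffix("http://localhost:8080/v1") == "http://localhost:8080"
assert strip_v1_suffix("http://localhost:8080/v1/") == "http://localhost:8080"
assert strip_v1_suffix("http://localhost:8080") == "http://localhost:8080"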
llama_stack/providers/remote/inference/together/config.py

@@ -6,22 +6,22 @@
 
 from typing import Any
 
-from pydantic import Field
+from pydantic import Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
 class TogetherImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.together.xyz/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.together.xyz/v1"),
         description="The URL for the Together AI server",
     )
 
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.together.xyz/v1",
+            "base_url": "https://api.together.xyz/v1",
             "api_key": "${env.TOGETHER_API_KEY:=}",
         }
llama_stack/providers/remote/inference/together/together.py

@@ -6,19 +6,19 @@
 
 
 from collections.abc import Iterable
+from typing import Any, cast
 
-from together import AsyncTogether
-from together.constants import BASE_URL
+from together import AsyncTogether  # type: ignore[import-untyped]
 
-from llama_stack.apis.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
-from llama_stack.apis.models import Model
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    Model,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
 
 from .config import TogetherImplConfig
 
@@ -41,7 +41,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
     provider_data_api_key_field: str = "together_api_key"
 
     def get_base_url(self):
-        return BASE_URL
+        return str(self.config.base_url)
 
     def _get_client(self) -> AsyncTogether:
         together_api_key = None
@@ -81,10 +81,11 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
         if params.dimensions is not None:
             raise ValueError("Together's embeddings endpoint does not support dimensions param.")
 
+        # Cast encoding_format to match OpenAI SDK's expected Literal type
         response = await self.client.embeddings.create(
             model=await self._get_provider_model_id(params.model),
             input=params.input,
-            encoding_format=params.encoding_format,
+            encoding_format=cast(Any, params.encoding_format),
         )
 
         response.model = (
@@ -97,6 +98,8 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
             logger.warning(
                 f"Together's embedding endpoint for {params.model} did not return usage information, substituting -1s."
             )
-            response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
+            # Cast to allow monkey-patching the response object
+            response.usage = cast(Any, OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1))
 
-        return response  # type: ignore[no-any-return]
+        # Together's CreateEmbeddingResponse is compatible with OpenAIEmbeddingsResponse after monkey-patching
+        return cast(OpenAIEmbeddingsResponse, response)
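The added cast() calls change nothing at runtime; they let the adapter patch a usage object onto the Together SDK's response and then present it under the stack's response type. A minimal sketch of the pattern with hypothetical stand-in types:

from typing import Any, cast

class VendorResponse:  # stands in for Together's CreateEmbeddingResponse
    usage: Any = None

class UsageSketch:  # stands in for OpenAIEmbeddingUsage
    def __init__(self, prompt_tokens: int, total_tokens: int) -> None:
        self.prompt_tokens, self.total_tokens = prompt_tokens, total_tokens

resp = VendorResponse()
if resp.usage is None:
    # cast() is a no-op at runtime; it only widens the static type so the patch type-checks
    resp.usage = cast(Any, UsageSketch(prompt_tokens=-1, total_tokens=-1))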
llama_stack/providers/remote/inference/vertexai/config.py

@@ -9,7 +9,7 @@ from typing import Any
 
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 class VertexAIProviderDataValidator(BaseModel):
llama_stack/providers/remote/inference/vllm/config.py

@@ -6,15 +6,15 @@
 
 from pathlib import Path
 
-from pydantic import Field, SecretStr, field_validator
+from pydantic import Field, HttpUrl, SecretStr, field_validator
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
 class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
-    url: str | None = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the vLLM model serving endpoint",
     )
@@ -48,11 +48,11 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.VLLM_URL:=}",
+        base_url: str = "${env.VLLM_URL:=}",
         **kwargs,
     ):
         return {
-            "url": url,
+            "base_url": base_url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
             "api_token": "${env.VLLM_API_TOKEN:=fake}",
             "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",
llama_stack/providers/remote/inference/vllm/vllm.py

@@ -7,22 +7,18 @@ from collections.abc import AsyncIterator
 from urllib.parse import urljoin
 
 import httpx
-from openai.types.chat.chat_completion_chunk import (
-    ChatCompletionChunk as OpenAIChatCompletionChunk,
-)
 from pydantic import ConfigDict
 
-from llama_stack.apis.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionRequestWithExtraBody,
-    ToolChoice,
-)
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
     HealthResponse,
     HealthStatus,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+    ToolChoice,
 )
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import VLLMInferenceAdapterConfig
 
@@ -34,6 +30,9 @@ class VLLMInferenceAdapter(OpenAIMixin):
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
+    # vLLM does not support the stream_options parameter
+    supports_stream_options: bool = False
+
     provider_data_api_key_field: str = "vllm_api_token"
 
     def get_api_key(self) -> str | None:
@@ -43,14 +42,14 @@ class VLLMInferenceAdapter(OpenAIMixin):
 
     def get_base_url(self) -> str:
         """Get the base URL from config."""
-        if not self.config.url:
+        if not self.config.base_url:
             raise ValueError("No base URL configured")
-        return self.config.url
+        return str(self.config.base_url)
 
     async def initialize(self) -> None:
-        if not self.config.url:
+        if not self.config.base_url:
             raise ValueError(
-                "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
+                "You must provide a URL in config.yaml (or via the VLLM_URL environment variable) to use vLLM."
             )
 
     async def health(self) -> HealthResponse:
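The new supports_stream_options flag is a capability switch: vLLM rejects the OpenAI stream_options parameter, so the shared mixin must skip it for this adapter while still injecting it for backends that accept it. A minimal sketch of the gating pattern with hypothetical names (the real plumbing is in OpenAIMixin):

class AdapterSketch:
    supports_stream_options: bool = True  # default: forward stream_options to the backend

    def build_request(self, stream: bool, **kwargs) -> dict:
        params = dict(kwargs, stream=stream)
        if stream and self.supports_stream_options:
            params["stream_options"] = {"include_usage": True}  # e.g. for usage tracking
        return params

class VLLMLikeAdapter(AdapterSketch):
    supports_stream_options = False  # backend errors on the parameter

assert "stream_options" not in VLLMLikeAdapter().build_request(stream=True, model="m")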
llama_stack/providers/remote/inference/watsonx/config.py

@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 class WatsonXProviderDataValidator(BaseModel):
@@ -23,7 +23,7 @@ class WatsonXProviderDataValidator(BaseModel):
 
 @json_schema_type
 class WatsonXConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
+    base_url: HttpUrl | None = Field(
         default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
         description="A base url for accessing the watsonx.ai",
     )
@@ -39,7 +39,7 @@ class WatsonXConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
+            "base_url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
             "api_key": "${env.WATSONX_API_KEY:=}",
             "project_id": "${env.WATSONX_PROJECT_ID:=}",
         }
llama_stack/providers/remote/inference/watsonx/watsonx.py

@@ -10,23 +10,20 @@ from typing import Any
 import litellm
 import requests
 
-from llama_stack.apis.inference.inference import (
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack_api import (
+    Model,
+    ModelType,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAIChatCompletionUsage,
-    OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
-from llama_stack.apis.models.models import ModelType
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
-from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
-from llama_stack.providers.utils.telemetry.tracing import get_current_span
 
 logger = get_logger(name=__name__, category="providers::remote::watsonx")
 
@@ -48,57 +45,25 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
             openai_compat_api_base=self.get_base_url(),
         )
 
+    def _litellm_extra_request_params(
+        self,
+        params: OpenAIChatCompletionRequestWithExtraBody | OpenAICompletionRequestWithExtraBody,
+    ) -> dict[str, Any]:
+        # These are watsonx-specific parameters used by LiteLLM.
+        return {"timeout": self.config.timeout, "project_id": self.config.project_id}
+
     async def openai_chat_completion(
         self,
         params: OpenAIChatCompletionRequestWithExtraBody,
    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         """
-        Override parent method to add timeout and inject usage object when missing.
+        Override parent method to inject usage object when missing.
+
         This works around a LiteLLM defect where usage block is sometimes dropped.
+        Note: request parameter construction (including telemetry-driven stream_options injection)
+        is handled by LiteLLMOpenAIMixin via _litellm_extra_request_params().
         """
-
-        # Add usage tracking for streaming when telemetry is active
-        stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
-            if stream_options is None:
-                stream_options = {"include_usage": True}
-            elif "include_usage" not in stream_options:
-                stream_options = {**stream_options, "include_usage": True}
-
-        model_obj = await self.model_store.get_model(params.model)
-
-        request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
-            messages=params.messages,
-            frequency_penalty=params.frequency_penalty,
-            function_call=params.function_call,
-            functions=params.functions,
-            logit_bias=params.logit_bias,
-            logprobs=params.logprobs,
-            max_completion_tokens=params.max_completion_tokens,
-            max_tokens=params.max_tokens,
-            n=params.n,
-            parallel_tool_calls=params.parallel_tool_calls,
-            presence_penalty=params.presence_penalty,
-            response_format=params.response_format,
-            seed=params.seed,
-            stop=params.stop,
-            stream=params.stream,
-            stream_options=stream_options,
-            temperature=params.temperature,
-            tool_choice=params.tool_choice,
-            tools=params.tools,
-            top_logprobs=params.top_logprobs,
-            top_p=params.top_p,
-            user=params.user,
-            api_key=self.get_api_key(),
-            api_base=self.api_base,
-            # These are watsonx-specific parameters
-            timeout=self.config.timeout,
-            project_id=self.config.project_id,
-        )
-
-        result = await litellm.acompletion(**request_params)
+        result = await super().openai_chat_completion(params)
 
         # If not streaming, check and inject usage if missing
         if not params.stream:
@@ -175,44 +140,6 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
             logger.error(f"Error normalizing stream: {e}", exc_info=True)
             raise
 
-    async def openai_completion(
-        self,
-        params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
-        """
-        Override parent method to add watsonx-specific parameters.
-        """
-        from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
-
-        model_obj = await self.model_store.get_model(params.model)
-
-        request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
-            prompt=params.prompt,
-            best_of=params.best_of,
-            echo=params.echo,
-            frequency_penalty=params.frequency_penalty,
-            logit_bias=params.logit_bias,
-            logprobs=params.logprobs,
-            max_tokens=params.max_tokens,
-            n=params.n,
-            presence_penalty=params.presence_penalty,
-            seed=params.seed,
-            stop=params.stop,
-            stream=params.stream,
-            stream_options=params.stream_options,
-            temperature=params.temperature,
-            top_p=params.top_p,
-            user=params.user,
-            suffix=params.suffix,
-            api_key=self.get_api_key(),
-            api_base=self.api_base,
-            # These are watsonx-specific parameters
-            timeout=self.config.timeout,
-            project_id=self.config.project_id,
-        )
-        return await litellm.atext_completion(**request_params)
-
     async def openai_embeddings(
         self,
         params: OpenAIEmbeddingsRequestWithExtraBody,
@@ -238,8 +165,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )
 
         # Convert response to OpenAI format
-        from llama_stack.apis.inference import OpenAIEmbeddingUsage
         from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response
+        from llama_stack_api import OpenAIEmbeddingUsage
 
         data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)
 
@@ -255,7 +182,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )
 
     def get_base_url(self) -> str:
-        return self.config.url
+        return str(self.config.base_url)
 
     # Copied from OpenAIMixin
     async def check_model_availability(self, model: str) -> bool:
@@ -316,7 +243,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         """
         Retrieves foundation model specifications from the watsonx.ai API.
         """
-        url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25"
+        url = f"{str(self.config.base_url)}/ml/v1/foundation_model_specs?version=2023-10-25"
         headers = {
             # Note that there is no authorization header. Listing models does not require authentication.
             "Content-Type": "application/json",
llama_stack/providers/remote/post_training/nvidia/post_training.py

@@ -10,7 +10,10 @@ from typing import Any, Literal
 import aiohttp
 from pydantic import BaseModel, ConfigDict
 
-from llama_stack.apis.post_training import (
+from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig
+from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
+from llama_stack_api import (
     AlgorithmConfig,
     DPOAlignmentConfig,
     JobStatus,
@@ -19,9 +22,6 @@ from llama_stack.apis.post_training import (
     PostTrainingJobStatusResponse,
     TrainingConfig,
 )
-from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig
-from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
 from .models import _MODEL_ENTRIES
 
llama_stack/providers/remote/post_training/nvidia/utils.py

@@ -9,9 +9,9 @@ from typing import Any
 
 from pydantic import BaseModel
 
-from llama_stack.apis.post_training import TrainingConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig
+from llama_stack_api import TrainingConfig
 
 from .config import NvidiaPostTrainingConfig
 
llama_stack/providers/remote/safety/bedrock/bedrock.py

@@ -7,17 +7,17 @@
 import json
 from typing import Any
 
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import (
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.bedrock.client import create_bedrock_client
+from llama_stack_api import (
+    OpenAIMessageParam,
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
     ViolationLevel,
 )
-from llama_stack.apis.shields import Shield
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ShieldsProtocolPrivate
-from llama_stack.providers.utils.bedrock.client import create_bedrock_client
 
 from .config import BedrockSafetyConfig
 
llama_stack/providers/remote/safety/bedrock/config.py

@@ -6,7 +6,7 @@
 
 
 from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
llama_stack/providers/remote/safety/nvidia/config.py

@@ -8,7 +8,7 @@ from typing import Any
 
 from pydantic import BaseModel, Field
 
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
llama_stack/providers/remote/safety/nvidia/nvidia.py

@@ -8,11 +8,17 @@ from typing import Any
 
 import requests
 
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel
-from llama_stack.apis.shields import Shield
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ShieldsProtocolPrivate
+from llama_stack_api import (
+    ModerationObject,
+    OpenAIMessageParam,
+    RunShieldResponse,
+    Safety,
+    SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
+    ViolationLevel,
+)
 
 from .config import NVIDIASafetyConfig
 
@@ -66,7 +72,7 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
         self.shield = NeMoGuardrails(self.config, shield.shield_id)
         return await self.shield.run(messages)
 
-    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
+    async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
         raise NotImplementedError("NVIDIA safety provider currently does not implement run_moderation")
 
 
llama_stack/providers/remote/safety/sambanova/config.py

@@ -8,7 +8,7 @@ from typing import Any
 
 from pydantic import BaseModel, Field, SecretStr
 
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 class SambaNovaProviderDataValidator(BaseModel):
llama_stack/providers/remote/safety/sambanova/sambanova.py

@@ -9,17 +9,17 @@ from typing import Any
 import litellm
 import requests
 
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import (
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack_api import (
+    OpenAIMessageParam,
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
     ViolationLevel,
 )
-from llama_stack.apis.shields import Shield
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ShieldsProtocolPrivate
 
 from .config import SambaNovaSafetyConfig
 