llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -6,35 +6,45 @@
6
6
 
7
7
  import uuid
8
8
  from datetime import UTC, datetime
9
- from typing import Annotated, Any
9
+ from typing import TYPE_CHECKING, Any, cast
10
10
 
11
11
  import boto3
12
12
  from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
13
- from fastapi import Depends, File, Form, Response, UploadFile
13
+ from fastapi import Response, UploadFile
14
14
 
15
- from llama_stack.apis.common.errors import ResourceNotFoundError
16
- from llama_stack.apis.common.responses import Order
17
- from llama_stack.apis.files import (
15
+ if TYPE_CHECKING:
16
+ from mypy_boto3_s3.client import S3Client
17
+
18
+ from llama_stack.core.access_control.datatypes import Action
19
+ from llama_stack.core.datatypes import AccessRule
20
+ from llama_stack.core.id_generation import generate_object_id
21
+ from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
22
+ from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
23
+ from llama_stack_api import (
18
24
  ExpiresAfter,
19
25
  Files,
20
26
  ListOpenAIFileResponse,
21
27
  OpenAIFileDeleteResponse,
22
28
  OpenAIFileObject,
23
29
  OpenAIFilePurpose,
30
+ Order,
31
+ ResourceNotFoundError,
24
32
  )
25
- from llama_stack.core.datatypes import AccessRule
26
- from llama_stack.core.id_generation import generate_object_id
27
- from llama_stack.providers.utils.files.form_data import parse_expires_after
28
- from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
29
- from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
30
- from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
33
+ from llama_stack_api.files.models import (
34
+ DeleteFileRequest,
35
+ ListFilesRequest,
36
+ RetrieveFileContentRequest,
37
+ RetrieveFileRequest,
38
+ UploadFileRequest,
39
+ )
40
+ from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
31
41
 
32
42
  from .config import S3FilesImplConfig
33
43
 
34
44
  # TODO: provider data for S3 credentials
35
45
 
36
46
 
37
- def _create_s3_client(config: S3FilesImplConfig) -> boto3.client:
47
+ def _create_s3_client(config: S3FilesImplConfig) -> "S3Client":
38
48
  try:
39
49
  s3_config = {
40
50
  "region_name": config.region,
@@ -52,13 +62,16 @@ def _create_s3_client(config: S3FilesImplConfig) -> boto3.client:
52
62
  }
53
63
  )
54
64
 
55
- return boto3.client("s3", **s3_config)
65
+ # Both cast and type:ignore are needed here:
66
+ # - cast tells mypy the return type for downstream usage (S3Client vs generic client)
67
+ # - type:ignore suppresses the call-overload error from boto3's complex overloaded signatures
68
+ return cast("S3Client", boto3.client("s3", **s3_config)) # type: ignore[call-overload]
56
69
 
57
70
  except (BotoCoreError, NoCredentialsError) as e:
58
71
  raise RuntimeError(f"Failed to initialize S3 client: {e}") from e
59
72
 
60
73
 
61
- async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImplConfig) -> None:
74
+ async def _create_bucket_if_not_exists(client: "S3Client", config: S3FilesImplConfig) -> None:
62
75
  try:
63
76
  client.head_bucket(Bucket=config.bucket_name)
64
77
  except ClientError as e:
@@ -76,7 +89,7 @@ async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImpl
76
89
  else:
77
90
  client.create_bucket(
78
91
  Bucket=config.bucket_name,
79
- CreateBucketConfiguration={"LocationConstraint": config.region},
92
+ CreateBucketConfiguration=cast(Any, {"LocationConstraint": config.region}),
80
93
  )
81
94
  except ClientError as create_error:
82
95
  raise RuntimeError(
@@ -128,18 +141,20 @@ class S3FilesImpl(Files):
128
141
  def __init__(self, config: S3FilesImplConfig, policy: list[AccessRule]) -> None:
129
142
  self._config = config
130
143
  self.policy = policy
131
- self._client: boto3.client | None = None
144
+ self._client: S3Client | None = None
132
145
  self._sql_store: AuthorizedSqlStore | None = None
133
146
 
134
147
  def _now(self) -> int:
135
148
  """Return current UTC timestamp as int seconds."""
136
149
  return int(datetime.now(UTC).timestamp())
137
150
 
138
- async def _get_file(self, file_id: str, return_expired: bool = False) -> dict[str, Any]:
151
+ async def _get_file(
152
+ self, file_id: str, return_expired: bool = False, action: Action = Action.READ
153
+ ) -> dict[str, Any]:
139
154
  where: dict[str, str | dict] = {"id": file_id}
140
155
  if not return_expired:
141
156
  where["expires_at"] = {">": self._now()}
142
- if not (row := await self.sql_store.fetch_one("openai_files", where=where)):
157
+ if not (row := await self.sql_store.fetch_one("openai_files", where=where, action=action)):
143
158
  raise ResourceNotFoundError(file_id, "File", "files.list()")
144
159
  return row
145
160
 
@@ -184,7 +199,7 @@ class S3FilesImpl(Files):
184
199
  pass
185
200
 
186
201
  @property
187
- def client(self) -> boto3.client:
202
+ def client(self) -> "S3Client":
188
203
  assert self._client is not None, "Provider not initialized"
189
204
  return self._client
190
205
 
@@ -195,10 +210,12 @@ class S3FilesImpl(Files):
195
210
 
196
211
  async def openai_upload_file(
197
212
  self,
198
- file: Annotated[UploadFile, File()],
199
- purpose: Annotated[OpenAIFilePurpose, Form()],
200
- expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None,
213
+ request: UploadFileRequest,
214
+ file: UploadFile,
201
215
  ) -> OpenAIFileObject:
216
+ purpose = request.purpose
217
+ expires_after = request.expires_after
218
+
202
219
  file_id = generate_object_id("file", lambda: f"file-{uuid.uuid4().hex}")
203
220
 
204
221
  filename = getattr(file, "filename", None) or "uploaded_file"
@@ -246,11 +263,13 @@ class S3FilesImpl(Files):
246
263
 
247
264
  async def openai_list_files(
248
265
  self,
249
- after: str | None = None,
250
- limit: int | None = 10000,
251
- order: Order | None = Order.desc,
252
- purpose: OpenAIFilePurpose | None = None,
266
+ request: ListFilesRequest,
253
267
  ) -> ListOpenAIFileResponse:
268
+ after = request.after
269
+ limit = request.limit
270
+ order = request.order
271
+ purpose = request.purpose
272
+
254
273
  # this purely defensive. it should not happen because the router also default to Order.desc.
255
274
  if not order:
256
275
  order = Order.desc
@@ -277,18 +296,21 @@ class S3FilesImpl(Files):
277
296
  last_id=files[-1].id if files else "",
278
297
  )
279
298
 
280
- async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject:
299
+ async def openai_retrieve_file(self, request: RetrieveFileRequest) -> OpenAIFileObject:
300
+ file_id = request.file_id
281
301
  await self._delete_if_expired(file_id)
282
302
  row = await self._get_file(file_id)
283
303
  return _make_file_object(**row)
284
304
 
285
- async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse:
305
+ async def openai_delete_file(self, request: DeleteFileRequest) -> OpenAIFileDeleteResponse:
306
+ file_id = request.file_id
286
307
  await self._delete_if_expired(file_id)
287
- _ = await self._get_file(file_id) # raises if not found
308
+ _ = await self._get_file(file_id, action=Action.DELETE) # raises if not found
288
309
  await self._delete_file(file_id)
289
310
  return OpenAIFileDeleteResponse(id=file_id, deleted=True)
290
311
 
291
- async def openai_retrieve_file_content(self, file_id: str) -> Response:
312
+ async def openai_retrieve_file_content(self, request: RetrieveFileContentRequest) -> Response:
313
+ file_id = request.file_id
292
314
  await self._delete_if_expired(file_id)
293
315
 
294
316
  row = await self._get_file(file_id)
@@ -33,4 +33,5 @@ class AnthropicInferenceAdapter(OpenAIMixin):
33
33
  return "https://api.anthropic.com/v1"
34
34
 
35
35
  async def list_provider_model_ids(self) -> Iterable[str]:
36
- return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()]
36
+ api_key = self._get_api_key_from_config_or_provider_data()
37
+ return [m.id async for m in AsyncAnthropic(api_key=api_key).models.list()]
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, Field
10
10
 
11
11
  from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  class AnthropicProviderDataValidator(BaseModel):
@@ -4,8 +4,6 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from urllib.parse import urljoin
8
-
9
7
  from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
10
8
 
11
9
  from .config import AzureConfig
@@ -22,4 +20,4 @@ class AzureInferenceAdapter(OpenAIMixin):
22
20
 
23
21
  Returns the Azure API base URL from the configuration.
24
22
  """
25
- return urljoin(str(self.config.api_base), "/openai/v1")
23
+ return str(self.config.base_url)
@@ -7,14 +7,14 @@
7
7
  import os
8
8
  from typing import Any
9
9
 
10
- from pydantic import BaseModel, Field, HttpUrl, SecretStr
10
+ from pydantic import BaseModel, Field, HttpUrl
11
11
 
12
12
  from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
13
- from llama_stack.schema_utils import json_schema_type
13
+ from llama_stack_api import json_schema_type
14
14
 
15
15
 
16
16
  class AzureProviderDataValidator(BaseModel):
17
- azure_api_key: SecretStr = Field(
17
+ azure_api_key: str = Field(
18
18
  description="Azure API key for Azure",
19
19
  )
20
20
  azure_api_base: HttpUrl = Field(
@@ -32,8 +32,9 @@ class AzureProviderDataValidator(BaseModel):
32
32
 
33
33
  @json_schema_type
34
34
  class AzureConfig(RemoteInferenceProviderConfig):
35
- api_base: HttpUrl = Field(
36
- description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
35
+ base_url: HttpUrl | None = Field(
36
+ default=None,
37
+ description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1)",
37
38
  )
38
39
  api_version: str | None = Field(
39
40
  default_factory=lambda: os.getenv("AZURE_API_VERSION"),
@@ -48,14 +49,14 @@ class AzureConfig(RemoteInferenceProviderConfig):
48
49
  def sample_run_config(
49
50
  cls,
50
51
  api_key: str = "${env.AZURE_API_KEY:=}",
51
- api_base: str = "${env.AZURE_API_BASE:=}",
52
+ base_url: str = "${env.AZURE_API_BASE:=}",
52
53
  api_version: str = "${env.AZURE_API_VERSION:=}",
53
54
  api_type: str = "${env.AZURE_API_TYPE:=}",
54
55
  **kwargs,
55
56
  ) -> dict[str, Any]:
56
57
  return {
57
58
  "api_key": api_key,
58
- "api_base": api_base,
59
+ "base_url": base_url,
59
60
  "api_version": api_version,
60
61
  "api_type": api_type,
61
62
  }
@@ -11,7 +11,7 @@ async def get_adapter_impl(config: BedrockConfig, _deps):
11
11
 
12
12
  assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}"
13
13
 
14
- impl = BedrockInferenceAdapter(config)
14
+ impl = BedrockInferenceAdapter(config=config)
15
15
 
16
16
  await impl.initialize()
17
17
 
@@ -4,139 +4,116 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- import json
8
- from collections.abc import AsyncIterator
7
+ from collections.abc import AsyncIterator, Iterable
9
8
 
10
- from botocore.client import BaseClient
9
+ from openai import AuthenticationError
11
10
 
12
- from llama_stack.apis.inference import (
13
- ChatCompletionRequest,
14
- Inference,
11
+ from llama_stack.log import get_logger
12
+ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
13
+ from llama_stack_api import (
14
+ OpenAIChatCompletion,
15
+ OpenAIChatCompletionChunk,
15
16
  OpenAIChatCompletionRequestWithExtraBody,
17
+ OpenAICompletion,
16
18
  OpenAICompletionRequestWithExtraBody,
17
19
  OpenAIEmbeddingsRequestWithExtraBody,
18
20
  OpenAIEmbeddingsResponse,
19
21
  )
20
- from llama_stack.apis.inference.inference import (
21
- OpenAIChatCompletion,
22
- OpenAIChatCompletionChunk,
23
- OpenAICompletion,
24
- )
25
- from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
26
- from llama_stack.providers.utils.bedrock.client import create_bedrock_client
27
- from llama_stack.providers.utils.inference.model_registry import (
28
- ModelRegistryHelper,
29
- )
30
- from llama_stack.providers.utils.inference.openai_compat import (
31
- get_sampling_strategy_options,
32
- )
33
- from llama_stack.providers.utils.inference.prompt_adapter import (
34
- chat_completion_request_to_prompt,
35
- )
36
-
37
- from .models import MODEL_ENTRIES
38
-
39
- REGION_PREFIX_MAP = {
40
- "us": "us.",
41
- "eu": "eu.",
42
- "ap": "ap.",
43
- }
44
-
45
-
46
- def _get_region_prefix(region: str | None) -> str:
47
- # AWS requires region prefixes for inference profiles
48
- if region is None:
49
- return "us." # default to US when we don't know
50
-
51
- # Handle case insensitive region matching
52
- region_lower = region.lower()
53
- for prefix in REGION_PREFIX_MAP:
54
- if region_lower.startswith(f"{prefix}-"):
55
- return REGION_PREFIX_MAP[prefix]
56
-
57
- # Fallback to US for anything we don't recognize
58
- return "us."
59
-
60
-
61
- def _to_inference_profile_id(model_id: str, region: str = None) -> str:
62
- # Return ARNs unchanged
63
- if model_id.startswith("arn:"):
64
- return model_id
65
-
66
- # Return inference profile IDs that already have regional prefixes
67
- if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()):
68
- return model_id
69
-
70
- # Default to US East when no region is provided
71
- if region is None:
72
- region = "us-east-1"
73
-
74
- return _get_region_prefix(region) + model_id
75
-
76
22
 
77
- class BedrockInferenceAdapter(
78
- ModelRegistryHelper,
79
- Inference,
80
- ):
81
- def __init__(self, config: BedrockConfig) -> None:
82
- ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
83
- self._config = config
84
- self._client = None
23
+ from .config import BedrockConfig
85
24
 
86
- @property
87
- def client(self) -> BaseClient:
88
- if self._client is None:
89
- self._client = create_bedrock_client(self._config)
90
- return self._client
25
+ logger = get_logger(name=__name__, category="inference::bedrock")
91
26
 
92
- async def initialize(self) -> None:
93
- pass
94
27
 
95
- async def shutdown(self) -> None:
96
- if self._client is not None:
97
- self._client.close()
28
+ class BedrockInferenceAdapter(OpenAIMixin):
29
+ """
30
+ Adapter for AWS Bedrock's OpenAI-compatible API endpoints.
98
31
 
99
- async def _get_params_for_chat_completion(self, request: ChatCompletionRequest) -> dict:
100
- bedrock_model = request.model
32
+ Supports Llama models across regions and GPT-OSS models (us-west-2 only).
101
33
 
102
- sampling_params = request.sampling_params
103
- options = get_sampling_strategy_options(sampling_params)
34
+ Note: Bedrock's OpenAI-compatible endpoint does not support /v1/models
35
+ for dynamic model discovery. Models must be pre-registered in the config.
36
+ """
104
37
 
105
- if sampling_params.max_tokens:
106
- options["max_gen_len"] = sampling_params.max_tokens
107
- if sampling_params.repetition_penalty > 0:
108
- options["repetition_penalty"] = sampling_params.repetition_penalty
38
+ config: BedrockConfig
39
+ provider_data_api_key_field: str = "aws_bearer_token_bedrock"
109
40
 
110
- prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model))
41
+ def get_base_url(self) -> str:
42
+ """Get base URL for OpenAI client."""
43
+ return f"https://bedrock-runtime.{self.config.region_name}.amazonaws.com/openai/v1"
111
44
 
112
- # Convert foundation model ID to inference profile ID
113
- region_name = self.client.meta.region_name
114
- inference_profile_id = _to_inference_profile_id(bedrock_model, region_name)
45
+ async def list_provider_model_ids(self) -> Iterable[str]:
46
+ """
47
+ Bedrock's OpenAI-compatible endpoint does not support the /v1/models endpoint.
48
+ Returns empty list since models must be pre-registered in the config.
49
+ """
50
+ return []
115
51
 
116
- return {
117
- "modelId": inference_profile_id,
118
- "body": json.dumps(
119
- {
120
- "prompt": prompt,
121
- **options,
122
- }
123
- ),
124
- }
52
+ async def check_model_availability(self, model: str) -> bool:
53
+ """
54
+ Bedrock doesn't support dynamic model listing via /v1/models.
55
+ Always return True to accept all models registered in the config.
56
+ """
57
+ return True
125
58
 
126
59
  async def openai_embeddings(
127
60
  self,
128
61
  params: OpenAIEmbeddingsRequestWithExtraBody,
129
62
  ) -> OpenAIEmbeddingsResponse:
130
- raise NotImplementedError()
63
+ """Bedrock's OpenAI-compatible API does not support the /v1/embeddings endpoint."""
64
+ raise NotImplementedError(
65
+ "Bedrock's OpenAI-compatible API does not support /v1/embeddings endpoint. "
66
+ "See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html"
67
+ )
131
68
 
132
69
  async def openai_completion(
133
70
  self,
134
71
  params: OpenAICompletionRequestWithExtraBody,
135
- ) -> OpenAICompletion:
136
- raise NotImplementedError("OpenAI completion not supported by the Bedrock provider")
72
+ ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
73
+ """Bedrock's OpenAI-compatible API does not support the /v1/completions endpoint."""
74
+ raise NotImplementedError(
75
+ "Bedrock's OpenAI-compatible API does not support /v1/completions endpoint. "
76
+ "Only /v1/chat/completions is supported. "
77
+ "See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html"
78
+ )
137
79
 
138
80
  async def openai_chat_completion(
139
81
  self,
140
82
  params: OpenAIChatCompletionRequestWithExtraBody,
141
83
  ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
142
- raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider")
84
+ """Override to handle authentication errors and null responses."""
85
+ try:
86
+ logger.debug(f"Calling Bedrock OpenAI API with model={params.model}, stream={params.stream}")
87
+ result = await super().openai_chat_completion(params=params)
88
+ logger.debug(f"Bedrock API returned: {type(result).__name__ if result is not None else 'None'}")
89
+
90
+ if result is None:
91
+ logger.error(f"Bedrock OpenAI client returned None for model={params.model}, stream={params.stream}")
92
+ raise RuntimeError(
93
+ f"Bedrock API returned no response for model '{params.model}'. "
94
+ "This may indicate the model is not supported or a network/API issue occurred."
95
+ )
96
+
97
+ return result
98
+ except AuthenticationError as e:
99
+ error_msg = str(e)
100
+
101
+ # Check if this is a token expiration error
102
+ if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg:
103
+ logger.error(f"AWS Bedrock authentication token expired: {error_msg}")
104
+ raise ValueError(
105
+ "AWS Bedrock authentication failed: Bearer token has expired. "
106
+ "The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. "
107
+ "Please refresh your token by generating a new pre-signed URL with AWS credentials. "
108
+ "Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints."
109
+ ) from e
110
+ else:
111
+ logger.error(f"AWS Bedrock authentication failed: {error_msg}")
112
+ raise ValueError(
113
+ f"AWS Bedrock authentication failed: {error_msg}. "
114
+ "Please verify your API key is correct in the provider config or x-llamastack-provider-data header. "
115
+ "The API key should be a valid AWS pre-signed URL for Bedrock's OpenAI-compatible endpoint."
116
+ ) from e
117
+ except Exception as e:
118
+ logger.error(f"Unexpected error calling Bedrock API: {type(e).__name__}: {e}", exc_info=True)
119
+ raise
@@ -4,8 +4,29 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
7
+ import os
8
8
 
9
+ from pydantic import BaseModel, Field
9
10
 
10
- class BedrockConfig(BedrockBaseConfig):
11
- pass
11
+ from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
12
+
13
+
14
+ class BedrockProviderDataValidator(BaseModel):
15
+ aws_bearer_token_bedrock: str | None = Field(
16
+ default=None,
17
+ description="API Key (Bearer token) for Amazon Bedrock",
18
+ )
19
+
20
+
21
+ class BedrockConfig(RemoteInferenceProviderConfig):
22
+ region_name: str = Field(
23
+ default_factory=lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-2"),
24
+ description="AWS Region for the Bedrock Runtime endpoint",
25
+ )
26
+
27
+ @classmethod
28
+ def sample_run_config(cls, **kwargs):
29
+ return {
30
+ "api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}",
31
+ "region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}",
32
+ }
@@ -4,13 +4,11 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from urllib.parse import urljoin
8
-
9
- from llama_stack.apis.inference import (
7
+ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
8
+ from llama_stack_api import (
10
9
  OpenAIEmbeddingsRequestWithExtraBody,
11
10
  OpenAIEmbeddingsResponse,
12
11
  )
13
- from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
14
12
 
15
13
  from .config import CerebrasImplConfig
16
14
 
@@ -18,8 +16,10 @@ from .config import CerebrasImplConfig
18
16
  class CerebrasInferenceAdapter(OpenAIMixin):
19
17
  config: CerebrasImplConfig
20
18
 
19
+ provider_data_api_key_field: str = "cerebras_api_key"
20
+
21
21
  def get_base_url(self) -> str:
22
- return urljoin(self.config.base_url, "v1")
22
+ return str(self.config.base_url)
23
23
 
24
24
  async def openai_embeddings(
25
25
  self,
@@ -7,18 +7,25 @@
7
7
  import os
8
8
  from typing import Any
9
9
 
10
- from pydantic import Field
10
+ from pydantic import BaseModel, Field, HttpUrl
11
11
 
12
12
  from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
13
- from llama_stack.schema_utils import json_schema_type
13
+ from llama_stack_api import json_schema_type
14
14
 
15
- DEFAULT_BASE_URL = "https://api.cerebras.ai"
15
+ DEFAULT_BASE_URL = "https://api.cerebras.ai/v1"
16
+
17
+
18
+ class CerebrasProviderDataValidator(BaseModel):
19
+ cerebras_api_key: str | None = Field(
20
+ default=None,
21
+ description="API key for Cerebras models",
22
+ )
16
23
 
17
24
 
18
25
  @json_schema_type
19
26
  class CerebrasImplConfig(RemoteInferenceProviderConfig):
20
- base_url: str = Field(
21
- default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
27
+ base_url: HttpUrl | None = Field(
28
+ default=HttpUrl(os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL)),
22
29
  description="Base URL for the Cerebras API",
23
30
  )
24
31
 
@@ -6,17 +6,24 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from pydantic import Field, SecretStr
9
+ from pydantic import BaseModel, Field, HttpUrl, SecretStr
10
10
 
11
11
  from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
+
14
+
15
+ class DatabricksProviderDataValidator(BaseModel):
16
+ databricks_api_token: str | None = Field(
17
+ default=None,
18
+ description="API token for Databricks models",
19
+ )
13
20
 
14
21
 
15
22
  @json_schema_type
16
23
  class DatabricksImplConfig(RemoteInferenceProviderConfig):
17
- url: str | None = Field(
24
+ base_url: HttpUrl | None = Field(
18
25
  default=None,
19
- description="The URL for the Databricks model serving endpoint",
26
+ description="The URL for the Databricks model serving endpoint (should include /serving-endpoints path)",
20
27
  )
21
28
  auth_credential: SecretStr | None = Field(
22
29
  default=None,
@@ -27,11 +34,11 @@ class DatabricksImplConfig(RemoteInferenceProviderConfig):
27
34
  @classmethod
28
35
  def sample_run_config(
29
36
  cls,
30
- url: str = "${env.DATABRICKS_HOST:=}",
37
+ base_url: str = "${env.DATABRICKS_HOST:=}",
31
38
  api_token: str = "${env.DATABRICKS_TOKEN:=}",
32
39
  **kwargs: Any,
33
40
  ) -> dict[str, Any]:
34
41
  return {
35
- "url": url,
42
+ "base_url": base_url,
36
43
  "api_token": api_token,
37
44
  }