llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (311)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
  71. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  72. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  73. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  74. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
  75. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  76. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  77. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  78. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  79. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  80. llama_stack/providers/registry/agents.py +1 -0
  81. llama_stack/providers/registry/inference.py +1 -9
  82. llama_stack/providers/registry/vector_io.py +136 -16
  83. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  84. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  85. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  86. llama_stack/providers/remote/files/s3/README.md +266 -0
  87. llama_stack/providers/remote/files/s3/config.py +5 -3
  88. llama_stack/providers/remote/files/s3/files.py +2 -2
  89. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  90. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  91. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  92. llama_stack/providers/remote/inference/together/together.py +4 -0
  93. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  94. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  95. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  96. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  97. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  98. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  99. llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
  100. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  101. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  102. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  103. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  104. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  105. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  106. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  107. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  108. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  109. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  110. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  111. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  112. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  113. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  114. llama_stack/providers/utils/bedrock/client.py +3 -3
  115. llama_stack/providers/utils/bedrock/config.py +7 -7
  116. llama_stack/providers/utils/inference/__init__.py +0 -25
  117. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  118. llama_stack/providers/utils/inference/http_client.py +239 -0
  119. llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
  120. llama_stack/providers/utils/inference/model_registry.py +148 -2
  121. llama_stack/providers/utils/inference/openai_compat.py +1 -158
  122. llama_stack/providers/utils/inference/openai_mixin.py +42 -2
  123. llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
  124. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  125. llama_stack/providers/utils/memory/vector_store.py +46 -19
  126. llama_stack/providers/utils/responses/responses_store.py +40 -6
  127. llama_stack/providers/utils/safety.py +114 -0
  128. llama_stack/providers/utils/tools/mcp.py +44 -3
  129. llama_stack/testing/api_recorder.py +9 -3
  130. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
  131. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
  132. llama_stack-0.5.0.dist-info/top_level.txt +1 -0
  133. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  134. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  135. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  136. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  137. llama_stack/models/llama/hadamard_utils.py +0 -88
  138. llama_stack/models/llama/llama3/args.py +0 -74
  139. llama_stack/models/llama/llama3/generation.py +0 -378
  140. llama_stack/models/llama/llama3/model.py +0 -304
  141. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  142. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  143. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  144. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  145. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  146. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  147. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  148. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  149. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  150. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  151. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  152. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  153. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  154. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  155. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  156. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  157. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  158. llama_stack/models/llama/llama4/args.py +0 -107
  159. llama_stack/models/llama/llama4/ffn.py +0 -58
  160. llama_stack/models/llama/llama4/moe.py +0 -214
  161. llama_stack/models/llama/llama4/preprocess.py +0 -435
  162. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  163. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  164. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  165. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  166. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  167. llama_stack/models/llama/quantize_impls.py +0 -316
  168. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  169. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  170. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  171. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  172. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  173. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  174. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  175. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  176. llama_stack_api/__init__.py +0 -945
  177. llama_stack_api/admin/__init__.py +0 -45
  178. llama_stack_api/admin/api.py +0 -72
  179. llama_stack_api/admin/fastapi_routes.py +0 -117
  180. llama_stack_api/admin/models.py +0 -113
  181. llama_stack_api/agents.py +0 -173
  182. llama_stack_api/batches/__init__.py +0 -40
  183. llama_stack_api/batches/api.py +0 -53
  184. llama_stack_api/batches/fastapi_routes.py +0 -113
  185. llama_stack_api/batches/models.py +0 -78
  186. llama_stack_api/benchmarks/__init__.py +0 -43
  187. llama_stack_api/benchmarks/api.py +0 -39
  188. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  189. llama_stack_api/benchmarks/models.py +0 -109
  190. llama_stack_api/common/__init__.py +0 -5
  191. llama_stack_api/common/content_types.py +0 -101
  192. llama_stack_api/common/errors.py +0 -95
  193. llama_stack_api/common/job_types.py +0 -38
  194. llama_stack_api/common/responses.py +0 -77
  195. llama_stack_api/common/training_types.py +0 -47
  196. llama_stack_api/common/type_system.py +0 -146
  197. llama_stack_api/connectors.py +0 -146
  198. llama_stack_api/conversations.py +0 -270
  199. llama_stack_api/datasetio.py +0 -55
  200. llama_stack_api/datasets/__init__.py +0 -61
  201. llama_stack_api/datasets/api.py +0 -35
  202. llama_stack_api/datasets/fastapi_routes.py +0 -104
  203. llama_stack_api/datasets/models.py +0 -152
  204. llama_stack_api/datatypes.py +0 -373
  205. llama_stack_api/eval.py +0 -137
  206. llama_stack_api/file_processors/__init__.py +0 -27
  207. llama_stack_api/file_processors/api.py +0 -64
  208. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  209. llama_stack_api/file_processors/models.py +0 -42
  210. llama_stack_api/files/__init__.py +0 -35
  211. llama_stack_api/files/api.py +0 -51
  212. llama_stack_api/files/fastapi_routes.py +0 -124
  213. llama_stack_api/files/models.py +0 -107
  214. llama_stack_api/inference.py +0 -1169
  215. llama_stack_api/inspect_api/__init__.py +0 -37
  216. llama_stack_api/inspect_api/api.py +0 -25
  217. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  218. llama_stack_api/inspect_api/models.py +0 -28
  219. llama_stack_api/internal/kvstore.py +0 -28
  220. llama_stack_api/internal/sqlstore.py +0 -81
  221. llama_stack_api/llama_stack_api/__init__.py +0 -945
  222. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  223. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  224. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  225. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  226. llama_stack_api/llama_stack_api/agents.py +0 -173
  227. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  228. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  229. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  230. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  231. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  232. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  233. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  234. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  235. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  236. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  237. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  238. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  239. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  240. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  241. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  242. llama_stack_api/llama_stack_api/connectors.py +0 -146
  243. llama_stack_api/llama_stack_api/conversations.py +0 -270
  244. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  245. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  246. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  247. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  248. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  249. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  250. llama_stack_api/llama_stack_api/eval.py +0 -137
  251. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  252. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  253. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  254. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  255. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  256. llama_stack_api/llama_stack_api/files/api.py +0 -51
  257. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  258. llama_stack_api/llama_stack_api/files/models.py +0 -107
  259. llama_stack_api/llama_stack_api/inference.py +0 -1169
  260. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  261. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  262. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  263. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  264. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  265. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  266. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  267. llama_stack_api/llama_stack_api/models.py +0 -171
  268. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  269. llama_stack_api/llama_stack_api/post_training.py +0 -370
  270. llama_stack_api/llama_stack_api/prompts.py +0 -203
  271. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  272. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  273. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  274. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  275. llama_stack_api/llama_stack_api/py.typed +0 -0
  276. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  277. llama_stack_api/llama_stack_api/resource.py +0 -37
  278. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  279. llama_stack_api/llama_stack_api/safety.py +0 -132
  280. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  281. llama_stack_api/llama_stack_api/scoring.py +0 -93
  282. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  283. llama_stack_api/llama_stack_api/shields.py +0 -93
  284. llama_stack_api/llama_stack_api/tools.py +0 -226
  285. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  286. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  287. llama_stack_api/llama_stack_api/version.py +0 -9
  288. llama_stack_api/models.py +0 -171
  289. llama_stack_api/openai_responses.py +0 -1468
  290. llama_stack_api/post_training.py +0 -370
  291. llama_stack_api/prompts.py +0 -203
  292. llama_stack_api/providers/__init__.py +0 -33
  293. llama_stack_api/providers/api.py +0 -16
  294. llama_stack_api/providers/fastapi_routes.py +0 -57
  295. llama_stack_api/providers/models.py +0 -24
  296. llama_stack_api/py.typed +0 -0
  297. llama_stack_api/rag_tool.py +0 -168
  298. llama_stack_api/resource.py +0 -37
  299. llama_stack_api/router_utils.py +0 -160
  300. llama_stack_api/safety.py +0 -132
  301. llama_stack_api/schema_utils.py +0 -208
  302. llama_stack_api/scoring.py +0 -93
  303. llama_stack_api/scoring_functions.py +0 -211
  304. llama_stack_api/shields.py +0 -93
  305. llama_stack_api/tools.py +0 -226
  306. llama_stack_api/vector_io.py +0 -941
  307. llama_stack_api/vector_stores.py +0 -53
  308. llama_stack_api/version.py +0 -9
  309. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
  310. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
  311. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
llama_stack/core/datatypes.py
@@ -11,7 +11,7 @@ from urllib.parse import urlparse
 
 from pydantic import BaseModel, Field, field_validator, model_validator
 
-from llama_stack.core.access_control.datatypes import AccessRule
+from llama_stack.core.access_control.datatypes import AccessRule, RouteAccessRule
 from llama_stack.core.storage.datatypes import (
     KVStoreReference,
     StorageBackendType,
@@ -329,13 +329,17 @@ AuthProviderConfig = Annotated[
 class AuthenticationConfig(BaseModel):
     """Top-level authentication configuration."""
 
-    provider_config: AuthProviderConfig = Field(
-        ...,
-        description="Authentication provider configuration",
+    provider_config: AuthProviderConfig | None = Field(
+        default=None,
+        description="Authentication provider configuration (optional if only using route_policy)",
+    )
+    route_policy: list[RouteAccessRule] = Field(
+        default=[],
+        description="Rules for determining access to API routes (infrastructure-level)",
     )
     access_policy: list[AccessRule] = Field(
         default=[],
-        description="Rules for determining access to resources",
+        description="Rules for determining access to resources (data-level)",
     )
 
 
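Note: provider_config is now optional, so a deployment can enforce route-level rules without configuring an auth provider at all. A minimal sketch of constructing the updated model directly (empty policies shown; the rule syntax for RouteAccessRule entries lives in llama_stack/core/access_control/datatypes.py and is not reproduced here):

    from llama_stack.core.datatypes import AuthenticationConfig

    # provider_config defaults to None after this change, so this validates:
    auth = AuthenticationConfig(
        route_policy=[],   # infrastructure-level: who may call which API routes
        access_policy=[],  # data-level: who may see which registered resources
    )
    assert auth.provider_config is None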
@@ -348,6 +352,7 @@ class QualifiedModel(BaseModel):
 
     provider_id: str
     model_id: str
+    embedding_dimensions: int | None = None
 
 
 class RewriteQueryParams(BaseModel):
@@ -654,7 +659,6 @@ class RegisteredResources(BaseModel):
     scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
     benchmarks: list[BenchmarkInput] = Field(default_factory=list)
     tool_groups: list[ToolGroupInput] = Field(default_factory=list)
-    connectors: list[ConnectorInput] = Field(default_factory=list)
 
 
 class ServerConfig(BaseModel):
@@ -703,11 +707,20 @@ class ServerConfig(BaseModel):
 class StackConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION
 
-    image_name: str = Field(
-        ...,
+    distro_name: str | None = Field(
+        default=None,
         description="""
 Reference to the distribution this package refers to. For unregistered (adhoc) packages,
 this could be just a hash
+""",
+    )
+    image_name: str | None = Field(
+        default=None,
+        deprecated=True,
+        description="""
+DEPRECATED: Use 'distro_name' instead. This field is maintained for backward compatibility.
+Reference to the distribution this package refers to. For unregistered (adhoc) packages,
+this could be just a hash
 """,
     )
     container_image: str | None = Field(
@@ -763,6 +776,11 @@ can be instantiated multiple times (with different configs) if necessary.
         description="Configuration for default moderations model",
     )
 
+    connectors: list[ConnectorInput] = Field(
+        default_factory=list,
+        description="List of connectors to register at stack startup",
+    )
+
     @field_validator("external_providers_dir")
     @classmethod
     def validate_external_providers_dir(cls, v):
@@ -772,6 +790,34 @@ can be instantiated multiple times (with different configs) if necessary.
             return Path(v)
         return v
 
+    @model_validator(mode="after")
+    def validate_distro_name_migration(self) -> "StackConfig":
+        """Handle migration from image_name to distro_name."""
+        import warnings
+
+        if self.distro_name is None and self.image_name is None:
+            raise ValueError("Either 'distro_name' or 'image_name' must be provided")
+
+        if self.image_name is not None and self.distro_name is None:
+            # Migrate from image_name to distro_name
+            warnings.warn(
+                "The 'image_name' field is deprecated. Please use 'distro_name' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            self.distro_name = self.image_name
+        elif self.image_name is not None and self.distro_name is not None:
+            # Both provided - warn and prefer distro_name
+            warnings.warn(
+                "Both 'image_name' and 'distro_name' were provided. "
+                "The 'image_name' field is deprecated and will be ignored. "
+                "Please use only 'distro_name' in your configuration.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
+        return self
+
     @model_validator(mode="after")
     def validate_server_stores(self) -> "StackConfig":
         backend_map = self.storage.backends
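Note: the new validator gives old configs a soft landing instead of a hard break. A self-contained toy that mirrors the same pydantic pattern (_ConfigSketch is illustrative, not part of llama-stack):

    import warnings

    from pydantic import BaseModel, model_validator


    class _ConfigSketch(BaseModel):
        """Toy model mirroring the image_name -> distro_name migration above."""

        distro_name: str | None = None
        image_name: str | None = None

        @model_validator(mode="after")
        def _migrate(self) -> "_ConfigSketch":
            if self.distro_name is None and self.image_name is None:
                raise ValueError("Either 'distro_name' or 'image_name' must be provided")
            if self.image_name is not None and self.distro_name is None:
                warnings.warn("'image_name' is deprecated; use 'distro_name'", DeprecationWarning, stacklevel=2)
                self.distro_name = self.image_name
            return self


    # A legacy config still loads; the value lands in distro_name.
    cfg = _ConfigSketch(image_name="starter")
    assert cfg.distro_name == "starter"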
llama_stack/core/library_client.py
@@ -20,7 +20,7 @@ import httpx
 import yaml
 from fastapi import Response as FastAPIResponse
 
-from llama_stack.core.utils.type_inspection import is_unwrapped_body_param
+from llama_stack.core.utils.type_inspection import is_body_param, is_unwrapped_body_param
 
 try:
     from llama_stack_client import (
@@ -504,11 +504,30 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         # Prepare body for the function call (handles both Pydantic and traditional params)
         body = self._convert_body(func, body)
 
+        result = await func(**body)
+        content_type = "application/json"
+        if isinstance(result, FastAPIResponse):
+            content_type = result.media_type or content_type
+
         async def gen():
-            async for chunk in await func(**body):
-                data = json.dumps(convert_pydantic_to_json_value(chunk))
-                sse_event = f"data: {data}\n\n"
-                yield sse_event.encode("utf-8")
+            # Handle FastAPI StreamingResponse (returned by router endpoints)
+            # Extract the async generator from the StreamingResponse body
+            from fastapi.responses import StreamingResponse
+
+            if isinstance(result, StreamingResponse):
+                # StreamingResponse.body_iterator is the async generator
+                async for chunk in result.body_iterator:
+                    # Chunk is already SSE-formatted string from sse_generator, encode to bytes
+                    if isinstance(chunk, str):
+                        yield chunk.encode("utf-8")
+                    else:
+                        yield chunk
+            else:
+                # Direct async generator from implementation
+                async for chunk in result:
+                    data = json.dumps(convert_pydantic_to_json_value(chunk))
+                    sse_event = f"data: {data}\n\n"
+                    yield sse_event.encode("utf-8")
 
         wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
 
@@ -516,7 +535,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             status_code=httpx.codes.OK,
             content=wrapped_gen,
             headers={
-                "Content-Type": "application/json",
+                "Content-Type": content_type,
             },
             request=httpx.Request(
                 method=options.method,
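Note: the streaming path now invokes the endpoint once up front, inspects the result, and forwards its media type instead of hard-coding application/json. The unwrapping relies on Starlette's body_iterator attribute, which hands back the generator the endpoint wrapped. A standalone sketch of the same idea (the sse() generator is illustrative):

    import asyncio
    from collections.abc import AsyncIterator

    from fastapi.responses import StreamingResponse


    async def main() -> None:
        async def sse() -> AsyncIterator[str]:
            # Pre-formatted SSE frames, as a router endpoint would emit
            yield 'data: {"delta": "hello"}\n\n'
            yield 'data: {"delta": "world"}\n\n'

        resp = StreamingResponse(sse(), media_type="text/event-stream")
        # body_iterator exposes the wrapped generator, so an in-process
        # (library) client can re-stream it without a real HTTP hop.
        async for chunk in resp.body_iterator:
            print(chunk if isinstance(chunk, str) else chunk.decode(), end="")


    asyncio.run(main())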
@@ -549,10 +568,26 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         sig = inspect.signature(func)
         params_list = [p for p in sig.parameters.values() if p.name != "self"]
 
+        # Resolve string annotations (from `from __future__ import annotations`) to actual types
+        try:
+            type_hints = typing.get_type_hints(func, include_extras=True)
+        except NameError as e:
+            # Forward reference could not be resolved - fall back to raw annotations
+            logger.debug(f"Could not resolve type hints for {func.__name__}: {e}")
+            type_hints = {}
+        except Exception as e:
+            # Unexpected error - log and fall back
+            logger.warning(f"Failed to resolve type hints for {func.__name__}: {e}")
+            type_hints = {}
+
+        # Helper to get the resolved type for a parameter
+        def get_param_type(param: inspect.Parameter) -> Any:
+            return type_hints.get(param.name, param.annotation)
+
         # Flatten if there's a single unwrapped body parameter (BaseModel or Annotated[BaseModel, Body(embed=False)])
         if len(params_list) == 1:
             param = params_list[0]
-            param_type = param.annotation
+            param_type = get_param_type(param)
             if is_unwrapped_body_param(param_type):
                 base_type = get_args(param_type)[0]
                 return {param.name: base_type(**body)}
@@ -563,16 +598,22 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         # Check if there's an unwrapped body parameter among multiple parameters
         # (e.g., path param + body param like: vector_store_id: str, params: Annotated[Model, Body(...)])
         unwrapped_body_param = None
+        unwrapped_body_param_type = None
+        body_param = None
         for param in params_list:
-            if is_unwrapped_body_param(param.annotation):
+            param_type = get_param_type(param)
+            if is_unwrapped_body_param(param_type):
                 unwrapped_body_param = param
+                unwrapped_body_param_type = param_type
                 break
+            if body_param is None and is_body_param(param_type):
+                body_param = param
 
         # Check for parameters with Depends() annotation (FastAPI router endpoints)
         # These need special handling: construct the request model from body
         depends_param = None
         for param in params_list:
-            param_type = param.annotation
+            param_type = get_param_type(param)
             if get_origin(param_type) is typing.Annotated:
                 args = get_args(param_type)
                 if len(args) > 1:
@@ -595,11 +636,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             if param_name in exclude_params:
                 converted_body[param_name] = value
             else:
-                converted_body[param_name] = convert_to_pydantic(param.annotation, value)
+                resolved_type = get_param_type(param)
+                converted_body[param_name] = convert_to_pydantic(resolved_type, value)
 
         # Handle Depends parameter: construct request model from body
         if depends_param and depends_param.name not in converted_body:
-            param_type = depends_param.annotation
+            param_type = get_param_type(depends_param)
             if get_origin(param_type) is typing.Annotated:
                 base_type = get_args(param_type)[0]
                 # Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type
@@ -619,10 +661,15 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 converted_body[depends_param.name] = base_type(**body)
 
         # handle unwrapped body parameter after processing all named parameters
-        if unwrapped_body_param:
-            base_type = get_args(unwrapped_body_param.annotation)[0]
+        if unwrapped_body_param and unwrapped_body_param_type:
+            base_type = get_args(unwrapped_body_param_type)[0]
             # extract only keys not already used by other params
             remaining_keys = {k: v for k, v in body.items() if k not in converted_body}
             converted_body[unwrapped_body_param.name] = base_type(**remaining_keys)
+        elif body_param and body_param.name not in converted_body:
+            body_param_type = get_param_type(body_param)
+            base_type = get_args(body_param_type)[0]
+            remaining_keys = {k: v for k, v in body.items() if k not in converted_body}
+            converted_body[body_param.name] = base_type(**remaining_keys)
 
         return converted_body
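Note: the get_type_hints resolution exists because modules using `from __future__ import annotations` expose annotations as plain strings, which would defeat every type check on param.annotation. A self-contained illustration:

    from __future__ import annotations

    import inspect
    import typing


    def endpoint(limit: int | None = None) -> None: ...


    param = inspect.signature(endpoint).parameters["limit"]
    print(repr(param.annotation))  # 'int | None' -- a string, not a type

    hints = typing.get_type_hints(endpoint, include_extras=True)
    print(hints["limit"])  # the resolved type object, usable with get_origin/get_args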
llama_stack/core/prompts/prompts.py
@@ -11,7 +11,17 @@ from pydantic import BaseModel
 
 from llama_stack.core.datatypes import StackConfig
 from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
-from llama_stack_api import ListPromptsResponse, Prompt, Prompts
+from llama_stack_api import (
+    CreatePromptRequest,
+    DeletePromptRequest,
+    GetPromptRequest,
+    ListPromptsResponse,
+    ListPromptVersionsRequest,
+    Prompt,
+    Prompts,
+    SetDefaultVersionRequest,
+    UpdatePromptRequest,
+)
 
 
 class PromptServiceConfig(BaseModel):
@@ -114,26 +124,23 @@ class PromptServiceImpl(Prompts):
         prompts.sort(key=lambda p: p.prompt_id or "", reverse=True)
         return ListPromptsResponse(data=prompts)
 
-    async def get_prompt(self, prompt_id: str, version: int | None = None) -> Prompt:
+    async def get_prompt(self, request: GetPromptRequest) -> Prompt:
         """Get a prompt by its identifier and optional version."""
-        key = await self._get_prompt_key(prompt_id, version)
+        key = await self._get_prompt_key(request.prompt_id, request.version)
         data = await self.kvstore.get(key)
         if data is None:
-            raise ValueError(f"Prompt {prompt_id}:{version if version else 'default'} not found")
+            raise ValueError(
+                f"Prompt {request.prompt_id}:{request.version if request.version else 'default'} not found"
+            )
         return self._deserialize_prompt(data)
 
-    async def create_prompt(
-        self,
-        prompt: str,
-        variables: list[str] | None = None,
-    ) -> Prompt:
+    async def create_prompt(self, request: CreatePromptRequest) -> Prompt:
         """Create a new prompt."""
-        if variables is None:
-            variables = []
+        variables = request.variables if request.variables is not None else []
 
         prompt_obj = Prompt(
             prompt_id=Prompt.generate_prompt_id(),
-            prompt=prompt,
+            prompt=request.prompt,
             version=1,
             variables=variables,
         )
@@ -147,55 +154,49 @@ class PromptServiceImpl(Prompts):
 
         return prompt_obj
 
-    async def update_prompt(
-        self,
-        prompt_id: str,
-        prompt: str,
-        version: int,
-        variables: list[str] | None = None,
-        set_as_default: bool = True,
-    ) -> Prompt:
+    async def update_prompt(self, request: UpdatePromptRequest) -> Prompt:
         """Update an existing prompt (increments version)."""
-        if version < 1:
+        if request.version < 1:
             raise ValueError("Version must be >= 1")
-        if variables is None:
-            variables = []
+        variables = request.variables if request.variables is not None else []
 
-        prompt_versions = await self.list_prompt_versions(prompt_id)
+        prompt_versions = await self.list_prompt_versions(ListPromptVersionsRequest(prompt_id=request.prompt_id))
         latest_prompt = max(prompt_versions.data, key=lambda x: int(x.version))
 
-        if version and latest_prompt.version != version:
+        if request.version and latest_prompt.version != request.version:
             raise ValueError(
-                f"'{version}' is not the latest prompt version for prompt_id='{prompt_id}'. Use the latest version '{latest_prompt.version}' in request."
+                f"'{request.version}' is not the latest prompt version for prompt_id='{request.prompt_id}'. Use the latest version '{latest_prompt.version}' in request."
             )
 
-        current_version = latest_prompt.version if version is None else version
+        current_version = latest_prompt.version if request.version is None else request.version
         new_version = current_version + 1
 
-        updated_prompt = Prompt(prompt_id=prompt_id, prompt=prompt, version=new_version, variables=variables)
+        updated_prompt = Prompt(
+            prompt_id=request.prompt_id, prompt=request.prompt, version=new_version, variables=variables
+        )
 
-        version_key = self._get_version_key(prompt_id, str(new_version))
+        version_key = self._get_version_key(request.prompt_id, str(new_version))
         data = self._serialize_prompt(updated_prompt)
         await self.kvstore.set(version_key, data)
 
-        if set_as_default:
-            await self.set_default_version(prompt_id, new_version)
+        if request.set_as_default:
+            await self.set_default_version(SetDefaultVersionRequest(prompt_id=request.prompt_id, version=new_version))
 
         return updated_prompt
 
-    async def delete_prompt(self, prompt_id: str) -> None:
+    async def delete_prompt(self, request: DeletePromptRequest) -> None:
         """Delete a prompt and all its versions."""
-        await self.get_prompt(prompt_id)
+        await self.get_prompt(GetPromptRequest(prompt_id=request.prompt_id))
 
-        prefix = f"prompts:v1:{prompt_id}:"
+        prefix = f"prompts:v1:{request.prompt_id}:"
         keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
 
         for key in keys:
             await self.kvstore.delete(key)
 
-    async def list_prompt_versions(self, prompt_id: str) -> ListPromptsResponse:
+    async def list_prompt_versions(self, request: ListPromptVersionsRequest) -> ListPromptsResponse:
         """List all versions of a specific prompt."""
-        prefix = f"prompts:v1:{prompt_id}:"
+        prefix = f"prompts:v1:{request.prompt_id}:"
         keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
 
         default_version = None
@@ -211,7 +212,7 @@ class PromptServiceImpl(Prompts):
             prompts.append(prompt_obj)
 
         if not prompts:
-            raise ValueError(f"Prompt {prompt_id} not found")
+            raise ValueError(f"Prompt {request.prompt_id} not found")
 
         for prompt in prompts:
             prompt.is_default = str(prompt.version) == default_version
@@ -219,15 +220,15 @@ class PromptServiceImpl(Prompts):
         prompts.sort(key=lambda x: x.version)
         return ListPromptsResponse(data=prompts)
 
-    async def set_default_version(self, prompt_id: str, version: int) -> Prompt:
+    async def set_default_version(self, request: SetDefaultVersionRequest) -> Prompt:
         """Set which version of a prompt should be the default, If not set. the default is the latest."""
-        version_key = self._get_version_key(prompt_id, str(version))
+        version_key = self._get_version_key(request.prompt_id, str(request.version))
         data = await self.kvstore.get(version_key)
         if data is None:
-            raise ValueError(f"Prompt {prompt_id} version {version} not found")
+            raise ValueError(f"Prompt {request.prompt_id} version {request.version} not found")
 
-        default_key = self._get_default_key(prompt_id)
-        await self.kvstore.set(default_key, str(version))
+        default_key = self._get_default_key(request.prompt_id)
+        await self.kvstore.set(default_key, str(request.version))
 
         return self._deserialize_prompt(data)
 
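Note: every Prompts method now takes a single request model instead of loose keyword arguments, matching the request classes imported above. A usage sketch against the new surface (service wiring elided; field names follow the attribute accesses visible in this diff):

    from llama_stack_api import CreatePromptRequest, GetPromptRequest, UpdatePromptRequest


    async def demo(prompts_impl) -> None:
        created = await prompts_impl.create_prompt(
            CreatePromptRequest(prompt="Summarize the input.", variables=[])
        )
        fetched = await prompts_impl.get_prompt(
            GetPromptRequest(prompt_id=created.prompt_id)
        )
        # update_prompt requires the current latest version and bumps it by one
        await prompts_impl.update_prompt(
            UpdatePromptRequest(
                prompt_id=created.prompt_id,
                prompt="Summarize the input in one sentence.",
                version=fetched.version,
                set_as_default=True,
            )
        )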
llama_stack/core/routers/datasets.py
@@ -7,7 +7,15 @@
 from typing import Any
 
 from llama_stack.log import get_logger
-from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
+from llama_stack_api import (
+    AppendRowsRequest,
+    DatasetIO,
+    DatasetPurpose,
+    DataSource,
+    IterRowsRequest,
+    PaginatedResponse,
+    RoutingTable,
+)
 
 logger = get_logger(name=__name__, category="core::routers")
 
@@ -45,26 +53,21 @@ class DatasetIORouter(DatasetIO):
             dataset_id=dataset_id,
         )
 
-    async def iterrows(
-        self,
-        dataset_id: str,
-        start_index: int | None = None,
-        limit: int | None = None,
-    ) -> PaginatedResponse:
+    async def iterrows(self, request: IterRowsRequest) -> PaginatedResponse:
         logger.debug(
-            f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}",
+            f"DatasetIORouter.iterrows: {request.dataset_id}, start_index={request.start_index} limit={request.limit}",
        )
-        provider = await self.routing_table.get_provider_impl(dataset_id)
+        provider = await self.routing_table.get_provider_impl(request.dataset_id)
         return await provider.iterrows(
-            dataset_id=dataset_id,
-            start_index=start_index,
-            limit=limit,
+            dataset_id=request.dataset_id,
+            start_index=request.start_index,
+            limit=request.limit,
         )
 
-    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
-        logger.debug(f"DatasetIORouter.append_rows: {dataset_id}, {len(rows)} rows")
-        provider = await self.routing_table.get_provider_impl(dataset_id)
+    async def append_rows(self, request: AppendRowsRequest) -> None:
+        logger.debug(f"DatasetIORouter.append_rows: {request.dataset_id}, {len(request.rows)} rows")
+        provider = await self.routing_table.get_provider_impl(request.dataset_id)
         return await provider.append_rows(
-            dataset_id=dataset_id,
-            rows=rows,
+            dataset_id=request.dataset_id,
+            rows=request.rows,
         )
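Note: the router unwraps each request model back into keyword arguments at the provider boundary, so provider implementations keep their old signatures while the public surface moves to request objects. A usage sketch (router construction elided; assumes PaginatedResponse exposes its rows as .data):

    from llama_stack_api import AppendRowsRequest, IterRowsRequest


    async def demo(router) -> None:
        await router.append_rows(
            AppendRowsRequest(
                dataset_id="my-dataset",
                rows=[{"question": "2 + 2?", "answer": "4"}],
            )
        )
        page = await router.iterrows(
            IterRowsRequest(dataset_id="my-dataset", start_index=0, limit=100)
        )
        for row in page.data:
            print(row)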