llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
  71. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  72. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  73. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  74. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
  75. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  76. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  77. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  78. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  79. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  80. llama_stack/providers/registry/agents.py +1 -0
  81. llama_stack/providers/registry/inference.py +1 -9
  82. llama_stack/providers/registry/vector_io.py +136 -16
  83. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  84. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  85. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  86. llama_stack/providers/remote/files/s3/README.md +266 -0
  87. llama_stack/providers/remote/files/s3/config.py +5 -3
  88. llama_stack/providers/remote/files/s3/files.py +2 -2
  89. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  90. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  91. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  92. llama_stack/providers/remote/inference/together/together.py +4 -0
  93. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  94. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  95. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  96. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  97. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  98. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  99. llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
  100. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  101. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  102. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  103. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  104. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  105. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  106. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  107. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  108. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  109. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  110. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  111. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  112. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  113. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  114. llama_stack/providers/utils/bedrock/client.py +3 -3
  115. llama_stack/providers/utils/bedrock/config.py +7 -7
  116. llama_stack/providers/utils/inference/__init__.py +0 -25
  117. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  118. llama_stack/providers/utils/inference/http_client.py +239 -0
  119. llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
  120. llama_stack/providers/utils/inference/model_registry.py +148 -2
  121. llama_stack/providers/utils/inference/openai_compat.py +1 -158
  122. llama_stack/providers/utils/inference/openai_mixin.py +42 -2
  123. llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
  124. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  125. llama_stack/providers/utils/memory/vector_store.py +46 -19
  126. llama_stack/providers/utils/responses/responses_store.py +40 -6
  127. llama_stack/providers/utils/safety.py +114 -0
  128. llama_stack/providers/utils/tools/mcp.py +44 -3
  129. llama_stack/testing/api_recorder.py +9 -3
  130. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
  131. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
  132. llama_stack-0.5.0.dist-info/top_level.txt +1 -0
  133. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  134. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  135. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  136. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  137. llama_stack/models/llama/hadamard_utils.py +0 -88
  138. llama_stack/models/llama/llama3/args.py +0 -74
  139. llama_stack/models/llama/llama3/generation.py +0 -378
  140. llama_stack/models/llama/llama3/model.py +0 -304
  141. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  142. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  143. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  144. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  145. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  146. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  147. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  148. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  149. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  150. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  151. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  152. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  153. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  154. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  155. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  156. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  157. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  158. llama_stack/models/llama/llama4/args.py +0 -107
  159. llama_stack/models/llama/llama4/ffn.py +0 -58
  160. llama_stack/models/llama/llama4/moe.py +0 -214
  161. llama_stack/models/llama/llama4/preprocess.py +0 -435
  162. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  163. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  164. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  165. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  166. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  167. llama_stack/models/llama/quantize_impls.py +0 -316
  168. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  169. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  170. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  171. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  172. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  173. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  174. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  175. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  176. llama_stack_api/__init__.py +0 -945
  177. llama_stack_api/admin/__init__.py +0 -45
  178. llama_stack_api/admin/api.py +0 -72
  179. llama_stack_api/admin/fastapi_routes.py +0 -117
  180. llama_stack_api/admin/models.py +0 -113
  181. llama_stack_api/agents.py +0 -173
  182. llama_stack_api/batches/__init__.py +0 -40
  183. llama_stack_api/batches/api.py +0 -53
  184. llama_stack_api/batches/fastapi_routes.py +0 -113
  185. llama_stack_api/batches/models.py +0 -78
  186. llama_stack_api/benchmarks/__init__.py +0 -43
  187. llama_stack_api/benchmarks/api.py +0 -39
  188. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  189. llama_stack_api/benchmarks/models.py +0 -109
  190. llama_stack_api/common/__init__.py +0 -5
  191. llama_stack_api/common/content_types.py +0 -101
  192. llama_stack_api/common/errors.py +0 -95
  193. llama_stack_api/common/job_types.py +0 -38
  194. llama_stack_api/common/responses.py +0 -77
  195. llama_stack_api/common/training_types.py +0 -47
  196. llama_stack_api/common/type_system.py +0 -146
  197. llama_stack_api/connectors.py +0 -146
  198. llama_stack_api/conversations.py +0 -270
  199. llama_stack_api/datasetio.py +0 -55
  200. llama_stack_api/datasets/__init__.py +0 -61
  201. llama_stack_api/datasets/api.py +0 -35
  202. llama_stack_api/datasets/fastapi_routes.py +0 -104
  203. llama_stack_api/datasets/models.py +0 -152
  204. llama_stack_api/datatypes.py +0 -373
  205. llama_stack_api/eval.py +0 -137
  206. llama_stack_api/file_processors/__init__.py +0 -27
  207. llama_stack_api/file_processors/api.py +0 -64
  208. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  209. llama_stack_api/file_processors/models.py +0 -42
  210. llama_stack_api/files/__init__.py +0 -35
  211. llama_stack_api/files/api.py +0 -51
  212. llama_stack_api/files/fastapi_routes.py +0 -124
  213. llama_stack_api/files/models.py +0 -107
  214. llama_stack_api/inference.py +0 -1169
  215. llama_stack_api/inspect_api/__init__.py +0 -37
  216. llama_stack_api/inspect_api/api.py +0 -25
  217. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  218. llama_stack_api/inspect_api/models.py +0 -28
  219. llama_stack_api/internal/kvstore.py +0 -28
  220. llama_stack_api/internal/sqlstore.py +0 -81
  221. llama_stack_api/llama_stack_api/__init__.py +0 -945
  222. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  223. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  224. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  225. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  226. llama_stack_api/llama_stack_api/agents.py +0 -173
  227. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  228. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  229. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  230. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  231. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  232. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  233. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  234. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  235. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  236. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  237. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  238. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  239. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  240. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  241. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  242. llama_stack_api/llama_stack_api/connectors.py +0 -146
  243. llama_stack_api/llama_stack_api/conversations.py +0 -270
  244. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  245. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  246. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  247. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  248. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  249. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  250. llama_stack_api/llama_stack_api/eval.py +0 -137
  251. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  252. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  253. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  254. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  255. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  256. llama_stack_api/llama_stack_api/files/api.py +0 -51
  257. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  258. llama_stack_api/llama_stack_api/files/models.py +0 -107
  259. llama_stack_api/llama_stack_api/inference.py +0 -1169
  260. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  261. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  262. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  263. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  264. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  265. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  266. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  267. llama_stack_api/llama_stack_api/models.py +0 -171
  268. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  269. llama_stack_api/llama_stack_api/post_training.py +0 -370
  270. llama_stack_api/llama_stack_api/prompts.py +0 -203
  271. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  272. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  273. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  274. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  275. llama_stack_api/llama_stack_api/py.typed +0 -0
  276. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  277. llama_stack_api/llama_stack_api/resource.py +0 -37
  278. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  279. llama_stack_api/llama_stack_api/safety.py +0 -132
  280. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  281. llama_stack_api/llama_stack_api/scoring.py +0 -93
  282. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  283. llama_stack_api/llama_stack_api/shields.py +0 -93
  284. llama_stack_api/llama_stack_api/tools.py +0 -226
  285. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  286. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  287. llama_stack_api/llama_stack_api/version.py +0 -9
  288. llama_stack_api/models.py +0 -171
  289. llama_stack_api/openai_responses.py +0 -1468
  290. llama_stack_api/post_training.py +0 -370
  291. llama_stack_api/prompts.py +0 -203
  292. llama_stack_api/providers/__init__.py +0 -33
  293. llama_stack_api/providers/api.py +0 -16
  294. llama_stack_api/providers/fastapi_routes.py +0 -57
  295. llama_stack_api/providers/models.py +0 -24
  296. llama_stack_api/py.typed +0 -0
  297. llama_stack_api/rag_tool.py +0 -168
  298. llama_stack_api/resource.py +0 -37
  299. llama_stack_api/router_utils.py +0 -160
  300. llama_stack_api/safety.py +0 -132
  301. llama_stack_api/schema_utils.py +0 -208
  302. llama_stack_api/scoring.py +0 -93
  303. llama_stack_api/scoring_functions.py +0 -211
  304. llama_stack_api/shields.py +0 -93
  305. llama_stack_api/tools.py +0 -226
  306. llama_stack_api/vector_io.py +0 -941
  307. llama_stack_api/vector_stores.py +0 -53
  308. llama_stack_api/version.py +0 -9
  309. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
  310. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
  311. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0

llama_stack/providers/remote/eval/nvidia/eval.py

@@ -11,15 +11,19 @@ from llama_stack.providers.utils.inference.model_registry import ModelRegistryHe
  from llama_stack_api import (
      Agents,
      Benchmark,
-     BenchmarkConfig,
      BenchmarksProtocolPrivate,
      DatasetIO,
      Datasets,
      Eval,
      EvaluateResponse,
+     EvaluateRowsRequest,
      Inference,
      Job,
+     JobCancelRequest,
+     JobResultRequest,
      JobStatus,
+     JobStatusRequest,
+     RunEvalRequest,
      Scoring,
      ScoringResult,
  )
@@ -91,21 +95,20 @@ class NVIDIAEvalImpl(

      async def run_eval(
          self,
-         benchmark_id: str,
-         benchmark_config: BenchmarkConfig,
+         request: RunEvalRequest,
      ) -> Job:
          """Run an evaluation job for a benchmark."""
          model = (
-             benchmark_config.eval_candidate.model
-             if benchmark_config.eval_candidate.type == "model"
-             else benchmark_config.eval_candidate.config.model
+             request.benchmark_config.eval_candidate.model
+             if request.benchmark_config.eval_candidate.type == "model"
+             else request.benchmark_config.eval_candidate.config.model
          )
          nvidia_model = self.get_provider_model_id(model) or model

          result = await self._evaluator_post(
              "/v1/evaluation/jobs",
              {
-                 "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
+                 "config": f"{DEFAULT_NAMESPACE}/{request.benchmark_id}",
                  "target": {"type": "model", "model": nvidia_model},
              },
          )
@@ -114,20 +117,17 @@ class NVIDIAEvalImpl(

      async def evaluate_rows(
          self,
-         benchmark_id: str,
-         input_rows: list[dict[str, Any]],
-         scoring_functions: list[str],
-         benchmark_config: BenchmarkConfig,
+         request: EvaluateRowsRequest,
      ) -> EvaluateResponse:
          raise NotImplementedError()

-     async def job_status(self, benchmark_id: str, job_id: str) -> Job:
+     async def job_status(self, request: JobStatusRequest) -> Job:
          """Get the status of an evaluation job.

          EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed".
          JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed"
          """
-         result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}")
+         result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}")
          result_status = result["status"]

          job_status = JobStatus.failed
@@ -140,27 +140,28 @@ class NVIDIAEvalImpl(
          elif result_status in ["cancelled"]:
              job_status = JobStatus.cancelled

-         return Job(job_id=job_id, status=job_status)
+         return Job(job_id=request.job_id, status=job_status)

-     async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
+     async def job_cancel(self, request: JobCancelRequest) -> None:
          """Cancel the evaluation job."""
-         await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {})
+         await self._evaluator_post(f"/v1/evaluation/jobs/{request.job_id}/cancel", {})

-     async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
+     async def job_result(self, request: JobResultRequest) -> EvaluateResponse:
          """Returns the results of the evaluation job."""

-         job = await self.job_status(benchmark_id, job_id)
+         job_status_request = JobStatusRequest(benchmark_id=request.benchmark_id, job_id=request.job_id)
+         job = await self.job_status(job_status_request)
          status = job.status
          if not status or status != JobStatus.completed:
-             raise ValueError(f"Job {job_id} not completed. Status: {status.value}")
+             raise ValueError(f"Job {request.job_id} not completed. Status: {status.value}")

-         result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results")
+         result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}/results")

          return EvaluateResponse(
              # TODO: these are stored in detailed results on NeMo Evaluator side; can be added
              generations=[],
              scores={
-                 benchmark_id: ScoringResult(
+                 request.benchmark_id: ScoringResult(
                      score_rows=[],
                      aggregated_results=result,
                  )

llama_stack/providers/remote/files/s3/README.md (new file)

@@ -0,0 +1,266 @@
+ # S3 Files Provider
+
+ A remote S3-based implementation of the Llama Stack Files API that provides scalable cloud file storage with metadata persistence.
+
+ ## Features
+
+ - **AWS S3 Storage**: Store files in AWS S3 buckets for scalable, durable storage
+ - **Metadata Management**: Uses a SQL database for efficient file metadata queries
+ - **OpenAI API Compatibility**: Full compatibility with OpenAI Files API endpoints
+ - **Flexible Authentication**: Support for IAM roles and access keys
+ - **Custom S3 Endpoints**: Support for MinIO and other S3-compatible services
+
+ ## Configuration
+
+ ### Basic Configuration
+
+ ```yaml
+ api: files
+ provider_type: remote::s3
+ config:
+   bucket_name: my-llama-stack-files
+   region: us-east-1
+   metadata_store:
+     type: sqlite
+     db_path: ./s3_files_metadata.db
+ ```
+
+ ### Advanced Configuration
+
+ ```yaml
+ api: files
+ provider_type: remote::s3
+ config:
+   bucket_name: my-llama-stack-files
+   region: us-east-1
+   aws_access_key_id: YOUR_ACCESS_KEY
+   aws_secret_access_key: YOUR_SECRET_KEY
+   endpoint_url: https://s3.amazonaws.com  # Optional, for custom endpoints
+   metadata_store:
+     type: sqlite
+     db_path: ./s3_files_metadata.db
+ ```
+
+ ### Environment Variables
+
+ The configuration supports environment variable substitution:
+
+ ```yaml
+ config:
+   bucket_name: "${env.S3_BUCKET_NAME}"
+   region: "${env.AWS_REGION:=us-east-1}"
+   aws_access_key_id: "${env.AWS_ACCESS_KEY_ID:=}"
+   aws_secret_access_key: "${env.AWS_SECRET_ACCESS_KEY:=}"
+   endpoint_url: "${env.S3_ENDPOINT_URL:=}"
+ ```
+
+ Note: `S3_BUCKET_NAME` has no default value since S3 bucket names must be globally unique.
+
+ ## Authentication
+
+ ### IAM Roles (Recommended)
+
+ For production deployments, use IAM roles:
+
+ ```yaml
+ config:
+   bucket_name: my-bucket
+   region: us-east-1
+   # No credentials needed - will use IAM role
+ ```
+
+ ### Access Keys
+
+ For development or specific use cases:
+
+ ```yaml
+ config:
+   bucket_name: my-bucket
+   region: us-east-1
+   aws_access_key_id: AKIAIOSFODNN7EXAMPLE
+   aws_secret_access_key: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+ ```
+
+ ## S3 Bucket Setup
+
+ ### Required Permissions
+
+ The S3 provider requires the following permissions:
+
+ ```json
+ {
+   "Version": "2012-10-17",
+   "Statement": [
+     {
+       "Effect": "Allow",
+       "Action": [
+         "s3:GetObject",
+         "s3:PutObject",
+         "s3:DeleteObject",
+         "s3:ListBucket"
+       ],
+       "Resource": [
+         "arn:aws:s3:::your-bucket-name",
+         "arn:aws:s3:::your-bucket-name/*"
+       ]
+     }
+   ]
+ }
+ ```
+
+ ### Automatic Bucket Creation
+
+ By default, the S3 provider expects the bucket to already exist. If you want the provider to create the bucket automatically when it doesn't exist, set `auto_create_bucket: true` in your configuration:
+
+ ```yaml
+ config:
+   bucket_name: my-bucket
+   auto_create_bucket: true  # Will create the bucket if it doesn't exist
+   region: us-east-1
+ ```
+
+ **Note**: When `auto_create_bucket` is enabled, the provider needs additional permissions:
+
+ ```json
+ {
+   "Version": "2012-10-17",
+   "Statement": [
+     {
+       "Effect": "Allow",
+       "Action": [
+         "s3:GetObject",
+         "s3:PutObject",
+         "s3:DeleteObject",
+         "s3:ListBucket",
+         "s3:CreateBucket"
+       ],
+       "Resource": [
+         "arn:aws:s3:::your-bucket-name",
+         "arn:aws:s3:::your-bucket-name/*"
+       ]
+     }
+   ]
+ }
+ ```
+
+ ### Bucket Policy (Optional)
+
+ For additional security, you can add a bucket policy:
+
+ ```json
+ {
+   "Version": "2012-10-17",
+   "Statement": [
+     {
+       "Sid": "LlamaStackAccess",
+       "Effect": "Allow",
+       "Principal": {
+         "AWS": "arn:aws:iam::YOUR-ACCOUNT:role/LlamaStackRole"
+       },
+       "Action": [
+         "s3:GetObject",
+         "s3:PutObject",
+         "s3:DeleteObject"
+       ],
+       "Resource": "arn:aws:s3:::your-bucket-name/*"
+     },
+     {
+       "Sid": "LlamaStackBucketAccess",
+       "Effect": "Allow",
+       "Principal": {
+         "AWS": "arn:aws:iam::YOUR-ACCOUNT:role/LlamaStackRole"
+       },
+       "Action": [
+         "s3:ListBucket"
+       ],
+       "Resource": "arn:aws:s3:::your-bucket-name"
+     }
+   ]
+ }
+ ```
+
+ ## Features
+
+ ### Metadata Persistence
+
+ File metadata is stored in a SQL database for fast queries and OpenAI API compatibility; a usage sketch follows the list below. The metadata includes:
+
+ - File ID
+ - Original filename
+ - Purpose (assistants, batch, etc.)
+ - File size in bytes
+ - Created and expiration timestamps
+
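+ As a quick illustration of how this metadata surfaces, files can be uploaded and inspected through the OpenAI-compatible Files endpoints. The sketch below assumes a Llama Stack server listening on `localhost:8321` and uses the standard `openai` client; the port, API key, and file name are placeholders:
+
+ ```python
+ from openai import OpenAI
+
+ # Point the standard OpenAI client at the stack's OpenAI-compatible API
+ client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
+
+ # Upload a file; the returned object mirrors the stored metadata fields
+ uploaded = client.files.create(file=open("report.txt", "rb"), purpose="assistants")
+ print(uploaded.id, uploaded.filename, uploaded.purpose, uploaded.bytes, uploaded.created_at)
+ ```
+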
+ ### TTL and Cleanup
+
+ Files currently have a fixed long expiration time (100 years).
+
+ ## Development and Testing
+
+ ### Using MinIO
+
+ For self-hosted S3-compatible storage:
+
+ ```yaml
+ config:
+   bucket_name: test-bucket
+   region: us-east-1
+   endpoint_url: http://localhost:9000
+   aws_access_key_id: minioadmin
+   aws_secret_access_key: minioadmin
+ ```
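+
+ If you don't already have MinIO running, the upstream container image is a quick way to stand one up locally. This is a sketch only; the ports and the default `minioadmin` credentials should be changed for anything beyond local testing:
+
+ ```bash
+ # Start a throwaway local MinIO server: S3 API on :9000, web console on :9001
+ docker run --rm -p 9000:9000 -p 9001:9001 minio/minio server /data --console-address ":9001"
+ ```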
+
+ ### Using OCI Object Storage with S3 Compatibility
+
+ [Official Object Storage Amazon S3 Compatibility API Documentation](https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/s3compatibleapi.htm)
+
+ OCI Object Storage can be used through the OCI S3 Compatibility API. Update `config.yaml` and set the environment variables as shown below.
+
+ #### config.yaml
+
+ ```yaml
+ provider_type: remote::s3
+ config:
+   bucket_name: "${env.S3_BUCKET_NAME}"
+   region: "${env.AWS_REGION:=us-east-1}"
+   aws_access_key_id: "${env.AWS_ACCESS_KEY_ID:=}"
+   aws_secret_access_key: "${env.AWS_SECRET_ACCESS_KEY:=}"
+   endpoint_url: "${env.S3_ENDPOINT_URL:=}"
+   metadata_store:
+     table_name: files_metadata
+     backend: sql_default
+ ```
+
+ #### .env
+
+ ```
+ AWS_ACCESS_KEY_ID=OCI_ACCESS_KEY
+ AWS_SECRET_ACCESS_KEY=OCI_SECRET_KEY
+ S3_BUCKET_NAME=OCI_BUCKET_NAME
+ S3_ENDPOINT_URL=https://<namespace>.compat.objectstorage.<region>.oci.customer-oci.com
+ AWS_REQUEST_CHECKSUM_CALCULATION=when_required
+ AWS_RESPONSE_CHECKSUM_VALIDATION=when_required
+ ```
+
+ ## Monitoring and Logging
+
+ The provider logs important operations and errors. For production deployments, consider:
+
+ - CloudWatch monitoring for S3 operations
+ - Custom metrics for file upload/download rates
+ - Error rate monitoring
+ - Performance metrics tracking
+
+ ## Error Handling
+
+ The provider handles various error scenarios; a sketch of the typical pattern follows the list:
+
+ - S3 connectivity issues
+ - Bucket access permissions
+ - File not found errors
+ - Metadata consistency checks
+
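+ The sketch below is illustrative only: it uses botocore's standard `ClientError`, and `s3_client`, `bucket_name`, and `file_id` are stand-in names rather than the provider's actual code:
+
+ ```python
+ from botocore.exceptions import ClientError
+
+
+ def read_file(s3_client, bucket_name: str, file_id: str) -> bytes:
+     try:
+         obj = s3_client.get_object(Bucket=bucket_name, Key=file_id)
+     except ClientError as e:
+         # botocore reports the S3 error type as a string code
+         if e.response["Error"]["Code"] == "NoSuchKey":
+             raise FileNotFoundError(f"File {file_id} not found") from e
+         # Connectivity and permission failures propagate to the caller
+         raise
+     return obj["Body"].read()
+ ```
+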
+ ## Known Limitations
+
+ - Fixed long TTL (100 years) instead of configurable expiration
+ - No server-side encryption enabled by default
+ - No support for AWS session tokens
+ - No S3 key prefix organization support
+ - No multipart upload support (all files uploaded as single objects)

llama_stack/providers/remote/files/s3/config.py

@@ -6,7 +6,7 @@

  from typing import Any

- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, Field, SecretStr

  from llama_stack.core.storage.datatypes import SqlStoreReference

@@ -16,8 +16,10 @@ class S3FilesImplConfig(BaseModel):

      bucket_name: str = Field(description="S3 bucket name to store files")
      region: str = Field(default="us-east-1", description="AWS region where the bucket is located")
-     aws_access_key_id: str | None = Field(default=None, description="AWS access key ID (optional if using IAM roles)")
-     aws_secret_access_key: str | None = Field(
+     aws_access_key_id: SecretStr | None = Field(
+         default=None, description="AWS access key ID (optional if using IAM roles)"
+     )
+     aws_secret_access_key: SecretStr | None = Field(
          default=None, description="AWS secret access key (optional if using IAM roles)"
      )
      endpoint_url: str | None = Field(default=None, description="Custom S3 endpoint URL (for MinIO, LocalStack, etc.)")

llama_stack/providers/remote/files/s3/files.py

@@ -57,8 +57,8 @@ def _create_s3_client(config: S3FilesImplConfig) -> "S3Client":
      if config.aws_access_key_id and config.aws_secret_access_key:
          s3_config.update(
              {
-                 "aws_access_key_id": config.aws_access_key_id,
-                 "aws_secret_access_key": config.aws_secret_access_key,
+                 "aws_access_key_id": config.aws_access_key_id.get_secret_value(),
+                 "aws_secret_access_key": config.aws_secret_access_key.get_secret_value(),
              }
          )


llama_stack/providers/remote/inference/gemini/gemini.py

@@ -12,6 +12,7 @@ from llama_stack_api import (
      OpenAIEmbeddingsRequestWithExtraBody,
      OpenAIEmbeddingsResponse,
      OpenAIEmbeddingUsage,
+     validate_embeddings_input_is_text,
  )

  from .config import GeminiConfig
@@ -37,6 +38,9 @@ class GeminiInferenceAdapter(OpenAIMixin):
          Override embeddings method to handle Gemini's missing usage statistics.
          Gemini's embedding API doesn't return usage information, so we provide default values.
          """
+         # Validate that input contains only text, not token arrays
+         validate_embeddings_input_is_text(params)
+
          # Build request params conditionally to avoid NotGiven/Omit type mismatch
          request_params: dict[str, Any] = {
              "model": await self._get_provider_model_id(params.model),

llama_stack/providers/remote/inference/nvidia/NVIDIA.md (new file)

@@ -0,0 +1,203 @@
+ # NVIDIA Inference Provider for LlamaStack
+
+ This provider enables running inference using NVIDIA NIM.
+
+ ## Features
+
+ - Endpoints for completions, chat completions, and embeddings for registered models
+
+ ## Getting Started
+
+ ### Prerequisites
+
+ - LlamaStack with NVIDIA configuration
+ - Access to an NVIDIA NIM deployment
+ - A deployed NIM for the model you want to use for inference
+
+ ### Setup
+
+ Build the NVIDIA environment:
+
+ ```bash
+ uv pip install llama-stack-client
+ uv run llama stack list-deps nvidia | xargs -L1 uv pip install
+ ```
+
+ ### Basic Usage with the LlamaStack Python Client
+
+ #### Initialize the client
+
+ ```python
+ import os
+
+ os.environ["NVIDIA_API_KEY"] = (
+     ""  # Required if using a hosted NIM endpoint. Not required if self-hosted.
+ )
+ os.environ["NVIDIA_BASE_URL"] = "http://nim.test"  # NIM URL
+
+ from llama_stack.core.library_client import LlamaStackAsLibraryClient
+
+ client = LlamaStackAsLibraryClient("nvidia")
+ client.initialize()
+ ```
+
+ ### Create Chat Completion
+
+ The following example shows how to create a chat completion for an NVIDIA NIM.
+
+ ```python
+ response = client.chat.completions.create(
+     model="nvidia/meta/llama-3.1-8b-instruct",
+     messages=[
+         {
+             "role": "system",
+             "content": "You must respond to each message with only one word",
+         },
+         {
+             "role": "user",
+             "content": "Complete the sentence using one word: Roses are red, violets are:",
+         },
+     ],
+     stream=False,
+     max_tokens=50,
+ )
+ print(f"Response: {response.choices[0].message.content}")
+ ```
+
+ ### Tool Calling Example
+
+ The following example shows how to do tool calling with an NVIDIA NIM.
+
+ ```python
+ tool_definition = {
+     "type": "function",
+     "function": {
+         "name": "get_weather",
+         "description": "Get current weather information for a location",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "location": {
+                     "type": "string",
+                     "description": "The city and state, e.g. San Francisco, CA",
+                 },
+                 "unit": {
+                     "type": "string",
+                     "description": "Temperature unit (celsius or fahrenheit)",
+                     "default": "celsius",
+                 },
+             },
+             "required": ["location"],
+         },
+     },
+ }
+
+ tool_response = client.chat.completions.create(
+     model="nvidia/meta/llama-3.1-8b-instruct",
+     messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
+     tools=[tool_definition],
+ )
+
+ print(f"Response content: {tool_response.choices[0].message.content}")
+ if tool_response.choices[0].message.tool_calls:
+     for tool_call in tool_response.choices[0].message.tool_calls:
+         print(f"Tool Called: {tool_call.function.name}")
+         print(f"Arguments: {tool_call.function.arguments}")
+ ```
+
+ ### Structured Output Example
+
+ The following example shows how to get structured output from an NVIDIA NIM.
+
+ ```python
+ person_schema = {
+     "type": "object",
+     "properties": {
+         "name": {"type": "string"},
+         "age": {"type": "number"},
+         "occupation": {"type": "string"},
+     },
+     "required": ["name", "age", "occupation"],
+ }
+
+ structured_response = client.chat.completions.create(
+     model="nvidia/meta/llama-3.1-8b-instruct",
+     messages=[
+         {
+             "role": "user",
+             "content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer.",
+         }
+     ],
+     extra_body={"nvext": {"guided_json": person_schema}},
+ )
+ print(f"Structured Response: {structured_response.choices[0].message.content}")
+ ```
+
+ ### Create Embeddings
+
+ The following example shows how to create embeddings for an NVIDIA NIM.
+
+ ```python
+ response = client.embeddings.create(
+     model="nvidia/nvidia/llama-3.2-nv-embedqa-1b-v2",
+     input=["What is the capital of France?"],
+     extra_body={"input_type": "query"},
+ )
+ print(f"Embeddings: {response.data}")
+ ```
+
+ ### Vision Language Models Example
+
+ The following example shows how to run vision inference using an NVIDIA NIM.
+
+ ```python
+ import base64
+
+
+ def load_image_as_base64(image_path):
+     with open(image_path, "rb") as image_file:
+         img_bytes = image_file.read()
+         return base64.b64encode(img_bytes).decode("utf-8")
+
+
+ image_path = {path_to_the_image}
+ demo_image_b64 = load_image_as_base64(image_path)
+
+ vlm_response = client.chat.completions.create(
+     model="nvidia/meta/llama-3.2-11b-vision-instruct",
+     messages=[
+         {
+             "role": "user",
+             "content": [
+                 {
+                     "type": "image_url",
+                     "image_url": {
+                         "url": f"data:image/png;base64,{demo_image_b64}",
+                     },
+                 },
+                 {
+                     "type": "text",
+                     "text": "Please describe what you see in this image in detail.",
+                 },
+             ],
+         }
+     ],
+ )
+
+ print(f"VLM Response: {vlm_response.choices[0].message.content}")
+ ```
+
+ ### Rerank Example
+
+ The following example shows how to rerank documents using an NVIDIA NIM.
+
+ ```python
+ rerank_response = client.alpha.inference.rerank(
+     model="nvidia/nvidia/llama-3.2-nv-rerankqa-1b-v2",
+     query="query",
+     items=[
+         "item_1",
+         "item_2",
+         "item_3",
+     ],
+ )
+
+ for i, result in enumerate(rerank_response):
+     print(f"{i + 1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]")
+ ```

llama_stack/providers/remote/inference/openai/openai.py

@@ -24,6 +24,8 @@ class OpenAIInferenceAdapter(OpenAIMixin):

      provider_data_api_key_field: str = "openai_api_key"

+     supports_tokenized_embeddings_input: bool = True
+
      embedding_model_metadata: dict[str, dict[str, int]] = {
          "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
          "text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},

llama_stack/providers/remote/inference/together/together.py

@@ -18,6 +18,7 @@ from llama_stack_api import (
      OpenAIEmbeddingsRequestWithExtraBody,
      OpenAIEmbeddingsResponse,
      OpenAIEmbeddingUsage,
+     validate_embeddings_input_is_text,
  )

  from .config import TogetherImplConfig
@@ -74,6 +75,9 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
          - does not support user param, returns 400 Unrecognized request arguments supplied: user
          - does not support dimensions param, returns 400 Unrecognized request arguments supplied: dimensions
          """
+         # Validate that input contains only text, not token arrays
+         validate_embeddings_input_is_text(params)
+
          # Together support ticket #13332 -> will not fix
          if params.user is not None:
              raise ValueError("Together's embeddings endpoint does not support user param.")

llama_stack/providers/remote/inference/vertexai/config.py

@@ -19,7 +19,7 @@ class VertexAIProviderDataValidator(BaseModel):
      )
      vertex_location: str | None = Field(
          default=None,
-         description="Google Cloud location for Vertex AI (e.g., us-central1)",
+         description="Google Cloud location for Vertex AI (e.g., global)",
      )


@@ -31,7 +31,7 @@ class VertexAIConfig(RemoteInferenceProviderConfig):
          description="Google Cloud project ID for Vertex AI",
      )
      location: str = Field(
-         default="us-central1",
+         default="global",
          description="Google Cloud location for Vertex AI",
      )

@@ -39,7 +39,7 @@ class VertexAIConfig(RemoteInferenceProviderConfig):
      def sample_run_config(
          cls,
          project: str = "${env.VERTEX_AI_PROJECT:=}",
-         location: str = "${env.VERTEX_AI_LOCATION:=us-central1}",
+         location: str = "${env.VERTEX_AI_LOCATION:=global}",
          **kwargs,
      ) -> dict[str, Any]:
          return {