llama-stack 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (307)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +183 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  71. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  72. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  73. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
  74. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  75. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  76. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  77. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  78. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  79. llama_stack/providers/registry/agents.py +1 -0
  80. llama_stack/providers/registry/inference.py +1 -9
  81. llama_stack/providers/registry/vector_io.py +136 -16
  82. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  83. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  84. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  85. llama_stack/providers/remote/files/s3/README.md +266 -0
  86. llama_stack/providers/remote/files/s3/config.py +5 -3
  87. llama_stack/providers/remote/files/s3/files.py +2 -2
  88. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  89. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  90. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  91. llama_stack/providers/remote/inference/together/together.py +4 -0
  92. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  93. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  94. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  95. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  96. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  97. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  98. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  99. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  100. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  101. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  102. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  103. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  104. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  105. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  106. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  107. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  108. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  109. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  110. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  111. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  112. llama_stack/providers/utils/bedrock/client.py +3 -3
  113. llama_stack/providers/utils/bedrock/config.py +7 -7
  114. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  115. llama_stack/providers/utils/inference/http_client.py +239 -0
  116. llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
  117. llama_stack/providers/utils/inference/model_registry.py +148 -2
  118. llama_stack/providers/utils/inference/openai_compat.py +2 -1
  119. llama_stack/providers/utils/inference/openai_mixin.py +41 -2
  120. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  121. llama_stack/providers/utils/memory/vector_store.py +46 -19
  122. llama_stack/providers/utils/responses/responses_store.py +40 -6
  123. llama_stack/providers/utils/safety.py +114 -0
  124. llama_stack/providers/utils/tools/mcp.py +44 -3
  125. llama_stack/testing/api_recorder.py +9 -3
  126. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
  127. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +131 -275
  128. llama_stack-0.5.0rc1.dist-info/top_level.txt +1 -0
  129. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  130. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  131. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  132. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  133. llama_stack/models/llama/hadamard_utils.py +0 -88
  134. llama_stack/models/llama/llama3/args.py +0 -74
  135. llama_stack/models/llama/llama3/generation.py +0 -378
  136. llama_stack/models/llama/llama3/model.py +0 -304
  137. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  138. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  139. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  140. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  141. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  142. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  143. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  144. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  145. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  146. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  147. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  148. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  149. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  150. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  151. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  152. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  153. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  154. llama_stack/models/llama/llama4/args.py +0 -107
  155. llama_stack/models/llama/llama4/ffn.py +0 -58
  156. llama_stack/models/llama/llama4/moe.py +0 -214
  157. llama_stack/models/llama/llama4/preprocess.py +0 -435
  158. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  159. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  160. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  161. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  162. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  163. llama_stack/models/llama/quantize_impls.py +0 -316
  164. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  165. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  166. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  167. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  168. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  169. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  170. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  171. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  172. llama_stack_api/__init__.py +0 -945
  173. llama_stack_api/admin/__init__.py +0 -45
  174. llama_stack_api/admin/api.py +0 -72
  175. llama_stack_api/admin/fastapi_routes.py +0 -117
  176. llama_stack_api/admin/models.py +0 -113
  177. llama_stack_api/agents.py +0 -173
  178. llama_stack_api/batches/__init__.py +0 -40
  179. llama_stack_api/batches/api.py +0 -53
  180. llama_stack_api/batches/fastapi_routes.py +0 -113
  181. llama_stack_api/batches/models.py +0 -78
  182. llama_stack_api/benchmarks/__init__.py +0 -43
  183. llama_stack_api/benchmarks/api.py +0 -39
  184. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  185. llama_stack_api/benchmarks/models.py +0 -109
  186. llama_stack_api/common/__init__.py +0 -5
  187. llama_stack_api/common/content_types.py +0 -101
  188. llama_stack_api/common/errors.py +0 -95
  189. llama_stack_api/common/job_types.py +0 -38
  190. llama_stack_api/common/responses.py +0 -77
  191. llama_stack_api/common/training_types.py +0 -47
  192. llama_stack_api/common/type_system.py +0 -146
  193. llama_stack_api/connectors.py +0 -146
  194. llama_stack_api/conversations.py +0 -270
  195. llama_stack_api/datasetio.py +0 -55
  196. llama_stack_api/datasets/__init__.py +0 -61
  197. llama_stack_api/datasets/api.py +0 -35
  198. llama_stack_api/datasets/fastapi_routes.py +0 -104
  199. llama_stack_api/datasets/models.py +0 -152
  200. llama_stack_api/datatypes.py +0 -373
  201. llama_stack_api/eval.py +0 -137
  202. llama_stack_api/file_processors/__init__.py +0 -27
  203. llama_stack_api/file_processors/api.py +0 -64
  204. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  205. llama_stack_api/file_processors/models.py +0 -42
  206. llama_stack_api/files/__init__.py +0 -35
  207. llama_stack_api/files/api.py +0 -51
  208. llama_stack_api/files/fastapi_routes.py +0 -124
  209. llama_stack_api/files/models.py +0 -107
  210. llama_stack_api/inference.py +0 -1169
  211. llama_stack_api/inspect_api/__init__.py +0 -37
  212. llama_stack_api/inspect_api/api.py +0 -25
  213. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  214. llama_stack_api/inspect_api/models.py +0 -28
  215. llama_stack_api/internal/kvstore.py +0 -28
  216. llama_stack_api/internal/sqlstore.py +0 -81
  217. llama_stack_api/llama_stack_api/__init__.py +0 -945
  218. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  219. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  220. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  221. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  222. llama_stack_api/llama_stack_api/agents.py +0 -173
  223. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  224. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  225. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  226. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  227. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  228. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  229. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  230. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  231. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  232. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  233. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  234. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  235. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  236. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  237. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  238. llama_stack_api/llama_stack_api/connectors.py +0 -146
  239. llama_stack_api/llama_stack_api/conversations.py +0 -270
  240. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  241. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  242. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  243. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  244. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  245. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  246. llama_stack_api/llama_stack_api/eval.py +0 -137
  247. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  248. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  249. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  250. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  251. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  252. llama_stack_api/llama_stack_api/files/api.py +0 -51
  253. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  254. llama_stack_api/llama_stack_api/files/models.py +0 -107
  255. llama_stack_api/llama_stack_api/inference.py +0 -1169
  256. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  257. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  258. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  259. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  260. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  261. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  262. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  263. llama_stack_api/llama_stack_api/models.py +0 -171
  264. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  265. llama_stack_api/llama_stack_api/post_training.py +0 -370
  266. llama_stack_api/llama_stack_api/prompts.py +0 -203
  267. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  268. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  269. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  270. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  271. llama_stack_api/llama_stack_api/py.typed +0 -0
  272. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  273. llama_stack_api/llama_stack_api/resource.py +0 -37
  274. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  275. llama_stack_api/llama_stack_api/safety.py +0 -132
  276. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  277. llama_stack_api/llama_stack_api/scoring.py +0 -93
  278. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  279. llama_stack_api/llama_stack_api/shields.py +0 -93
  280. llama_stack_api/llama_stack_api/tools.py +0 -226
  281. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  282. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  283. llama_stack_api/llama_stack_api/version.py +0 -9
  284. llama_stack_api/models.py +0 -171
  285. llama_stack_api/openai_responses.py +0 -1468
  286. llama_stack_api/post_training.py +0 -370
  287. llama_stack_api/prompts.py +0 -203
  288. llama_stack_api/providers/__init__.py +0 -33
  289. llama_stack_api/providers/api.py +0 -16
  290. llama_stack_api/providers/fastapi_routes.py +0 -57
  291. llama_stack_api/providers/models.py +0 -24
  292. llama_stack_api/py.typed +0 -0
  293. llama_stack_api/rag_tool.py +0 -168
  294. llama_stack_api/resource.py +0 -37
  295. llama_stack_api/router_utils.py +0 -160
  296. llama_stack_api/safety.py +0 -132
  297. llama_stack_api/schema_utils.py +0 -208
  298. llama_stack_api/scoring.py +0 -93
  299. llama_stack_api/scoring_functions.py +0 -211
  300. llama_stack_api/shields.py +0 -93
  301. llama_stack_api/tools.py +0 -226
  302. llama_stack_api/vector_io.py +0 -941
  303. llama_stack_api/vector_stores.py +0 -53
  304. llama_stack_api/version.py +0 -9
  305. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
  306. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
  307. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
@@ -3,17 +3,18 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from typing import Any
 
 from llama_stack_api import (
     DatasetIO,
     Datasets,
     Inference,
+    IterRowsRequest,
+    ScoreBatchRequest,
     ScoreBatchResponse,
+    ScoreRequest,
     ScoreResponse,
     Scoring,
     ScoringFn,
-    ScoringFnParams,
     ScoringFunctionsProtocolPrivate,
     ScoringResult,
 )
@@ -64,19 +65,15 @@ class LlmAsJudgeScoringImpl(
 
     async def score_batch(
         self,
-        dataset_id: str,
-        scoring_functions: dict[str, ScoringFnParams | None] = None,
-        save_results_dataset: bool = False,
+        request: ScoreBatchRequest,
     ) -> ScoreBatchResponse:
-        all_rows = await self.datasetio_api.iterrows(
-            dataset_id=dataset_id,
-            limit=-1,
-        )
-        res = await self.score(
+        all_rows = await self.datasetio_api.iterrows(IterRowsRequest(dataset_id=request.dataset_id, limit=-1))
+        score_request = ScoreRequest(
             input_rows=all_rows.data,
-            scoring_functions=scoring_functions,
+            scoring_functions=request.scoring_functions,
         )
-        if save_results_dataset:
+        res = await self.score(score_request)
+        if request.save_results_dataset:
             # TODO: persist and register dataset on to server for reading
             # self.datasets_api.register_dataset()
             raise NotImplementedError("Save results dataset not implemented yet")
@@ -87,14 +84,13 @@ class LlmAsJudgeScoringImpl(
 
     async def score(
         self,
-        input_rows: list[dict[str, Any]],
-        scoring_functions: dict[str, ScoringFnParams | None] = None,
+        request: ScoreRequest,
     ) -> ScoreResponse:
         res = {}
-        for scoring_fn_id in scoring_functions.keys():
+        for scoring_fn_id in request.scoring_functions.keys():
             scoring_fn = self.llm_as_judge_fn
-            scoring_fn_params = scoring_functions.get(scoring_fn_id, None)
-            score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params)
+            scoring_fn_params = request.scoring_functions.get(scoring_fn_id, None)
+            score_results = await scoring_fn.score(request.input_rows, scoring_fn_id, scoring_fn_params)
             agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params)
             res[scoring_fn_id] = ScoringResult(
                 score_rows=score_results,
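The refactor above replaces loose keyword arguments with a single request object per call. A minimal sketch of the new calling convention, assuming `ScoreRequest` and `ScoreBatchRequest` expose exactly the fields referenced in the hunks (`input_rows`, `scoring_functions`, `dataset_id`, `save_results_dataset`) and that `ScoreResponse` keeps a per-function `results` dict; `impl` stands in for an already-wired `LlmAsJudgeScoringImpl` and the scoring-function ID is illustrative:

```python
from llama_stack_api import ScoreBatchRequest, ScoreRequest


async def demo(impl) -> None:
    # Score rows held in memory: one request object instead of two kwargs.
    response = await impl.score(
        ScoreRequest(
            input_rows=[{"input_query": "2+2?", "generated_answer": "4", "expected_answer": "4"}],
            scoring_functions={"llm-as-judge::base": None},  # None -> use the fn's default params
        )
    )
    print(response.results["llm-as-judge::base"].aggregated_results)

    # Score a whole registered dataset; row iteration now goes through IterRowsRequest internally.
    await impl.score_batch(
        ScoreBatchRequest(
            dataset_id="my-eval-dataset",
            scoring_functions={"llm-as-judge::base": None},
            save_results_dataset=False,  # True currently raises NotImplementedError (see TODO above)
        )
    )
```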
@@ -59,7 +59,7 @@ def serialize_vector(vector: list[float]) -> bytes:
     return struct.pack(f"{len(vector)}f", *vector)
 
 
-def _create_sqlite_connection(db_path):
+def _create_sqlite_connection(db_path: str):
     """Create a SQLite connection with sqlite_vec extension loaded."""
     connection = sqlite3.connect(db_path)
     connection.enable_load_extension(True)
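The hunk stops just after `enable_load_extension(True)`. For orientation, a sketch of the typical sqlite-vec loading pattern this helper presumably follows; the `sqlite_vec.load()` call is the documented API of the `sqlite-vec` package, not shown in the diff:

```python
import sqlite3

import sqlite_vec  # pip install sqlite-vec


def _create_sqlite_connection(db_path: str) -> sqlite3.Connection:
    """Create a SQLite connection with the sqlite_vec extension loaded."""
    connection = sqlite3.connect(db_path)
    connection.enable_load_extension(True)   # must be enabled before loading any extension
    sqlite_vec.load(connection)              # registers the vec0 virtual-table module
    connection.enable_load_extension(False)  # re-disable as a safety measure
    return connection
```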
@@ -37,6 +37,7 @@ def available_providers() -> list[ProviderSpec]:
                 Api.conversations,
                 Api.prompts,
                 Api.files,
+                Api.connectors,
             ],
             optional_api_dependencies=[
                 Api.safety,
@@ -28,14 +28,6 @@ META_REFERENCE_DEPS = [
 
 def available_providers() -> list[ProviderSpec]:
     return [
-        InlineProviderSpec(
-            api=Api.inference,
-            provider_type="inline::meta-reference",
-            pip_packages=META_REFERENCE_DEPS,
-            module="llama_stack.providers.inline.inference.meta_reference",
-            config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
-            description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
-        ),
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
@@ -223,7 +215,7 @@ def available_providers() -> list[ProviderSpec]:
 
 Configuration:
 - Set VERTEX_AI_PROJECT environment variable (required)
-- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
+- Set VERTEX_AI_LOCATION environment variable (optional, defaults to global)
 - Use Google Cloud Application Default Credentials or service account key
 
 Authentication Setup:
@@ -419,6 +419,7 @@ There are three implementations of search for PGVectoIndex available:
 - Semantic understanding - finds documents similar in meaning even if they don't share keywords
 - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
 - Best for: Finding conceptually related content, handling synonyms, cross-language search
+- By default, Llama Stack creates an HNSW (Hierarchical Navigable Small World) index on the "embedding" column of each vector store table, enabling production-ready, performant, and scalable vector search for large datasets out of the box.
 
 2. Keyword Search
 - How it works:
@@ -448,6 +449,7 @@ There are three implementations of search for PGVectoIndex available:
 - Best for: General-purpose search where you want both precision and recall
 
 4. Database Schema
+
 The PGVector implementation stores data optimized for all three search types:
 CREATE TABLE vector_store_xxx (
     id TEXT PRIMARY KEY,
@@ -457,9 +459,6 @@ CREATE TABLE vector_store_xxx (
     tokenized_content TSVECTOR -- For keyword search
 );
 
--- Indexes for performance
-CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content); -- Keyword search
--- Vector index created automatically by pgvector
 
 ## Usage
 
@@ -469,32 +468,55 @@ To use PGVector in your Llama Stack project, follow these steps:
 2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
 3. Start storing and querying vectors.
 
-## This is an example how you can set up your environment for using PGVector
+## This is an example of how you can set up your environment for using PGVector (you can use either Podman or Docker)
 
-1. Export env vars:
+1. Export PGVector environment variables:
 ```bash
-export ENABLE_PGVECTOR=true
+export PGVECTOR_DB=testvectordb
 export PGVECTOR_HOST=localhost
 export PGVECTOR_PORT=5432
-export PGVECTOR_DB=llamastack
-export PGVECTOR_USER=llamastack
-export PGVECTOR_PASSWORD=llamastack
+export PGVECTOR_USER=user
+export PGVECTOR_PASSWORD=password
 ```
 
-2. Create DB:
+2. Pull the pgvector image with the tag you want:
+
+Via Podman:
 ```bash
-psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
-psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
-psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
+podman pull pgvector/pgvector:0.8.1-pg18-trixie
 ```
 
-## Installation
+Via Docker:
+```bash
+docker pull pgvector/pgvector:0.8.1-pg18-trixie
+```
+
+3. Run a container with PGVector:
 
-You can install PGVector using docker:
+Via Podman:
+```bash
+podman run -d \
+  --name pgvector \
+  -e POSTGRES_PASSWORD=password \
+  -e POSTGRES_USER=user \
+  -e POSTGRES_DB=testvectordb \
+  -p 5432:5432 \
+  -v pgvector_data:/var/lib/postgresql \
+  pgvector/pgvector:0.8.1-pg18-trixie
+```
 
+Via Docker:
 ```bash
-docker pull pgvector/pgvector:pg17
+docker run -d \
+  --name pgvector \
+  -e POSTGRES_PASSWORD=password \
+  -e POSTGRES_USER=user \
+  -e POSTGRES_DB=testvectordb \
+  -p 5432:5432 \
+  -v pgvector_data:/var/lib/postgresql \
+  pgvector/pgvector:0.8.1-pg18-trixie
 ```
+
 ## Documentation
 See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
 """,
@@ -823,6 +845,104 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
             optional_api_dependencies=[Api.files, Api.models],
             description="""
 Please refer to the remote provider documentation.
+""",
+        ),
+        RemoteProviderSpec(
+            api=Api.vector_io,
+            adapter_type="elasticsearch",
+            provider_type="remote::elasticsearch",
+            pip_packages=["elasticsearch>=8.16.0,<9.0.0"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.remote.vector_io.elasticsearch",
+            config_class="llama_stack.providers.remote.vector_io.elasticsearch.ElasticsearchVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[Elasticsearch](https://www.elastic.co/) is a vector database provider for Llama Stack.
+It allows you to store and query vectors directly within an Elasticsearch database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+Elasticsearch supports:
+- Storage of embeddings and their metadata
+- Vector search
+- Full-text search
+- Fuzzy search
+- Hybrid search
+- Document storage
+- Metadata filtering
+- Inference service
+- Machine Learning integrations
+
+## Usage
+
+To use Elasticsearch in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Elasticsearch.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can test Elasticsearch locally by running this script in the terminal:
+
+```bash
+curl -fsSL https://elastic.co/start-local | sh
+```
+
+Or you can [start a free trial](https://www.elastic.co/cloud/cloud-trial-overview?utm_campaign=llama-stack-integration) on Elastic Cloud.
+For more information on how to deploy Elasticsearch, see the [official documentation](https://www.elastic.co/docs/deploy-manage/deploy).
+
+## Documentation
+See [Elasticsearch's documentation](https://www.elastic.co/docs/solutions/search) for more details about Elasticsearch in general.
+""",
+        ),
+        RemoteProviderSpec(
+            api=Api.vector_io,
+            adapter_type="oci",
+            provider_type="remote::oci",
+            pip_packages=["oracledb", "numpy"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.remote.vector_io.oci",
+            config_class="llama_stack.providers.remote.vector_io.oci.OCI26aiVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[Oracle 26ai](https://docs.oracle.com/en/database/oracle/oracle-database/26/index.html)
+is a remote vector database provider for Llama Stack. It allows you to store and query vectors directly
+in an Oracle 26ai database.
+## Features
+- Easy to use
+- Fully integrated with Llama Stack
+- Supports vector search, keyword search, and hybrid search
+## Usage
+To use Oracle 26ai in your Llama Stack project, follow these steps:
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Oracle 26ai.
+3. Start storing and querying vectors.
+## Installation
+You can install the Oracle 26ai client using pip:
+```bash
+pip install oracledb
+```
+## Configuration
+```yaml
+vector_io:
+  - provider_id: oci
+    provider_type: remote::oci
+    config:
+      conn_str: "${env.OCI26AI_CONNECTION_STRING}"
+      user: "${env.OCI26AI_USER}"
+      password: "${env.OCI26AI_PASSWORD}"
+      tnsnames_loc: "${env.OCI26AI_TNSNAMES_LOC}"
+      ewallet_pem_loc: "${env.OCI26AI_EWALLET_PEM_LOC}"
+      ewallet_password: "${env.OCI26AI_EWALLET_PWD}"
+      vector_datatype: "${env.OCI26AI_VECTOR_DATATYPE:=FLOAT32}"
+    persistence:
+      namespace: vector_id::oci26ai
+      backend: kv_default
+```
+## Documentation
+See the [Oracle 26ai documentation](https://docs.oracle.com/en/database/oracle/oracle-database/26/index.html)
+for more details about Oracle 26ai in general.
 """,
         ),
     ]
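For orientation, the OCI config keys above map naturally onto a `python-oracledb` connection. A hypothetical sketch; the key-to-parameter mapping (`conn_str` -> `dsn`, `tnsnames_loc` -> `config_dir`, `ewallet_pem_loc` -> `wallet_location`) is an assumption for illustration, not taken from the provider code:

```python
import os

import oracledb  # pip install oracledb

# Assumed mapping of the provider's config keys to python-oracledb arguments.
conn = oracledb.connect(
    user=os.environ["OCI26AI_USER"],
    password=os.environ["OCI26AI_PASSWORD"],
    dsn=os.environ["OCI26AI_CONNECTION_STRING"],    # conn_str
    config_dir=os.environ["OCI26AI_TNSNAMES_LOC"],  # directory containing tnsnames.ora
    wallet_location=os.environ["OCI26AI_EWALLET_PEM_LOC"],
    wallet_password=os.environ["OCI26AI_EWALLET_PWD"],
)
print("connected, server version:", conn.version)
conn.close()
```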
@@ -0,0 +1,74 @@
+# NVIDIA DatasetIO Provider for LlamaStack
+
+This provider enables dataset management using NVIDIA's NeMo Customizer service.
+
+## Features
+
+- Register datasets for fine-tuning LLMs
+- Unregister datasets
+
+## Getting Started
+
+### Prerequisites
+
+- LlamaStack with NVIDIA configuration
+- Access to a hosted NVIDIA NeMo Microservice
+- API key for authentication with the NVIDIA service
+
+### Setup
+
+Build the NVIDIA environment:
+
+```bash
+uv pip install llama-stack-client
+uv run llama stack list-deps nvidia | xargs -L1 uv pip install
+```
+
+### Basic Usage with the LlamaStack Python Client
+
+#### Initialize the client
+
+```python
+import os
+
+os.environ["NVIDIA_API_KEY"] = "your-api-key"
+os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"
+os.environ["NVIDIA_DATASET_NAMESPACE"] = "default"
+os.environ["NVIDIA_PROJECT_ID"] = "test-project"
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+```
+
+#### Register a dataset
+
+```python
+client.datasets.register(
+    purpose="post-training/messages",
+    dataset_id="my-training-dataset",
+    source={"type": "uri", "uri": "hf://datasets/default/sample-dataset"},
+    metadata={
+        "format": "json",
+        "description": "Dataset for LLM fine-tuning",
+        "provider": "nvidia",
+    },
+)
+```
+
+#### Get a list of all registered datasets
+
+```python
+datasets = client.datasets.list()
+for dataset in datasets:
+    print(f"Dataset ID: {dataset.identifier}")
+    print(f"Description: {dataset.metadata.get('description', '')}")
+    print(f"Source: {dataset.source.uri}")
+    print("---")
+```
+
+#### Unregister a dataset
+
+```python
+client.datasets.unregister(dataset_id="my-training-dataset")
+```
@@ -0,0 +1,134 @@
+# NVIDIA NeMo Evaluator Eval Provider
+
+## Overview
+
+For the first integration, benchmarks are mapped to evaluation configs in the NeMo Evaluator. The full evaluation config object is provided as part of the metadata. The `dataset_id` and `scoring_functions` are not used.
+
+Below are a few examples of how to register a benchmark (which in turn creates an evaluation config in NeMo Evaluator) and how to trigger an evaluation.
+
+### Example for registering an academic benchmark
+
+```
+POST /eval/benchmarks
+```
+```json
+{
+  "benchmark_id": "mmlu",
+  "dataset_id": "",
+  "scoring_functions": [],
+  "metadata": {
+    "type": "mmlu"
+  }
+}
+```
+
+### Example for registering a custom evaluation
+
+```
+POST /eval/benchmarks
+```
+```json
+{
+  "benchmark_id": "my-custom-benchmark",
+  "dataset_id": "",
+  "scoring_functions": [],
+  "metadata": {
+    "type": "custom",
+    "params": {
+      "parallelism": 8
+    },
+    "tasks": {
+      "qa": {
+        "type": "completion",
+        "params": {
+          "template": {
+            "prompt": "{{prompt}}",
+            "max_tokens": 200
+          }
+        },
+        "dataset": {
+          "files_url": "hf://datasets/default/sample-basic-test/testing/testing.jsonl"
+        },
+        "metrics": {
+          "bleu": {
+            "type": "bleu",
+            "params": {
+              "references": [
+                "{{ideal_response}}"
+              ]
+            }
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+### Example for triggering a benchmark/custom evaluation
+
+```
+POST /eval/benchmarks/{benchmark_id}/jobs
+```
+```json
+{
+  "benchmark_id": "my-custom-benchmark",
+  "benchmark_config": {
+    "eval_candidate": {
+      "type": "model",
+      "model": "meta-llama/Llama3.1-8B-Instruct",
+      "sampling_params": {
+        "max_tokens": 100,
+        "temperature": 0.7
+      }
+    },
+    "scoring_params": {}
+  }
+}
+```
+
+Response example:
+```json
+{
+  "job_id": "eval-1234",
+  "status": "in_progress"
+}
+```
+
+### Example for getting the status of a job
+```
+GET /eval/benchmarks/{benchmark_id}/jobs/{job_id}
+```
+
+Response example:
+```json
+{
+  "job_id": "eval-1234",
+  "status": "in_progress"
+}
+```
+
+### Example for cancelling a job
+```
+POST /eval/benchmarks/{benchmark_id}/jobs/{job_id}/cancel
+```
+
+### Example for getting the results
+```
+GET /eval/benchmarks/{benchmark_id}/results
+```
+```json
+{
+  "generations": [],
+  "scores": {
+    "{benchmark_id}": {
+      "score_rows": [],
+      "aggregated_results": {
+        "tasks": {},
+        "groups": {}
+      }
+    }
+  }
+}
```
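The REST flow documented in this README can be driven end to end from Python. A minimal sketch using `requests` against the routes shown above; the server base URL and any route prefix are assumptions for illustration:

```python
import requests

BASE = "http://localhost:8321"  # assumed Llama Stack server address; adjust route prefix as needed

# Register the benchmark; this creates the evaluation config in NeMo Evaluator.
requests.post(
    f"{BASE}/eval/benchmarks",
    json={"benchmark_id": "mmlu", "dataset_id": "", "scoring_functions": [], "metadata": {"type": "mmlu"}},
).raise_for_status()

# Trigger an evaluation job for the registered benchmark.
job = requests.post(
    f"{BASE}/eval/benchmarks/mmlu/jobs",
    json={
        "benchmark_id": "mmlu",
        "benchmark_config": {
            "eval_candidate": {
                "type": "model",
                "model": "meta-llama/Llama3.1-8B-Instruct",
                "sampling_params": {"max_tokens": 100, "temperature": 0.7},
            },
            "scoring_params": {},
        },
    },
).json()

# Check the job status; fetch results once it reports "completed".
status = requests.get(f"{BASE}/eval/benchmarks/mmlu/jobs/{job['job_id']}").json()["status"]
if status == "completed":
    results = requests.get(f"{BASE}/eval/benchmarks/mmlu/results").json()
```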
@@ -11,15 +11,19 @@ from llama_stack.providers.utils.inference.model_registry import ModelRegistryHe
 from llama_stack_api import (
     Agents,
     Benchmark,
-    BenchmarkConfig,
     BenchmarksProtocolPrivate,
     DatasetIO,
     Datasets,
     Eval,
     EvaluateResponse,
+    EvaluateRowsRequest,
     Inference,
     Job,
+    JobCancelRequest,
+    JobResultRequest,
     JobStatus,
+    JobStatusRequest,
+    RunEvalRequest,
     Scoring,
     ScoringResult,
 )
@@ -91,21 +95,20 @@ class NVIDIAEvalImpl(
 
     async def run_eval(
         self,
-        benchmark_id: str,
-        benchmark_config: BenchmarkConfig,
+        request: RunEvalRequest,
     ) -> Job:
         """Run an evaluation job for a benchmark."""
         model = (
-            benchmark_config.eval_candidate.model
-            if benchmark_config.eval_candidate.type == "model"
-            else benchmark_config.eval_candidate.config.model
+            request.benchmark_config.eval_candidate.model
+            if request.benchmark_config.eval_candidate.type == "model"
+            else request.benchmark_config.eval_candidate.config.model
        )
        nvidia_model = self.get_provider_model_id(model) or model
 
        result = await self._evaluator_post(
            "/v1/evaluation/jobs",
            {
-                "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
+                "config": f"{DEFAULT_NAMESPACE}/{request.benchmark_id}",
                "target": {"type": "model", "model": nvidia_model},
            },
        )
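The same request-object pattern as the scoring refactor earlier in this diff. A hedged sketch of how a caller now invokes `run_eval`; the field names come from the hunk, while `impl`, `benchmark_config`, and the benchmark ID are illustrative wiring:

```python
from llama_stack_api import RunEvalRequest


async def launch(impl, benchmark_config) -> None:
    # impl: an NVIDIAEvalImpl instance; benchmark_config: a BenchmarkConfig (illustrative)
    request = RunEvalRequest(benchmark_id="mmlu", benchmark_config=benchmark_config)
    job = await impl.run_eval(request)
    print(job.job_id, job.status)
```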
@@ -114,20 +117,17 @@ class NVIDIAEvalImpl(
 
     async def evaluate_rows(
         self,
-        benchmark_id: str,
-        input_rows: list[dict[str, Any]],
-        scoring_functions: list[str],
-        benchmark_config: BenchmarkConfig,
+        request: EvaluateRowsRequest,
     ) -> EvaluateResponse:
         raise NotImplementedError()
 
-    async def job_status(self, benchmark_id: str, job_id: str) -> Job:
+    async def job_status(self, request: JobStatusRequest) -> Job:
         """Get the status of an evaluation job.
 
         EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed".
         JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed"
         """
-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}")
         result_status = result["status"]
 
         job_status = JobStatus.failed
@@ -140,27 +140,28 @@ class NVIDIAEvalImpl(
         elif result_status in ["cancelled"]:
             job_status = JobStatus.cancelled
 
-        return Job(job_id=job_id, status=job_status)
+        return Job(job_id=request.job_id, status=job_status)
 
-    async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
+    async def job_cancel(self, request: JobCancelRequest) -> None:
         """Cancel the evaluation job."""
-        await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {})
+        await self._evaluator_post(f"/v1/evaluation/jobs/{request.job_id}/cancel", {})
 
-    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
+    async def job_result(self, request: JobResultRequest) -> EvaluateResponse:
         """Returns the results of the evaluation job."""
 
-        job = await self.job_status(benchmark_id, job_id)
+        job_status_request = JobStatusRequest(benchmark_id=request.benchmark_id, job_id=request.job_id)
+        job = await self.job_status(job_status_request)
         status = job.status
         if not status or status != JobStatus.completed:
-            raise ValueError(f"Job {job_id} not completed. Status: {status.value}")
+            raise ValueError(f"Job {request.job_id} not completed. Status: {status.value}")
 
-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}/results")
 
         return EvaluateResponse(
             # TODO: these are stored in detailed results on NeMo Evaluator side; can be added
             generations=[],
             scores={
-                benchmark_id: ScoringResult(
+                request.benchmark_id: ScoringResult(
                     score_rows=[],
                     aggregated_results=result,
                 )