llama-stack 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (307)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +183 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  71. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  72. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  73. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
  74. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  75. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  76. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  77. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  78. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  79. llama_stack/providers/registry/agents.py +1 -0
  80. llama_stack/providers/registry/inference.py +1 -9
  81. llama_stack/providers/registry/vector_io.py +136 -16
  82. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  83. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  84. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  85. llama_stack/providers/remote/files/s3/README.md +266 -0
  86. llama_stack/providers/remote/files/s3/config.py +5 -3
  87. llama_stack/providers/remote/files/s3/files.py +2 -2
  88. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  89. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  90. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  91. llama_stack/providers/remote/inference/together/together.py +4 -0
  92. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  93. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  94. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  95. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  96. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  97. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  98. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  99. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  100. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  101. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  102. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  103. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  104. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  105. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  106. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  107. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  108. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  109. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  110. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  111. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  112. llama_stack/providers/utils/bedrock/client.py +3 -3
  113. llama_stack/providers/utils/bedrock/config.py +7 -7
  114. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  115. llama_stack/providers/utils/inference/http_client.py +239 -0
  116. llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
  117. llama_stack/providers/utils/inference/model_registry.py +148 -2
  118. llama_stack/providers/utils/inference/openai_compat.py +2 -1
  119. llama_stack/providers/utils/inference/openai_mixin.py +41 -2
  120. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  121. llama_stack/providers/utils/memory/vector_store.py +46 -19
  122. llama_stack/providers/utils/responses/responses_store.py +40 -6
  123. llama_stack/providers/utils/safety.py +114 -0
  124. llama_stack/providers/utils/tools/mcp.py +44 -3
  125. llama_stack/testing/api_recorder.py +9 -3
  126. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
  127. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +131 -275
  128. llama_stack-0.5.0rc1.dist-info/top_level.txt +1 -0
  129. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  130. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  131. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  132. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  133. llama_stack/models/llama/hadamard_utils.py +0 -88
  134. llama_stack/models/llama/llama3/args.py +0 -74
  135. llama_stack/models/llama/llama3/generation.py +0 -378
  136. llama_stack/models/llama/llama3/model.py +0 -304
  137. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  138. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  139. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  140. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  141. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  142. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  143. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  144. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  145. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  146. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  147. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  148. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  149. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  150. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  151. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  152. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  153. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  154. llama_stack/models/llama/llama4/args.py +0 -107
  155. llama_stack/models/llama/llama4/ffn.py +0 -58
  156. llama_stack/models/llama/llama4/moe.py +0 -214
  157. llama_stack/models/llama/llama4/preprocess.py +0 -435
  158. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  159. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  160. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  161. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  162. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  163. llama_stack/models/llama/quantize_impls.py +0 -316
  164. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  165. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  166. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  167. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  168. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  169. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  170. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  171. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  172. llama_stack_api/__init__.py +0 -945
  173. llama_stack_api/admin/__init__.py +0 -45
  174. llama_stack_api/admin/api.py +0 -72
  175. llama_stack_api/admin/fastapi_routes.py +0 -117
  176. llama_stack_api/admin/models.py +0 -113
  177. llama_stack_api/agents.py +0 -173
  178. llama_stack_api/batches/__init__.py +0 -40
  179. llama_stack_api/batches/api.py +0 -53
  180. llama_stack_api/batches/fastapi_routes.py +0 -113
  181. llama_stack_api/batches/models.py +0 -78
  182. llama_stack_api/benchmarks/__init__.py +0 -43
  183. llama_stack_api/benchmarks/api.py +0 -39
  184. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  185. llama_stack_api/benchmarks/models.py +0 -109
  186. llama_stack_api/common/__init__.py +0 -5
  187. llama_stack_api/common/content_types.py +0 -101
  188. llama_stack_api/common/errors.py +0 -95
  189. llama_stack_api/common/job_types.py +0 -38
  190. llama_stack_api/common/responses.py +0 -77
  191. llama_stack_api/common/training_types.py +0 -47
  192. llama_stack_api/common/type_system.py +0 -146
  193. llama_stack_api/connectors.py +0 -146
  194. llama_stack_api/conversations.py +0 -270
  195. llama_stack_api/datasetio.py +0 -55
  196. llama_stack_api/datasets/__init__.py +0 -61
  197. llama_stack_api/datasets/api.py +0 -35
  198. llama_stack_api/datasets/fastapi_routes.py +0 -104
  199. llama_stack_api/datasets/models.py +0 -152
  200. llama_stack_api/datatypes.py +0 -373
  201. llama_stack_api/eval.py +0 -137
  202. llama_stack_api/file_processors/__init__.py +0 -27
  203. llama_stack_api/file_processors/api.py +0 -64
  204. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  205. llama_stack_api/file_processors/models.py +0 -42
  206. llama_stack_api/files/__init__.py +0 -35
  207. llama_stack_api/files/api.py +0 -51
  208. llama_stack_api/files/fastapi_routes.py +0 -124
  209. llama_stack_api/files/models.py +0 -107
  210. llama_stack_api/inference.py +0 -1169
  211. llama_stack_api/inspect_api/__init__.py +0 -37
  212. llama_stack_api/inspect_api/api.py +0 -25
  213. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  214. llama_stack_api/inspect_api/models.py +0 -28
  215. llama_stack_api/internal/kvstore.py +0 -28
  216. llama_stack_api/internal/sqlstore.py +0 -81
  217. llama_stack_api/llama_stack_api/__init__.py +0 -945
  218. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  219. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  220. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  221. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  222. llama_stack_api/llama_stack_api/agents.py +0 -173
  223. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  224. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  225. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  226. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  227. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  228. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  229. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  230. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  231. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  232. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  233. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  234. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  235. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  236. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  237. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  238. llama_stack_api/llama_stack_api/connectors.py +0 -146
  239. llama_stack_api/llama_stack_api/conversations.py +0 -270
  240. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  241. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  242. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  243. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  244. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  245. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  246. llama_stack_api/llama_stack_api/eval.py +0 -137
  247. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  248. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  249. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  250. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  251. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  252. llama_stack_api/llama_stack_api/files/api.py +0 -51
  253. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  254. llama_stack_api/llama_stack_api/files/models.py +0 -107
  255. llama_stack_api/llama_stack_api/inference.py +0 -1169
  256. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  257. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  258. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  259. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  260. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  261. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  262. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  263. llama_stack_api/llama_stack_api/models.py +0 -171
  264. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  265. llama_stack_api/llama_stack_api/post_training.py +0 -370
  266. llama_stack_api/llama_stack_api/prompts.py +0 -203
  267. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  268. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  269. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  270. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  271. llama_stack_api/llama_stack_api/py.typed +0 -0
  272. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  273. llama_stack_api/llama_stack_api/resource.py +0 -37
  274. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  275. llama_stack_api/llama_stack_api/safety.py +0 -132
  276. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  277. llama_stack_api/llama_stack_api/scoring.py +0 -93
  278. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  279. llama_stack_api/llama_stack_api/shields.py +0 -93
  280. llama_stack_api/llama_stack_api/tools.py +0 -226
  281. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  282. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  283. llama_stack_api/llama_stack_api/version.py +0 -9
  284. llama_stack_api/models.py +0 -171
  285. llama_stack_api/openai_responses.py +0 -1468
  286. llama_stack_api/post_training.py +0 -370
  287. llama_stack_api/prompts.py +0 -203
  288. llama_stack_api/providers/__init__.py +0 -33
  289. llama_stack_api/providers/api.py +0 -16
  290. llama_stack_api/providers/fastapi_routes.py +0 -57
  291. llama_stack_api/providers/models.py +0 -24
  292. llama_stack_api/py.typed +0 -0
  293. llama_stack_api/rag_tool.py +0 -168
  294. llama_stack_api/resource.py +0 -37
  295. llama_stack_api/router_utils.py +0 -160
  296. llama_stack_api/safety.py +0 -132
  297. llama_stack_api/schema_utils.py +0 -208
  298. llama_stack_api/scoring.py +0 -93
  299. llama_stack_api/scoring_functions.py +0 -211
  300. llama_stack_api/shields.py +0 -93
  301. llama_stack_api/tools.py +0 -226
  302. llama_stack_api/vector_io.py +0 -941
  303. llama_stack_api/vector_stores.py +0 -53
  304. llama_stack_api/version.py +0 -9
  305. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
  306. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
  307. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -671,6 +671,19 @@ class OpenAIVectorStoreMixin(ABC):
         search_query = query
 
         try:
+            # Validate neural ranker requires model parameter
+            if ranking_options is not None:
+                if getattr(ranking_options, "ranker", None) == "neural":
+                    model_value = getattr(ranking_options, "model", None)
+                    if model_value is None or (isinstance(model_value, str) and model_value.strip() == ""):
+                        # Return empty results when model is missing for neural ranker
+                        logger.warning("model parameter is required when ranker='neural', returning empty results")
+                        return VectorStoreSearchResponsePage(
+                            search_query=query if isinstance(query, list) else [query],
+                            data=[],
+                            has_more=False,
+                            next_page=None,
+                        )
             score_threshold = (
                 ranking_options.score_threshold
                 if ranking_options and ranking_options.score_threshold is not None
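With this guard, a neural-ranker search that omits model degrades to an empty result page instead of raising. A hypothetical call sketch; the search method name and the exact client call shape are assumptions, while the field names follow the diff:

    async def demo(store):
        page = await store.openai_search_vector_store(  # method name assumed
            vector_store_id="vs_123",
            query="find my notes",
            ranking_options=SearchRankingOptions(ranker="neural"),  # no model set
        )
        # the guard logs a warning and returns an empty page
        assert page.data == [] and page.has_more is False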
@@ -681,7 +694,10 @@ class OpenAIVectorStoreMixin(ABC):
             "score_threshold": score_threshold,
             "mode": search_mode,
         }
-        # TODO: Add support for ranking_options.ranker
+
+        # Use VectorStoresConfig defaults when ranking_options values are not provided
+        config = self.vector_stores_config or VectorStoresConfig()
+        params.update(self._build_reranker_params(ranking_options, config))
 
         response = await self.query_chunks(
             vector_store_id=vector_store_id,
@@ -722,8 +738,8 @@ class OpenAIVectorStoreMixin(ABC):
             )
 
         except Exception as e:
+            # Log the error and return empty results
             logger.error(f"Error searching vector store {vector_store_id}: {e}")
-            # Return empty results on error
             return VectorStoreSearchResponsePage(
                 search_query=query if isinstance(query, list) else [query],
                 data=[],
@@ -731,6 +747,62 @@ class OpenAIVectorStoreMixin(ABC):
                 next_page=None,
             )
 
+    def _build_reranker_params(
+        self,
+        ranking_options: SearchRankingOptions | None,
+        config: VectorStoresConfig,
+    ) -> dict[str, Any]:
+        reranker_params: dict[str, Any] = {}
+        params: dict[str, Any] = {}
+
+        if ranking_options and ranking_options.ranker:
+            reranker_type = ranking_options.ranker
+
+            if ranking_options.ranker == "weighted":
+                alpha = ranking_options.alpha
+                if alpha is None:
+                    alpha = config.chunk_retrieval_params.weighted_search_alpha
+                reranker_params["alpha"] = alpha
+                if ranking_options.weights:
+                    reranker_params["weights"] = ranking_options.weights
+            elif ranking_options.ranker == "rrf":
+                # For RRF ranker, use impact_factor from request if provided, otherwise use VectorStoresConfig default
+                impact_factor = ranking_options.impact_factor
+                if impact_factor is None:
+                    impact_factor = config.chunk_retrieval_params.rrf_impact_factor
+                reranker_params["impact_factor"] = impact_factor
+                # If weights dict is provided (for neural combination), store it
+                if ranking_options.weights:
+                    reranker_params["weights"] = ranking_options.weights
+            elif ranking_options.ranker == "neural":
+                reranker_params["model"] = ranking_options.model
+            else:
+                logger.debug(f"Unknown ranker value: {ranking_options.ranker}, passing through")
+
+            params["reranker_type"] = reranker_type
+            params["reranker_params"] = reranker_params
+
+            # Store model and weights for neural reranking (TODO: implemented in Part II)
+            if ranking_options.model:
+                params["neural_model"] = ranking_options.model
+            if ranking_options.weights:
+                params["neural_weights"] = ranking_options.weights
+        elif ranking_options is None or ranking_options.ranker is None:
+            # No ranker specified in request - use VectorStoresConfig default
+            default_strategy = config.chunk_retrieval_params.default_reranker_strategy
+            if default_strategy in ("weighted", "rrf"):
+                params["reranker_type"] = default_strategy
+                reranker_params = {}
+
+                if default_strategy == "weighted":
+                    reranker_params["alpha"] = config.chunk_retrieval_params.weighted_search_alpha
+                elif default_strategy == "rrf":
+                    reranker_params["impact_factor"] = config.chunk_retrieval_params.rrf_impact_factor
+
+                params["reranker_params"] = reranker_params
+
+        return params
+
     def _matches_filters(self, metadata: dict[str, Any], filters: dict[str, Any]) -> bool:
         """Check if metadata matches the provided filters."""
         if not filters:
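For reference, here is a self-contained sketch of the precedence rule the new helper implements for the RRF branch: an impact_factor supplied in the request wins, otherwise the configured default applies. The RankingOptions stand-in and the default value of 60.0 are assumptions for illustration, not values from this diff:

    from dataclasses import dataclass

    @dataclass
    class RankingOptions:  # stand-in for SearchRankingOptions
        ranker: str | None = None
        alpha: float | None = None
        impact_factor: float | None = None
        model: str | None = None

    def build_reranker_params(opts: RankingOptions | None, default_impact_factor: float = 60.0) -> dict:
        # Mirrors the "rrf" branch above: request value wins, else the default.
        if opts and opts.ranker == "rrf":
            impact = opts.impact_factor if opts.impact_factor is not None else default_impact_factor
            return {"reranker_type": "rrf", "reranker_params": {"impact_factor": impact}}
        return {}

    print(build_reranker_params(RankingOptions(ranker="rrf")))
    # {'reranker_type': 'rrf', 'reranker_params': {'impact_factor': 60.0}}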
@@ -738,15 +810,29 @@ class OpenAIVectorStoreMixin(ABC):
 
         filter_type = filters.get("type")
 
+        if filter_type is None:
+            if "key" not in filters and "value" not in filters and "filters" not in filters:
+                for key, value in filters.items():
+                    if key not in metadata:
+                        return False
+                    if metadata[key] != value:
+                        return False
+                return True
+            else:
+                raise ValueError("Unsupported filter structure: missing 'type' field")
+
         if filter_type in ["eq", "ne", "gt", "gte", "lt", "lte"]:
             # Comparison filter
-            key = filters.get("key")
+            filter_key = filters.get("key")
             value = filters.get("value")
 
-            if key not in metadata:
+            if filter_key is None or not isinstance(filter_key, str):
+                return False
+
+            if filter_key not in metadata:
                 return False
 
-            metadata_value = metadata[key]
+            metadata_value = metadata[filter_key]
 
             if filter_type == "eq":
                 return bool(metadata_value == value)
@@ -901,6 +987,7 @@ class OpenAIVectorStoreMixin(ABC):
         params = OpenAIEmbeddingsRequestWithExtraBody(
             model=embedding_model,
             input=[interleaved_content_as_str(c.content) for c in chunks],
+            dimensions=embedding_dimension,
         )
         resp = await self.inference_api.openai_embeddings(params)
 
llama_stack/providers/utils/memory/vector_store.py
@@ -297,37 +297,64 @@ class VectorStoreWithIndex:
         mode = params.get("mode")
         score_threshold = params.get("score_threshold", 0.0)
 
-        ranker = params.get("ranker")
-        if ranker is None:
+        # Get reranker configuration from params (set by openai_vector_store_mixin)
+        # NOTE: Breaking change - removed support for old nested "ranker" format.
+        # Now uses flattened format: reranker_type and reranker_params.
+        reranker_type = params.get("reranker_type")
+        reranker_params = params.get("reranker_params", {})
+
+        # If no ranker specified, use VectorStoresConfig default
+        if reranker_type is None:
             reranker_type = (
                 RERANKER_TYPE_RRF
                 if config.chunk_retrieval_params.default_reranker_strategy == "rrf"
                 else config.chunk_retrieval_params.default_reranker_strategy
             )
             reranker_params = {"impact_factor": config.chunk_retrieval_params.rrf_impact_factor}
+
+        # Normalize reranker_type to use constants
+        if reranker_type == "weighted":
+            reranker_type = RERANKER_TYPE_WEIGHTED
+            # Ensure alpha is set (use default if not provided)
+            if "alpha" not in reranker_params:
+                reranker_params["alpha"] = config.chunk_retrieval_params.weighted_search_alpha
+        elif reranker_type == "rrf":
+            reranker_type = RERANKER_TYPE_RRF
+            # Ensure impact_factor is set (use default if not provided)
+            if "impact_factor" not in reranker_params:
+                reranker_params["impact_factor"] = config.chunk_retrieval_params.rrf_impact_factor
+        elif reranker_type == "neural":
+            # TODO: Implement neural reranking
+            log.warning(
+                "TODO: Neural reranking for vector stores is not implemented yet; "
+                "using configured reranker params without algorithm fallback."
+            )
+        elif reranker_type == "normalized":
+            reranker_type = RERANKER_TYPE_NORMALIZED
         else:
-            strategy = ranker.get("strategy", config.chunk_retrieval_params.default_reranker_strategy)
-            if strategy == "weighted":
-                weights = ranker.get("params", {}).get("weights", [0.5, 0.5])
-                reranker_type = RERANKER_TYPE_WEIGHTED
-                reranker_params = {
-                    "alpha": weights[0] if len(weights) > 0 else config.chunk_retrieval_params.weighted_search_alpha
-                }
-            elif strategy == "normalized":
-                reranker_type = RERANKER_TYPE_NORMALIZED
-            else:
-                reranker_type = RERANKER_TYPE_RRF
-                k_value = ranker.get("params", {}).get("k", config.chunk_retrieval_params.rrf_impact_factor)
-                reranker_params = {"impact_factor": k_value}
+            # Default to RRF for unknown strategies
+            reranker_type = RERANKER_TYPE_RRF
+            if "impact_factor" not in reranker_params:
+                reranker_params["impact_factor"] = config.chunk_retrieval_params.rrf_impact_factor
+
+        # Store neural model and weights from params if provided (for future neural reranking in Part II)
+        if "neural_model" in params:
+            reranker_params["neural_model"] = params["neural_model"]
+        if "neural_weights" in params:
+            reranker_params["neural_weights"] = params["neural_weights"]
 
         query_string = interleaved_content_as_str(query)
         if mode == "keyword":
             return await self.index.query_keyword(query_string, k, score_threshold)
 
-        params = OpenAIEmbeddingsRequestWithExtraBody(
-            model=self.vector_store.embedding_model,
-            input=[query_string],
-        )
+        if "embedding_dimensions" in params:
+            params = OpenAIEmbeddingsRequestWithExtraBody(
+                model=self.vector_store.embedding_model,
+                input=[query_string],
+                dimensions=params.get("embedding_dimensions"),
+            )
+        else:
+            params = OpenAIEmbeddingsRequestWithExtraBody(model=self.vector_store.embedding_model, input=[query_string])
         embeddings_response = await self.inference_api.openai_embeddings(params)
         query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
         if mode == "hybrid":
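The NOTE above marks a breaking change in the params contract between the mixin and VectorStoreWithIndex.query_chunks. An illustration of the two shapes, with example values only:

    # Before (0.4.x): nested "ranker" object
    old_params = {
        "mode": "hybrid",
        "ranker": {"strategy": "rrf", "params": {"k": 60.0}},
    }

    # After (0.5.x): flattened reranker_type / reranker_params keys
    new_params = {
        "mode": "hybrid",
        "reranker_type": "rrf",
        "reranker_params": {"impact_factor": 60.0},
    }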
llama_stack/providers/utils/responses/responses_store.py
@@ -57,7 +57,7 @@ class ResponsesStore:
         self.sql_store = AuthorizedSqlStore(base_store, self.policy)
 
         await self.sql_store.create_table(
-            "openai_responses",
+            self.reference.table_name,
             {
                 "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                 "created_at": ColumnType.INTEGER,
@@ -89,6 +89,40 @@ class ResponsesStore:
     ) -> None:
         await self._write_response_object(response_object, input, messages)
 
+    async def upsert_response_object(
+        self,
+        response_object: OpenAIResponseObject,
+        input: list[OpenAIResponseInput],
+        messages: list[OpenAIMessageParam],
+    ) -> None:
+        """Upsert response object using INSERT on first call, UPDATE on subsequent calls.
+
+        This method enables incremental persistence during streaming, allowing clients
+        to poll GET /v1/responses/{response_id} and see in-progress turn state.
+
+        :param response_object: The response object to store/update.
+        :param input: The input items for the response.
+        :param messages: The chat completion messages (for conversation continuity).
+        """
+        if self.sql_store is None:
+            raise ValueError("Responses store is not initialized")
+
+        data = response_object.model_dump()
+        data["input"] = [input_item.model_dump() for input_item in input]
+        data["messages"] = [msg.model_dump() for msg in messages]
+
+        await self.sql_store.upsert(
+            table=self.reference.table_name,
+            data={
+                "id": data["id"],
+                "created_at": data["created_at"],
+                "model": data["model"],
+                "response_object": data,
+            },
+            conflict_columns=["id"],
+            update_columns=["response_object"],
+        )
+
     async def _write_response_object(
         self,
         response_object: OpenAIResponseObject,
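Because upsert_response_object keys on id and only updates response_object after the first write, a streaming handler can persist every delta with the same call. A minimal sketch, not from the diff; store, response, input_items, and stream are hypothetical stand-ins for the real objects:

    async def stream_and_persist(store, response, input_items, stream):
        messages = []
        async for chunk in stream:
            response.output.append(chunk)  # accumulate deltas (hypothetical shape)
            # Same call each iteration: the first call INSERTs the row,
            # subsequent calls UPDATE only the response_object column.
            await store.upsert_response_object(response, input_items, messages)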
@@ -103,7 +137,7 @@ class ResponsesStore:
         data["messages"] = [msg.model_dump() for msg in messages]
 
         await self.sql_store.insert(
-            "openai_responses",
+            self.reference.table_name,
             {
                 "id": data["id"],
                 "created_at": data["created_at"],
@@ -138,7 +172,7 @@ class ResponsesStore:
             where_conditions["model"] = model
 
         paginated_result = await self.sql_store.fetch_all(
-            table="openai_responses",
+            table=self.reference.table_name,
             where=where_conditions if where_conditions else None,
             order_by=[("created_at", order.value)],
             cursor=("id", after) if after else None,
@@ -161,7 +195,7 @@ class ResponsesStore:
             raise ValueError("Responses store is not initialized")
 
         row = await self.sql_store.fetch_one(
-            "openai_responses",
+            self.reference.table_name,
             where={"id": response_id},
         )
 
@@ -176,10 +210,10 @@ class ResponsesStore:
         if not self.sql_store:
             raise ValueError("Responses store is not initialized")
 
-        row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id})
+        row = await self.sql_store.fetch_one(self.reference.table_name, where={"id": response_id})
        if not row:
            raise ValueError(f"Response with id {response_id} not found")
-        await self.sql_store.delete("openai_responses", where={"id": response_id})
+        await self.sql_store.delete(self.reference.table_name, where={"id": response_id})
         return OpenAIDeleteResponseObject(id=response_id)
 
     async def list_response_input_items(
llama_stack/providers/utils/safety.py
@@ -0,0 +1,114 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import uuid
+from typing import TYPE_CHECKING
+
+from llama_stack_api import (
+    ModerationObject,
+    ModerationObjectResults,
+    OpenAIUserMessageParam,
+    RunModerationRequest,
+    RunShieldRequest,
+    RunShieldResponse,
+)
+
+if TYPE_CHECKING:
+    # Type stub for mypy - actual implementation provided by provider class
+    class _RunShieldProtocol:
+        async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse: ...
+
+
+class ShieldToModerationMixin:
+    """
+    Mixin that provides run_moderation implementation by delegating to run_shield.
+
+    Providers must implement run_shield(request: RunShieldRequest) for this mixin to work.
+    Providers with custom run_moderation implementations will override this automatically.
+    """
+
+    if TYPE_CHECKING:
+        # Type hint for mypy - run_shield is provided by the mixed-in class
+        async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse: ...
+
+    async def run_moderation(self, request: RunModerationRequest) -> ModerationObject:
+        """
+        Run moderation by converting input to messages and delegating to run_shield.
+
+        Args:
+            request: RunModerationRequest with input and model
+
+        Returns:
+            ModerationObject with results for each input
+
+        Raises:
+            ValueError: If model is None
+        """
+        if request.model is None:
+            raise ValueError(f"{self.__class__.__name__} moderation requires a model identifier")
+
+        inputs = request.input if isinstance(request.input, list) else [request.input]
+        results = []
+
+        for text_input in inputs:
+            # Convert string to OpenAI message format
+            message = OpenAIUserMessageParam(content=text_input)
+
+            # Call run_shield (must be implemented by the provider)
+            shield_request = RunShieldRequest(
+                shield_id=request.model,
+                messages=[message],
+            )
+            shield_response = await self.run_shield(shield_request)
+
+            # Convert RunShieldResponse to ModerationObjectResults
+            results.append(self._shield_response_to_moderation_result(shield_response))
+
+        return ModerationObject(
+            id=f"modr-{uuid.uuid4()}",
+            model=request.model,
+            results=results,
+        )
+
+    def _shield_response_to_moderation_result(
+        self,
+        shield_response: RunShieldResponse,
+    ) -> ModerationObjectResults:
+        """Convert RunShieldResponse to ModerationObjectResults.
+
+        Args:
+            shield_response: The response from run_shield
+
+        Returns:
+            ModerationObjectResults with appropriate fields set
+        """
+        if shield_response.violation is None:
+            # Safe content
+            return ModerationObjectResults(
+                flagged=False,
+                categories={},
+                category_scores={},
+                category_applied_input_types={},
+                user_message=None,
+                metadata={},
+            )
+
+        # Unsafe content - extract violation details
+        v = shield_response.violation
+        violation_type = v.metadata.get("violation_type", "unsafe")
+
+        # Ensure violation_type is a string (metadata values can be Any)
+        if not isinstance(violation_type, str):
+            violation_type = "unsafe"
+
+        return ModerationObjectResults(
+            flagged=True,
+            categories={violation_type: True},
+            category_scores={violation_type: 1.0},
+            category_applied_input_types={violation_type: ["text"]},
+            user_message=v.user_message,
+            metadata=v.metadata,
+        )
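With this mixin, a provider inherits moderation behavior by implementing only run_shield. A minimal usage sketch, assuming the module lands at llama_stack/providers/utils/safety.py as listed above; the provider class and its "always safe" logic are hypothetical:

    from llama_stack.providers.utils.safety import ShieldToModerationMixin
    from llama_stack_api import RunShieldRequest, RunShieldResponse


    class AlwaysSafeProvider(ShieldToModerationMixin):
        """Hypothetical provider that never flags content."""

        async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse:
            return RunShieldResponse(violation=None)

    # run_moderation() is inherited: each input string becomes a user message,
    # run_shield runs per input, and each RunShieldResponse is mapped onto
    # OpenAI-style ModerationObjectResults.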
llama_stack/providers/utils/tools/mcp.py
@@ -8,6 +8,7 @@ import asyncio
 import hashlib
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
+from dataclasses import dataclass
 from enum import Enum
 from typing import Any, cast
 
@@ -241,10 +242,12 @@ class MCPSessionManager:
                raise last_exception
        raise RuntimeError(f"Failed to create MCP session for {endpoint}")
 
-    async def close_all(self) -> None:
-        """Close all cached sessions.
+    async def __aenter__(self):
+        """Enter the async context manager."""
+        return self
 
-        Should be called at the end of a request to clean up resources.
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Exit the async context manager and cleanup all sessions.
 
         Note: We catch BaseException (not just Exception) because:
         1. CancelledError is a BaseException and can occur during cleanup
@@ -275,6 +278,8 @@ class MCPSessionManager:
         if errors:
             logger.debug(f"Encountered {len(errors)} errors while closing MCP sessions (expected in streaming)")
 
+        return False
+
 
 @asynccontextmanager
 async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerator[ClientSession, Any]:
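The removed close_all() is replaced by the async context-manager protocol, so callers now scope cleanup with `async with` instead of remembering an explicit call. A sketch of the new lifecycle; the constructor arguments, if any, are not shown in this diff:

    async def handle_request() -> None:
        async with MCPSessionManager() as manager:  # constructor args omitted
            # sessions created through the manager live for this block
            ...
        # __aexit__ has now closed all cached sessions; it returns False,
        # so any in-flight exception still propagates to the caller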
@@ -470,3 +475,39 @@ async def invoke_mcp_tool(
     async with client_wrapper(endpoint, final_headers) as session:
         result = await session.call_tool(tool_name, kwargs)
         return _parse_mcp_result(result)
+
+
+@dataclass
+class MCPServerInfo:
+    """Server information from an MCP server."""
+
+    name: str
+    version: str
+    title: str | None = None
+    description: str | None = None
+
+
+async def get_mcp_server_info(
+    endpoint: str,
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
+) -> MCPServerInfo:
+    """Get server info from an MCP server.
+    Args:
+        endpoint: MCP server endpoint URL
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+    Returns:
+        MCPServerInfo containing name, version, title, and description
+    """
+    final_headers = prepare_mcp_headers(headers, authorization)
+
+    async with client_wrapper(endpoint, final_headers) as session:
+        init_result = await session.initialize()
+
+        return MCPServerInfo(
+            name=init_result.serverInfo.name,
+            version=init_result.serverInfo.version,
+            title=init_result.serverInfo.title,
+            description=init_result.instructions,
+        )
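A usage sketch for the new helper, assuming the module path from the file list above; the endpoint URL is a placeholder:

    import asyncio

    from llama_stack.providers.utils.tools.mcp import get_mcp_server_info


    async def main() -> None:
        info = await get_mcp_server_info("http://localhost:8000/mcp")  # placeholder URL
        print(info.name, info.version, info.title, info.description)


    asyncio.run(main())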
llama_stack/testing/api_recorder.py
@@ -77,11 +77,14 @@ def _normalize_numeric_literal_strings(value: str) -> str:
     return _FLOAT_IN_STRING_PATTERN.sub(_replace, value)
 
 
-def _normalize_body_for_hash(value: Any) -> Any:
+def _normalize_body_for_hash(value: Any, exclude_stream_options: bool = False) -> Any:
     """Recursively normalize a JSON-like value to improve hash stability."""
 
     if isinstance(value, dict):
-        return {key: _normalize_body_for_hash(item) for key, item in value.items()}
+        normalized = {key: _normalize_body_for_hash(item) for key, item in value.items()}
+        if exclude_stream_options and "stream_options" in normalized:
+            del normalized["stream_options"]
+        return normalized
     if isinstance(value, list):
         return [_normalize_body_for_hash(item) for item in value]
     if isinstance(value, tuple):
@@ -146,7 +149,10 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
 
     parsed = urlparse(url)
 
-    body_for_hash = _normalize_body_for_hash(body)
+    # Bedrock's OpenAI-compatible endpoint includes stream_options that vary between
+    # runs but don't affect the logical request. Exclude it for stable hashing.
+    is_bedrock = "bedrock" in parsed.netloc
+    body_for_hash = _normalize_body_for_hash(body, exclude_stream_options=is_bedrock)
 
     test_id = get_test_context()
     normalized: dict[str, Any] = {
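The effect of the new flag, in isolation: two request bodies that differ only in stream_options normalize identically and therefore hash identically. A stripped-down illustration, not the module's actual helper:

    body_a = {"model": "m", "stream_options": {"include_usage": True}}
    body_b = {"model": "m", "stream_options": {"include_usage": False}}

    def normalize(body: dict, exclude_stream_options: bool = False) -> dict:
        # shallow version of the recursive helper above
        out = dict(body)
        if exclude_stream_options:
            out.pop("stream_options", None)
        return out

    assert normalize(body_a, True) == normalize(body_b, True) == {"model": "m"}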
{llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.4.3
+Version: 0.5.0rc1
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -45,8 +45,12 @@ Requires-Dist: starlette>=0.49.1
 Requires-Dist: psycopg2-binary
 Requires-Dist: tornado>=6.5.3
 Requires-Dist: urllib3>=2.6.3
+Requires-Dist: oracledb>=3.4.1
+Requires-Dist: oci>=2.165.0
+Requires-Dist: numpy>=2.3.2
+Requires-Dist: mcp>=1.23.0
 Provides-Extra: client
-Requires-Dist: llama-stack-client==0.4.3; extra == "client"
+Requires-Dist: llama-stack-client>=0.4.0.dev0; extra == "client"
 Dynamic: license-file
 
 # Llama Stack
@@ -158,6 +162,7 @@ Please checkout our [Documentation](https://llamastack.github.io/docs) page for
 * A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guide you through all the key components of llama stack with code samples.
 * [Contributing](CONTRIBUTING.md)
 * [Adding a new API Provider](https://llamastack.github.io/docs/contributing/new_api_provider) to walk-through how to add a new API provider.
+* [Release Process](RELEASE_PROCESS.md) for information about release schedules and versioning.
 
 ### Llama Stack Client SDKs
 
@@ -172,6 +177,13 @@
 
 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.
 
+## Community
+
+We hold regular community calls to discuss the latest developments and get feedback from the community.
+
+- Date: every Thursday
+- Time: 09:00 AM PST (check the [Community Event on Discord](https://discord.com/events/1257833999603335178/1413266296748900513) for the latest details)
+
 
 ## 🌟 GitHub Star History