llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -11,16 +11,9 @@ from typing import Any
11
11
  from numpy.typing import NDArray
12
12
  from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker
13
13
 
14
- from llama_stack.apis.common.errors import VectorStoreNotFoundError
15
- from llama_stack.apis.files import Files
16
- from llama_stack.apis.inference import Inference, InterleavedContent
17
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
18
- from llama_stack.apis.vector_stores import VectorStore
14
+ from llama_stack.core.storage.kvstore import kvstore_impl
19
15
  from llama_stack.log import get_logger
20
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
21
16
  from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
22
- from llama_stack.providers.utils.kvstore import kvstore_impl
23
- from llama_stack.providers.utils.kvstore.api import KVStore
24
17
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
25
18
  from llama_stack.providers.utils.memory.vector_store import (
26
19
  RERANKER_TYPE_WEIGHTED,
@@ -28,12 +21,28 @@ from llama_stack.providers.utils.memory.vector_store import (
28
21
  EmbeddingIndex,
29
22
  VectorStoreWithIndex,
30
23
  )
31
- from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
24
+ from llama_stack.providers.utils.vector_io.vector_utils import (
25
+ load_embedded_chunk_with_backward_compat,
26
+ sanitize_collection_name,
27
+ )
28
+ from llama_stack_api import (
29
+ EmbeddedChunk,
30
+ Files,
31
+ Inference,
32
+ InterleavedContent,
33
+ QueryChunksResponse,
34
+ VectorIO,
35
+ VectorStore,
36
+ VectorStoreNotFoundError,
37
+ VectorStoresProtocolPrivate,
38
+ )
39
+ from llama_stack_api.internal.kvstore import KVStore
32
40
 
33
41
  from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
34
42
 
35
43
  logger = get_logger(name=__name__, category="vector_io::milvus")
36
44
 
45
+
37
46
  VERSION = "v3"
38
47
  VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
39
48
  VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"
@@ -60,10 +69,9 @@ class MilvusIndex(EmbeddingIndex):
60
69
  if await asyncio.to_thread(self.client.has_collection, self.collection_name):
61
70
  await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name)
62
71
 
63
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
64
- assert len(chunks) == len(embeddings), (
65
- f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
66
- )
72
+ async def add_chunks(self, chunks: list[EmbeddedChunk]):
73
+ if not chunks:
74
+ return
67
75
 
68
76
  if not await asyncio.to_thread(self.client.has_collection, self.collection_name):
69
77
  logger.info(f"Creating new collection {self.collection_name} with nullable sparse field")
@@ -76,7 +84,7 @@ class MilvusIndex(EmbeddingIndex):
76
84
  max_length=65535,
77
85
  enable_analyzer=True, # Enable text analysis for BM25
78
86
  )
79
- schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(embeddings[0]))
87
+ schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(chunks[0].embedding))
80
88
  schema.add_field(field_name="chunk_content", datatype=DataType.JSON)
81
89
  # Add sparse vector field for BM25 (required by the function)
82
90
  schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)
@@ -105,12 +113,12 @@ class MilvusIndex(EmbeddingIndex):
105
113
  )
106
114
 
107
115
  data = []
108
- for chunk, embedding in zip(chunks, embeddings, strict=False):
116
+ for chunk in chunks:
109
117
  data.append(
110
118
  {
111
119
  "chunk_id": chunk.chunk_id,
112
120
  "content": chunk.content,
113
- "vector": embedding,
121
+ "vector": chunk.embedding, # Already a list[float]
114
122
  "chunk_content": chunk.model_dump(),
115
123
  # sparse field will be handled by BM25 function automatically
116
124
  }
@@ -131,7 +139,7 @@ class MilvusIndex(EmbeddingIndex):
131
139
  output_fields=["*"],
132
140
  search_params={"params": {"radius": score_threshold}},
133
141
  )
134
- chunks = [Chunk(**res["entity"]["chunk_content"]) for res in search_res[0]]
142
+ chunks = [load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"]) for res in search_res[0]]
135
143
  scores = [res["distance"] for res in search_res[0]]
136
144
  return QueryChunksResponse(chunks=chunks, scores=scores)
137
145
 
@@ -158,7 +166,7 @@ class MilvusIndex(EmbeddingIndex):
158
166
  chunks = []
159
167
  scores = []
160
168
  for res in search_res[0]:
161
- chunk = Chunk(**res["entity"]["chunk_content"])
169
+ chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
162
170
  chunks.append(chunk)
163
171
  scores.append(res["distance"]) # BM25 score from Milvus
164
172
 
@@ -186,7 +194,7 @@ class MilvusIndex(EmbeddingIndex):
186
194
  output_fields=["*"],
187
195
  limit=k,
188
196
  )
189
- chunks = [Chunk(**res["chunk_content"]) for res in search_res]
197
+ chunks = [load_embedded_chunk_with_backward_compat(res["chunk_content"]) for res in search_res]
190
198
  scores = [1.0] * len(chunks) # Simple binary score for text search
191
199
  return QueryChunksResponse(chunks=chunks, scores=scores)
192
200
 
@@ -238,7 +246,7 @@ class MilvusIndex(EmbeddingIndex):
238
246
  chunks = []
239
247
  scores = []
240
248
  for res in search_res[0]:
241
- chunk = Chunk(**res["entity"]["chunk_content"])
249
+ chunk = load_embedded_chunk_with_backward_compat(res["entity"]["chunk_content"])
242
250
  chunks.append(chunk)
243
251
  scores.append(res["distance"])
244
252
 
@@ -268,11 +276,10 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
268
276
  inference_api: Inference,
269
277
  files_api: Files | None,
270
278
  ) -> None:
271
- super().__init__(files_api=files_api, kvstore=None)
279
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
272
280
  self.config = config
273
281
  self.cache = {}
274
282
  self.client = None
275
- self.inference_api = inference_api
276
283
  self.vector_store_table = None
277
284
  self.metadata_collection_name = "openai_vector_stores_metadata"
278
285
 
@@ -351,19 +358,21 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
351
358
  await self.cache[vector_store_id].index.delete()
352
359
  del self.cache[vector_store_id]
353
360
 
354
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
355
- index = await self._get_and_cache_vector_store_index(vector_db_id)
361
+ async def insert_chunks(
362
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
363
+ ) -> None:
364
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
356
365
  if not index:
357
- raise VectorStoreNotFoundError(vector_db_id)
366
+ raise VectorStoreNotFoundError(vector_store_id)
358
367
 
359
368
  await index.insert_chunks(chunks)
360
369
 
361
370
  async def query_chunks(
362
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
371
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
363
372
  ) -> QueryChunksResponse:
364
- index = await self._get_and_cache_vector_store_index(vector_db_id)
373
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
365
374
  if not index:
366
- raise VectorStoreNotFoundError(vector_db_id)
375
+ raise VectorStoreNotFoundError(vector_store_id)
367
376
  return await index.query_chunks(query, params)
368
377
 
369
378
  async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import PGVectorVectorIOConfig
10
10
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -13,19 +13,28 @@ from psycopg2 import sql
13
13
  from psycopg2.extras import Json, execute_values
14
14
  from pydantic import BaseModel, TypeAdapter
15
15
 
16
- from llama_stack.apis.common.errors import VectorStoreNotFoundError
17
- from llama_stack.apis.files import Files
18
- from llama_stack.apis.inference import Inference, InterleavedContent
19
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
20
- from llama_stack.apis.vector_stores import VectorStore
16
+ from llama_stack.core.storage.kvstore import kvstore_impl
21
17
  from llama_stack.log import get_logger
22
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
23
18
  from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
24
- from llama_stack.providers.utils.kvstore import kvstore_impl
25
- from llama_stack.providers.utils.kvstore.api import KVStore
26
19
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
27
20
  from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
28
- from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
21
+ from llama_stack.providers.utils.vector_io.vector_utils import (
22
+ WeightedInMemoryAggregator,
23
+ load_embedded_chunk_with_backward_compat,
24
+ sanitize_collection_name,
25
+ )
26
+ from llama_stack_api import (
27
+ EmbeddedChunk,
28
+ Files,
29
+ Inference,
30
+ InterleavedContent,
31
+ QueryChunksResponse,
32
+ VectorIO,
33
+ VectorStore,
34
+ VectorStoreNotFoundError,
35
+ VectorStoresProtocolPrivate,
36
+ )
37
+ from llama_stack_api.internal.kvstore import KVStore
29
38
 
30
39
  from .config import PGVectorVectorIOConfig
31
40
 
@@ -125,19 +134,18 @@ class PGVectorIndex(EmbeddingIndex):
125
134
  log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
126
135
  raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
127
136
 
128
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
129
- assert len(chunks) == len(embeddings), (
130
- f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
131
- )
137
+ async def add_chunks(self, chunks: list[EmbeddedChunk]):
138
+ if not chunks:
139
+ return
132
140
 
133
141
  values = []
134
- for i, chunk in enumerate(chunks):
142
+ for chunk in chunks:
135
143
  content_text = interleaved_content_as_str(chunk.content)
136
144
  values.append(
137
145
  (
138
146
  f"{chunk.chunk_id}",
139
147
  Json(chunk.model_dump()),
140
- embeddings[i].tolist(),
148
+ chunk.embedding, # Already a list[float]
141
149
  content_text,
142
150
  content_text, # Pass content_text twice - once for content_text column, once for to_tsvector function. Eg. to_tsvector(content_text) = tokenized_content
143
151
  )
@@ -189,7 +197,7 @@ class PGVectorIndex(EmbeddingIndex):
189
197
  score = 1.0 / float(dist) if dist != 0 else float("inf")
190
198
  if score < score_threshold:
191
199
  continue
192
- chunks.append(Chunk(**doc))
200
+ chunks.append(load_embedded_chunk_with_backward_compat(doc))
193
201
  scores.append(score)
194
202
 
195
203
  return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -225,7 +233,7 @@ class PGVectorIndex(EmbeddingIndex):
225
233
  for doc, score in results:
226
234
  if score < score_threshold:
227
235
  continue
228
- chunks.append(Chunk(**doc))
236
+ chunks.append(load_embedded_chunk_with_backward_compat(doc))
229
237
  scores.append(float(score))
230
238
 
231
239
  return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -301,7 +309,8 @@ class PGVectorIndex(EmbeddingIndex):
301
309
  """Remove a chunk from the PostgreSQL table."""
302
310
  chunk_ids = [c.chunk_id for c in chunks_for_deletion]
303
311
  with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
304
- cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids))
312
+ # Fix: Use proper tuple parameter binding with explicit array cast
313
+ cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s::text[])", (chunk_ids,))
305
314
 
306
315
  def get_pgvector_search_function(self) -> str:
307
316
  return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]
@@ -327,16 +336,17 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
327
336
  def __init__(
328
337
  self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None
329
338
  ) -> None:
330
- super().__init__(files_api=files_api, kvstore=None)
339
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
331
340
  self.config = config
332
- self.inference_api = inference_api
333
341
  self.conn = None
334
342
  self.cache = {}
335
343
  self.vector_store_table = None
336
344
  self.metadata_collection_name = "openai_vector_stores_metadata"
337
345
 
338
346
  async def initialize(self) -> None:
339
- log.info(f"Initializing PGVector memory adapter with config: {self.config}")
347
+ # Create a safe config representation with masked password for logging
348
+ safe_config = {**self.config.model_dump(exclude={"password"}), "password": "******"}
349
+ log.info(f"Initializing PGVector memory adapter with config: {safe_config}")
340
350
  self.kvstore = await kvstore_impl(self.config.persistence)
341
351
  await self.initialize_openai_vector_stores()
342
352
 
@@ -422,14 +432,16 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
422
432
  raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
423
433
  await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
424
434
 
425
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
426
- index = await self._get_and_cache_vector_store_index(vector_db_id)
435
+ async def insert_chunks(
436
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
437
+ ) -> None:
438
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
427
439
  await index.insert_chunks(chunks)
428
440
 
429
441
  async def query_chunks(
430
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
442
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
431
443
  ) -> QueryChunksResponse:
432
- index = await self._get_and_cache_vector_store_index(vector_db_id)
444
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
433
445
  return await index.query_chunks(query, params)
434
446
 
435
447
  async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import QdrantVectorIOConfig
10
10
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -13,23 +13,25 @@ from numpy.typing import NDArray
13
13
  from qdrant_client import AsyncQdrantClient, models
14
14
  from qdrant_client.models import PointStruct
15
15
 
16
- from llama_stack.apis.common.errors import VectorStoreNotFoundError
17
- from llama_stack.apis.files import Files
18
- from llama_stack.apis.inference import Inference, InterleavedContent
19
- from llama_stack.apis.vector_io import (
20
- Chunk,
16
+ from llama_stack.core.storage.kvstore import kvstore_impl
17
+ from llama_stack.log import get_logger
18
+ from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
19
+ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
20
+ from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
21
+ from llama_stack.providers.utils.vector_io.vector_utils import load_embedded_chunk_with_backward_compat
22
+ from llama_stack_api import (
23
+ EmbeddedChunk,
24
+ Files,
25
+ Inference,
26
+ InterleavedContent,
21
27
  QueryChunksResponse,
22
28
  VectorIO,
29
+ VectorStore,
23
30
  VectorStoreChunkingStrategy,
24
31
  VectorStoreFileObject,
32
+ VectorStoreNotFoundError,
33
+ VectorStoresProtocolPrivate,
25
34
  )
26
- from llama_stack.apis.vector_stores import VectorStore
27
- from llama_stack.log import get_logger
28
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
29
- from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
30
- from llama_stack.providers.utils.kvstore import kvstore_impl
31
- from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
32
- from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
33
35
 
34
36
  from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
35
37
 
@@ -65,24 +67,23 @@ class QdrantIndex(EmbeddingIndex):
65
67
  # If the collection does not exist, it will be created in add_chunks.
66
68
  pass
67
69
 
68
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
69
- assert len(chunks) == len(embeddings), (
70
- f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
71
- )
70
+ async def add_chunks(self, chunks: list[EmbeddedChunk]):
71
+ if not chunks:
72
+ return
72
73
 
73
74
  if not await self.client.collection_exists(self.collection_name):
74
75
  await self.client.create_collection(
75
76
  self.collection_name,
76
- vectors_config=models.VectorParams(size=len(embeddings[0]), distance=models.Distance.COSINE),
77
+ vectors_config=models.VectorParams(size=len(chunks[0].embedding), distance=models.Distance.COSINE),
77
78
  )
78
79
 
79
80
  points = []
80
- for _i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)):
81
+ for chunk in chunks:
81
82
  chunk_id = chunk.chunk_id
82
83
  points.append(
83
84
  PointStruct(
84
85
  id=convert_id(chunk_id),
85
- vector=embedding,
86
+ vector=chunk.embedding, # Already a list[float]
86
87
  payload={"chunk_content": chunk.model_dump()} | {CHUNK_ID_KEY: chunk_id},
87
88
  )
88
89
  )
@@ -117,7 +118,7 @@ class QdrantIndex(EmbeddingIndex):
117
118
  assert point.payload is not None
118
119
 
119
120
  try:
120
- chunk = Chunk(**point.payload["chunk_content"])
121
+ chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
121
122
  except Exception:
122
123
  log.exception("Failed to parse chunk")
123
124
  continue
@@ -128,7 +129,63 @@ class QdrantIndex(EmbeddingIndex):
128
129
  return QueryChunksResponse(chunks=chunks, scores=scores)
129
130
 
130
131
  async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
131
- raise NotImplementedError("Keyword search is not supported in Qdrant")
132
+ """
133
+ Performs keyword-based search using Qdrant's MatchText filter.
134
+
135
+ Uses Qdrant's query_filter with MatchText to search for chunks containing
136
+ the specified text query string in the chunk content.
137
+
138
+ Args:
139
+ query_string: The text query for keyword search
140
+ k: Number of results to return
141
+ score_threshold: Minimum similarity score threshold
142
+
143
+ Returns:
144
+ QueryChunksResponse with chunks and scores matching the keyword query
145
+ """
146
+ try:
147
+ results = (
148
+ await self.client.query_points(
149
+ collection_name=self.collection_name,
150
+ query_filter=models.Filter(
151
+ must=[
152
+ models.FieldCondition(
153
+ key="chunk_content.content", match=models.MatchText(text=query_string)
154
+ )
155
+ ]
156
+ ),
157
+ limit=k,
158
+ with_payload=True,
159
+ with_vectors=False,
160
+ score_threshold=score_threshold,
161
+ )
162
+ ).points
163
+ except Exception as e:
164
+ log.error(f"Error querying keyword search in Qdrant collection {self.collection_name}: {e}")
165
+ raise
166
+
167
+ chunks, scores = [], []
168
+ for point in results:
169
+ if not isinstance(point, models.ScoredPoint):
170
+ raise RuntimeError(f"Expected ScoredPoint from Qdrant query, got {type(point).__name__}")
171
+ if point.payload is None:
172
+ raise RuntimeError("Qdrant query returned point with no payload")
173
+
174
+ try:
175
+ chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
176
+ except Exception:
177
+ chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
178
+ point_id = getattr(point, "id", "unknown")
179
+ log.exception(
180
+ f"Failed to parse chunk in collection {self.collection_name}: "
181
+ f"chunk_id={chunk_id}, point_id={point_id}"
182
+ )
183
+ continue
184
+
185
+ chunks.append(chunk)
186
+ scores.append(point.score)
187
+
188
+ return QueryChunksResponse(chunks=chunks, scores=scores)
132
189
 
133
190
  async def query_hybrid(
134
191
  self,
@@ -139,7 +196,66 @@ class QdrantIndex(EmbeddingIndex):
139
196
  reranker_type: str,
140
197
  reranker_params: dict[str, Any] | None = None,
141
198
  ) -> QueryChunksResponse:
142
- raise NotImplementedError("Hybrid search is not supported in Qdrant")
199
+ """
200
+ Hybrid search combining vector similarity and keyword filtering in a single query.
201
+
202
+ Uses Qdrant's native capability to combine a vector query with a query_filter,
203
+ allowing vector similarity search to be filtered by keyword matches in one call.
204
+
205
+ Args:
206
+ embedding: The query embedding vector
207
+ query_string: The text query for keyword filtering
208
+ k: Number of results to return
209
+ score_threshold: Minimum similarity score threshold
210
+ reranker_type: Not used with this approach, but kept for API compatibility
211
+ reranker_params: Not used with this approach, but kept for API compatibility
212
+
213
+ Returns:
214
+ QueryChunksResponse with filtered vector search results
215
+ """
216
+ try:
217
+ results = (
218
+ await self.client.query_points(
219
+ collection_name=self.collection_name,
220
+ query=embedding.tolist(),
221
+ query_filter=models.Filter(
222
+ must=[
223
+ models.FieldCondition(
224
+ key="chunk_content.content", match=models.MatchText(text=query_string)
225
+ )
226
+ ]
227
+ ),
228
+ limit=k,
229
+ with_payload=True,
230
+ score_threshold=score_threshold,
231
+ )
232
+ ).points
233
+ except Exception as e:
234
+ log.error(f"Error querying hybrid search in Qdrant collection {self.collection_name}: {e}")
235
+ raise
236
+
237
+ chunks, scores = [], []
238
+ for point in results:
239
+ if not isinstance(point, models.ScoredPoint):
240
+ raise RuntimeError(f"Expected ScoredPoint from Qdrant query, got {type(point).__name__}")
241
+ if point.payload is None:
242
+ raise RuntimeError("Qdrant query returned point with no payload")
243
+
244
+ try:
245
+ chunk = load_embedded_chunk_with_backward_compat(point.payload["chunk_content"])
246
+ except Exception:
247
+ chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
248
+ point_id = getattr(point, "id", "unknown")
249
+ log.exception(
250
+ f"Failed to parse chunk in collection {self.collection_name}: "
251
+ f"chunk_id={chunk_id}, point_id={point_id}"
252
+ )
253
+ continue
254
+
255
+ chunks.append(chunk)
256
+ scores.append(point.score)
257
+
258
+ return QueryChunksResponse(chunks=chunks, scores=scores)
143
259
 
144
260
  async def delete(self):
145
261
  await self.client.delete_collection(collection_name=self.collection_name)
@@ -152,11 +268,10 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
152
268
  inference_api: Inference,
153
269
  files_api: Files | None = None,
154
270
  ) -> None:
155
- super().__init__(files_api=files_api, kvstore=None)
271
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
156
272
  self.config = config
157
273
  self.client: AsyncQdrantClient = None
158
274
  self.cache = {}
159
- self.inference_api = inference_api
160
275
  self.vector_store_table = None
161
276
  self._qdrant_lock = asyncio.Lock()
162
277
 
@@ -227,19 +342,21 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
227
342
  self.cache[vector_store_id] = index
228
343
  return index
229
344
 
230
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
231
- index = await self._get_and_cache_vector_store_index(vector_db_id)
345
+ async def insert_chunks(
346
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
347
+ ) -> None:
348
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
232
349
  if not index:
233
- raise VectorStoreNotFoundError(vector_db_id)
350
+ raise VectorStoreNotFoundError(vector_store_id)
234
351
 
235
352
  await index.insert_chunks(chunks)
236
353
 
237
354
  async def query_chunks(
238
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
355
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
239
356
  ) -> QueryChunksResponse:
240
- index = await self._get_and_cache_vector_store_index(vector_db_id)
357
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
241
358
  if not index:
242
- raise VectorStoreNotFoundError(vector_db_id)
359
+ raise VectorStoreNotFoundError(vector_store_id)
243
360
 
244
361
  return await index.query_chunks(query, params)
245
362
 
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import WeaviateVectorIOConfig
10
10
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type