llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (458) hide show
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -11,16 +11,9 @@ from typing import Any
11
11
  from numpy.typing import NDArray
12
12
  from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker
13
13
 
14
- from llama_stack.apis.common.errors import VectorStoreNotFoundError
15
- from llama_stack.apis.files import Files
16
- from llama_stack.apis.inference import Inference, InterleavedContent
17
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
18
- from llama_stack.apis.vector_stores import VectorStore
14
+ from llama_stack.core.storage.kvstore import kvstore_impl
19
15
  from llama_stack.log import get_logger
20
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
21
16
  from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
22
- from llama_stack.providers.utils.kvstore import kvstore_impl
23
- from llama_stack.providers.utils.kvstore.api import KVStore
24
17
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
25
18
  from llama_stack.providers.utils.memory.vector_store import (
26
19
  RERANKER_TYPE_WEIGHTED,
@@ -29,6 +22,18 @@ from llama_stack.providers.utils.memory.vector_store import (
29
22
  VectorStoreWithIndex,
30
23
  )
31
24
  from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
25
+ from llama_stack_api import (
26
+ EmbeddedChunk,
27
+ Files,
28
+ Inference,
29
+ InterleavedContent,
30
+ QueryChunksResponse,
31
+ VectorIO,
32
+ VectorStore,
33
+ VectorStoreNotFoundError,
34
+ VectorStoresProtocolPrivate,
35
+ )
36
+ from llama_stack_api.internal.kvstore import KVStore
32
37
 
33
38
  from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
34
39
 
@@ -60,7 +65,7 @@ class MilvusIndex(EmbeddingIndex):
60
65
  if await asyncio.to_thread(self.client.has_collection, self.collection_name):
61
66
  await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name)
62
67
 
63
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
68
+ async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
64
69
  assert len(chunks) == len(embeddings), (
65
70
  f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
66
71
  )
@@ -131,7 +136,7 @@ class MilvusIndex(EmbeddingIndex):
131
136
  output_fields=["*"],
132
137
  search_params={"params": {"radius": score_threshold}},
133
138
  )
134
- chunks = [Chunk(**res["entity"]["chunk_content"]) for res in search_res[0]]
139
+ chunks = [EmbeddedChunk(**res["entity"]["chunk_content"]) for res in search_res[0]]
135
140
  scores = [res["distance"] for res in search_res[0]]
136
141
  return QueryChunksResponse(chunks=chunks, scores=scores)
137
142
 
@@ -158,7 +163,7 @@ class MilvusIndex(EmbeddingIndex):
158
163
  chunks = []
159
164
  scores = []
160
165
  for res in search_res[0]:
161
- chunk = Chunk(**res["entity"]["chunk_content"])
166
+ chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
162
167
  chunks.append(chunk)
163
168
  scores.append(res["distance"]) # BM25 score from Milvus
164
169
 
@@ -186,7 +191,7 @@ class MilvusIndex(EmbeddingIndex):
186
191
  output_fields=["*"],
187
192
  limit=k,
188
193
  )
189
- chunks = [Chunk(**res["chunk_content"]) for res in search_res]
194
+ chunks = [EmbeddedChunk(**res["chunk_content"]) for res in search_res]
190
195
  scores = [1.0] * len(chunks) # Simple binary score for text search
191
196
  return QueryChunksResponse(chunks=chunks, scores=scores)
192
197
 
@@ -238,7 +243,7 @@ class MilvusIndex(EmbeddingIndex):
238
243
  chunks = []
239
244
  scores = []
240
245
  for res in search_res[0]:
241
- chunk = Chunk(**res["entity"]["chunk_content"])
246
+ chunk = EmbeddedChunk(**res["entity"]["chunk_content"])
242
247
  chunks.append(chunk)
243
248
  scores.append(res["distance"])
244
249
 
@@ -268,11 +273,10 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
268
273
  inference_api: Inference,
269
274
  files_api: Files | None,
270
275
  ) -> None:
271
- super().__init__(files_api=files_api, kvstore=None)
276
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
272
277
  self.config = config
273
278
  self.cache = {}
274
279
  self.client = None
275
- self.inference_api = inference_api
276
280
  self.vector_store_table = None
277
281
  self.metadata_collection_name = "openai_vector_stores_metadata"
278
282
 
@@ -351,19 +355,21 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
351
355
  await self.cache[vector_store_id].index.delete()
352
356
  del self.cache[vector_store_id]
353
357
 
354
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
355
- index = await self._get_and_cache_vector_store_index(vector_db_id)
358
+ async def insert_chunks(
359
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
360
+ ) -> None:
361
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
356
362
  if not index:
357
- raise VectorStoreNotFoundError(vector_db_id)
363
+ raise VectorStoreNotFoundError(vector_store_id)
358
364
 
359
365
  await index.insert_chunks(chunks)
360
366
 
361
367
  async def query_chunks(
362
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
368
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
363
369
  ) -> QueryChunksResponse:
364
- index = await self._get_and_cache_vector_store_index(vector_db_id)
370
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
365
371
  if not index:
366
- raise VectorStoreNotFoundError(vector_db_id)
372
+ raise VectorStoreNotFoundError(vector_store_id)
367
373
  return await index.query_chunks(query, params)
368
374
 
369
375
  async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import PGVectorVectorIOConfig
10
10
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -13,19 +13,24 @@ from psycopg2 import sql
13
13
  from psycopg2.extras import Json, execute_values
14
14
  from pydantic import BaseModel, TypeAdapter
15
15
 
16
- from llama_stack.apis.common.errors import VectorStoreNotFoundError
17
- from llama_stack.apis.files import Files
18
- from llama_stack.apis.inference import Inference, InterleavedContent
19
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
20
- from llama_stack.apis.vector_stores import VectorStore
16
+ from llama_stack.core.storage.kvstore import kvstore_impl
21
17
  from llama_stack.log import get_logger
22
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
23
18
  from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
24
- from llama_stack.providers.utils.kvstore import kvstore_impl
25
- from llama_stack.providers.utils.kvstore.api import KVStore
26
19
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
27
20
  from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
28
21
  from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
22
+ from llama_stack_api import (
23
+ EmbeddedChunk,
24
+ Files,
25
+ Inference,
26
+ InterleavedContent,
27
+ QueryChunksResponse,
28
+ VectorIO,
29
+ VectorStore,
30
+ VectorStoreNotFoundError,
31
+ VectorStoresProtocolPrivate,
32
+ )
33
+ from llama_stack_api.internal.kvstore import KVStore
29
34
 
30
35
  from .config import PGVectorVectorIOConfig
31
36
 
@@ -125,7 +130,7 @@ class PGVectorIndex(EmbeddingIndex):
125
130
  log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
126
131
  raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
127
132
 
128
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
133
+ async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
129
134
  assert len(chunks) == len(embeddings), (
130
135
  f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
131
136
  )
@@ -189,7 +194,7 @@ class PGVectorIndex(EmbeddingIndex):
189
194
  score = 1.0 / float(dist) if dist != 0 else float("inf")
190
195
  if score < score_threshold:
191
196
  continue
192
- chunks.append(Chunk(**doc))
197
+ chunks.append(EmbeddedChunk(**doc))
193
198
  scores.append(score)
194
199
 
195
200
  return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -225,7 +230,7 @@ class PGVectorIndex(EmbeddingIndex):
225
230
  for doc, score in results:
226
231
  if score < score_threshold:
227
232
  continue
228
- chunks.append(Chunk(**doc))
233
+ chunks.append(EmbeddedChunk(**doc))
229
234
  scores.append(float(score))
230
235
 
231
236
  return QueryChunksResponse(chunks=chunks, scores=scores)
@@ -327,16 +332,17 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
327
332
  def __init__(
328
333
  self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None
329
334
  ) -> None:
330
- super().__init__(files_api=files_api, kvstore=None)
335
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
331
336
  self.config = config
332
- self.inference_api = inference_api
333
337
  self.conn = None
334
338
  self.cache = {}
335
339
  self.vector_store_table = None
336
340
  self.metadata_collection_name = "openai_vector_stores_metadata"
337
341
 
338
342
  async def initialize(self) -> None:
339
- log.info(f"Initializing PGVector memory adapter with config: {self.config}")
343
+ # Create a safe config representation with masked password for logging
344
+ safe_config = {**self.config.model_dump(exclude={"password"}), "password": "******"}
345
+ log.info(f"Initializing PGVector memory adapter with config: {safe_config}")
340
346
  self.kvstore = await kvstore_impl(self.config.persistence)
341
347
  await self.initialize_openai_vector_stores()
342
348
 
@@ -422,14 +428,16 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
422
428
  raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
423
429
  await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
424
430
 
425
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
426
- index = await self._get_and_cache_vector_store_index(vector_db_id)
431
+ async def insert_chunks(
432
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
433
+ ) -> None:
434
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
427
435
  await index.insert_chunks(chunks)
428
436
 
429
437
  async def query_chunks(
430
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
438
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
431
439
  ) -> QueryChunksResponse:
432
- index = await self._get_and_cache_vector_store_index(vector_db_id)
440
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
433
441
  return await index.query_chunks(query, params)
434
442
 
435
443
  async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import QdrantVectorIOConfig
10
10
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -13,23 +13,24 @@ from numpy.typing import NDArray
13
13
  from qdrant_client import AsyncQdrantClient, models
14
14
  from qdrant_client.models import PointStruct
15
15
 
16
- from llama_stack.apis.common.errors import VectorStoreNotFoundError
17
- from llama_stack.apis.files import Files
18
- from llama_stack.apis.inference import Inference, InterleavedContent
19
- from llama_stack.apis.vector_io import (
20
- Chunk,
16
+ from llama_stack.core.storage.kvstore import kvstore_impl
17
+ from llama_stack.log import get_logger
18
+ from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
19
+ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
20
+ from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
21
+ from llama_stack_api import (
22
+ EmbeddedChunk,
23
+ Files,
24
+ Inference,
25
+ InterleavedContent,
21
26
  QueryChunksResponse,
22
27
  VectorIO,
28
+ VectorStore,
23
29
  VectorStoreChunkingStrategy,
24
30
  VectorStoreFileObject,
31
+ VectorStoreNotFoundError,
32
+ VectorStoresProtocolPrivate,
25
33
  )
26
- from llama_stack.apis.vector_stores import VectorStore
27
- from llama_stack.log import get_logger
28
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
29
- from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
30
- from llama_stack.providers.utils.kvstore import kvstore_impl
31
- from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
32
- from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
33
34
 
34
35
  from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
35
36
 
@@ -65,7 +66,7 @@ class QdrantIndex(EmbeddingIndex):
65
66
  # If the collection does not exist, it will be created in add_chunks.
66
67
  pass
67
68
 
68
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
69
+ async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
69
70
  assert len(chunks) == len(embeddings), (
70
71
  f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
71
72
  )
@@ -117,7 +118,7 @@ class QdrantIndex(EmbeddingIndex):
117
118
  assert point.payload is not None
118
119
 
119
120
  try:
120
- chunk = Chunk(**point.payload["chunk_content"])
121
+ chunk = EmbeddedChunk(**point.payload["chunk_content"])
121
122
  except Exception:
122
123
  log.exception("Failed to parse chunk")
123
124
  continue
@@ -128,7 +129,63 @@ class QdrantIndex(EmbeddingIndex):
128
129
  return QueryChunksResponse(chunks=chunks, scores=scores)
129
130
 
130
131
  async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
131
- raise NotImplementedError("Keyword search is not supported in Qdrant")
132
+ """
133
+ Performs keyword-based search using Qdrant's MatchText filter.
134
+
135
+ Uses Qdrant's query_filter with MatchText to search for chunks containing
136
+ the specified text query string in the chunk content.
137
+
138
+ Args:
139
+ query_string: The text query for keyword search
140
+ k: Number of results to return
141
+ score_threshold: Minimum similarity score threshold
142
+
143
+ Returns:
144
+ QueryChunksResponse with chunks and scores matching the keyword query
145
+ """
146
+ try:
147
+ results = (
148
+ await self.client.query_points(
149
+ collection_name=self.collection_name,
150
+ query_filter=models.Filter(
151
+ must=[
152
+ models.FieldCondition(
153
+ key="chunk_content.content", match=models.MatchText(text=query_string)
154
+ )
155
+ ]
156
+ ),
157
+ limit=k,
158
+ with_payload=True,
159
+ with_vectors=False,
160
+ score_threshold=score_threshold,
161
+ )
162
+ ).points
163
+ except Exception as e:
164
+ log.error(f"Error querying keyword search in Qdrant collection {self.collection_name}: {e}")
165
+ raise
166
+
167
+ chunks, scores = [], []
168
+ for point in results:
169
+ if not isinstance(point, models.ScoredPoint):
170
+ raise RuntimeError(f"Expected ScoredPoint from Qdrant query, got {type(point).__name__}")
171
+ if point.payload is None:
172
+ raise RuntimeError("Qdrant query returned point with no payload")
173
+
174
+ try:
175
+ chunk = EmbeddedChunk(**point.payload["chunk_content"])
176
+ except Exception:
177
+ chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
178
+ point_id = getattr(point, "id", "unknown")
179
+ log.exception(
180
+ f"Failed to parse chunk in collection {self.collection_name}: "
181
+ f"chunk_id={chunk_id}, point_id={point_id}"
182
+ )
183
+ continue
184
+
185
+ chunks.append(chunk)
186
+ scores.append(point.score)
187
+
188
+ return QueryChunksResponse(chunks=chunks, scores=scores)
132
189
 
133
190
  async def query_hybrid(
134
191
  self,
@@ -139,7 +196,66 @@ class QdrantIndex(EmbeddingIndex):
139
196
  reranker_type: str,
140
197
  reranker_params: dict[str, Any] | None = None,
141
198
  ) -> QueryChunksResponse:
142
- raise NotImplementedError("Hybrid search is not supported in Qdrant")
199
+ """
200
+ Hybrid search combining vector similarity and keyword filtering in a single query.
201
+
202
+ Uses Qdrant's native capability to combine a vector query with a query_filter,
203
+ allowing vector similarity search to be filtered by keyword matches in one call.
204
+
205
+ Args:
206
+ embedding: The query embedding vector
207
+ query_string: The text query for keyword filtering
208
+ k: Number of results to return
209
+ score_threshold: Minimum similarity score threshold
210
+ reranker_type: Not used with this approach, but kept for API compatibility
211
+ reranker_params: Not used with this approach, but kept for API compatibility
212
+
213
+ Returns:
214
+ QueryChunksResponse with filtered vector search results
215
+ """
216
+ try:
217
+ results = (
218
+ await self.client.query_points(
219
+ collection_name=self.collection_name,
220
+ query=embedding.tolist(),
221
+ query_filter=models.Filter(
222
+ must=[
223
+ models.FieldCondition(
224
+ key="chunk_content.content", match=models.MatchText(text=query_string)
225
+ )
226
+ ]
227
+ ),
228
+ limit=k,
229
+ with_payload=True,
230
+ score_threshold=score_threshold,
231
+ )
232
+ ).points
233
+ except Exception as e:
234
+ log.error(f"Error querying hybrid search in Qdrant collection {self.collection_name}: {e}")
235
+ raise
236
+
237
+ chunks, scores = [], []
238
+ for point in results:
239
+ if not isinstance(point, models.ScoredPoint):
240
+ raise RuntimeError(f"Expected ScoredPoint from Qdrant query, got {type(point).__name__}")
241
+ if point.payload is None:
242
+ raise RuntimeError("Qdrant query returned point with no payload")
243
+
244
+ try:
245
+ chunk = EmbeddedChunk(**point.payload["chunk_content"])
246
+ except Exception:
247
+ chunk_id = point.payload.get(CHUNK_ID_KEY, "unknown") if point.payload else "unknown"
248
+ point_id = getattr(point, "id", "unknown")
249
+ log.exception(
250
+ f"Failed to parse chunk in collection {self.collection_name}: "
251
+ f"chunk_id={chunk_id}, point_id={point_id}"
252
+ )
253
+ continue
254
+
255
+ chunks.append(chunk)
256
+ scores.append(point.score)
257
+
258
+ return QueryChunksResponse(chunks=chunks, scores=scores)
143
259
 
144
260
  async def delete(self):
145
261
  await self.client.delete_collection(collection_name=self.collection_name)
@@ -152,11 +268,10 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
152
268
  inference_api: Inference,
153
269
  files_api: Files | None = None,
154
270
  ) -> None:
155
- super().__init__(files_api=files_api, kvstore=None)
271
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
156
272
  self.config = config
157
273
  self.client: AsyncQdrantClient = None
158
274
  self.cache = {}
159
- self.inference_api = inference_api
160
275
  self.vector_store_table = None
161
276
  self._qdrant_lock = asyncio.Lock()
162
277
 
@@ -227,19 +342,21 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
227
342
  self.cache[vector_store_id] = index
228
343
  return index
229
344
 
230
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
231
- index = await self._get_and_cache_vector_store_index(vector_db_id)
345
+ async def insert_chunks(
346
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
347
+ ) -> None:
348
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
232
349
  if not index:
233
- raise VectorStoreNotFoundError(vector_db_id)
350
+ raise VectorStoreNotFoundError(vector_store_id)
234
351
 
235
352
  await index.insert_chunks(chunks)
236
353
 
237
354
  async def query_chunks(
238
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
355
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
239
356
  ) -> QueryChunksResponse:
240
- index = await self._get_and_cache_vector_store_index(vector_db_id)
357
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
241
358
  if not index:
242
- raise VectorStoreNotFoundError(vector_db_id)
359
+ raise VectorStoreNotFoundError(vector_store_id)
243
360
 
244
361
  return await index.query_chunks(query, params)
245
362
 
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import WeaviateVectorIOConfig
10
10
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -12,17 +12,9 @@ from numpy.typing import NDArray
12
12
  from weaviate.classes.init import Auth
13
13
  from weaviate.classes.query import Filter, HybridFusion
14
14
 
15
- from llama_stack.apis.common.content_types import InterleavedContent
16
- from llama_stack.apis.common.errors import VectorStoreNotFoundError
17
- from llama_stack.apis.files import Files
18
- from llama_stack.apis.inference import Inference
19
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
20
- from llama_stack.apis.vector_stores import VectorStore
21
15
  from llama_stack.core.request_headers import NeedsRequestProviderData
16
+ from llama_stack.core.storage.kvstore import kvstore_impl
22
17
  from llama_stack.log import get_logger
23
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
24
- from llama_stack.providers.utils.kvstore import kvstore_impl
25
- from llama_stack.providers.utils.kvstore.api import KVStore
26
18
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
27
19
  from llama_stack.providers.utils.memory.vector_store import (
28
20
  RERANKER_TYPE_RRF,
@@ -31,6 +23,18 @@ from llama_stack.providers.utils.memory.vector_store import (
31
23
  VectorStoreWithIndex,
32
24
  )
33
25
  from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
26
+ from llama_stack_api import (
27
+ EmbeddedChunk,
28
+ Files,
29
+ Inference,
30
+ InterleavedContent,
31
+ QueryChunksResponse,
32
+ VectorIO,
33
+ VectorStore,
34
+ VectorStoreNotFoundError,
35
+ VectorStoresProtocolPrivate,
36
+ )
37
+ from llama_stack_api.internal.kvstore import KVStore
34
38
 
35
39
  from .config import WeaviateVectorIOConfig
36
40
 
@@ -53,7 +57,7 @@ class WeaviateIndex(EmbeddingIndex):
53
57
  async def initialize(self):
54
58
  pass
55
59
 
56
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
60
+ async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
57
61
  assert len(chunks) == len(embeddings), (
58
62
  f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
59
63
  )
@@ -112,7 +116,7 @@ class WeaviateIndex(EmbeddingIndex):
112
116
  chunk_json = doc.properties["chunk_content"]
113
117
  try:
114
118
  chunk_dict = json.loads(chunk_json)
115
- chunk = Chunk(**chunk_dict)
119
+ chunk = EmbeddedChunk(**chunk_dict)
116
120
  except Exception:
117
121
  log.exception(f"Failed to parse document: {chunk_json}")
118
122
  continue
@@ -172,7 +176,7 @@ class WeaviateIndex(EmbeddingIndex):
172
176
  chunk_json = doc.properties["chunk_content"]
173
177
  try:
174
178
  chunk_dict = json.loads(chunk_json)
175
- chunk = Chunk(**chunk_dict)
179
+ chunk = EmbeddedChunk(**chunk_dict)
176
180
  except Exception:
177
181
  log.exception(f"Failed to parse document: {chunk_json}")
178
182
  continue
@@ -241,7 +245,7 @@ class WeaviateIndex(EmbeddingIndex):
241
245
  chunk_json = doc.properties["chunk_content"]
242
246
  try:
243
247
  chunk_dict = json.loads(chunk_json)
244
- chunk = Chunk(**chunk_dict)
248
+ chunk = EmbeddedChunk(**chunk_dict)
245
249
  except Exception:
246
250
  log.exception(f"Failed to parse document: {chunk_json}")
247
251
  continue
@@ -259,9 +263,8 @@ class WeaviateIndex(EmbeddingIndex):
259
263
 
260
264
  class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate):
261
265
  def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
262
- super().__init__(files_api=files_api, kvstore=None)
266
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
263
267
  self.config = config
264
- self.inference_api = inference_api
265
268
  self.client_cache = {}
266
269
  self.cache = {}
267
270
  self.vector_store_table = None
@@ -369,19 +372,21 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
369
372
  self.cache[vector_store_id] = index
370
373
  return index
371
374
 
372
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
373
- index = await self._get_and_cache_vector_store_index(vector_db_id)
375
+ async def insert_chunks(
376
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
377
+ ) -> None:
378
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
374
379
  if not index:
375
- raise VectorStoreNotFoundError(vector_db_id)
380
+ raise VectorStoreNotFoundError(vector_store_id)
376
381
 
377
382
  await index.insert_chunks(chunks)
378
383
 
379
384
  async def query_chunks(
380
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
385
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
381
386
  ) -> QueryChunksResponse:
382
- index = await self._get_and_cache_vector_store_index(vector_db_id)
387
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
383
388
  if not index:
384
- raise VectorStoreNotFoundError(vector_db_id)
389
+ raise VectorStoreNotFoundError(vector_store_id)
385
390
 
386
391
  return await index.query_chunks(query, params)
387
392
 
@@ -7,12 +7,8 @@
7
7
  from enum import Enum
8
8
  from typing import Any
9
9
 
10
- from llama_stack.apis.common.type_system import (
11
- ChatCompletionInputType,
12
- CompletionInputType,
13
- StringType,
14
- )
15
10
  from llama_stack.core.datatypes import Api
11
+ from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType
16
12
 
17
13
 
18
14
  class ColumnName(Enum):
@@ -9,7 +9,7 @@ import json
9
9
  from fastapi import Request
10
10
  from pydantic import BaseModel, ValidationError
11
11
 
12
- from llama_stack.apis.files import ExpiresAfter
12
+ from llama_stack_api import ExpiresAfter
13
13
 
14
14
 
15
15
  async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None:
@@ -17,7 +17,7 @@ from llama_stack.log import get_logger
17
17
  if TYPE_CHECKING:
18
18
  from sentence_transformers import SentenceTransformer
19
19
 
20
- from llama_stack.apis.inference import (
20
+ from llama_stack_api import (
21
21
  ModelStore,
22
22
  OpenAIEmbeddingData,
23
23
  OpenAIEmbeddingsRequestWithExtraBody,