llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -14,35 +14,32 @@ import httpx
  from fastapi import UploadFile
  from pydantic import TypeAdapter

- from llama_stack.apis.common.content_types import (
+ from llama_stack.log import get_logger
+ from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+ from llama_stack.providers.utils.memory.vector_store import parse_data_url
+ from llama_stack_api import (
      URL,
+     Files,
+     Inference,
      InterleavedContent,
      InterleavedContentItem,
-     TextContentItem,
- )
- from llama_stack.apis.files import Files, OpenAIFilePurpose
- from llama_stack.apis.inference import Inference
- from llama_stack.apis.tools import (
      ListToolDefsResponse,
+     OpenAIFilePurpose,
+     QueryChunksResponse,
      RAGDocument,
      RAGQueryConfig,
      RAGQueryResult,
-     RAGToolRuntime,
+     TextContentItem,
      ToolDef,
      ToolGroup,
+     ToolGroupsProtocolPrivate,
      ToolInvocationResult,
      ToolRuntime,
- )
- from llama_stack.apis.vector_io import (
-     QueryChunksResponse,
+     UploadFileRequest,
      VectorIO,
      VectorStoreChunkingStrategyStatic,
      VectorStoreChunkingStrategyStaticConfig,
  )
- from llama_stack.log import get_logger
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
- from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
- from llama_stack.providers.utils.memory.vector_store import parse_data_url

  from .config import RagToolRuntimeConfig
  from .context_retriever import generate_rag_query
@@ -91,7 +88,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
      return content_str.encode("utf-8"), "text/plain"


- class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
+ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
      def __init__(
          self,
          config: RagToolRuntimeConfig,
@@ -119,9 +116,11 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
      async def insert(
          self,
          documents: list[RAGDocument],
-         vector_db_id: str,
-         chunk_size_in_tokens: int = 512,
+         vector_store_id: str,
+         chunk_size_in_tokens: int | None = None,
      ) -> None:
+         if chunk_size_in_tokens is None:
+             chunk_size_in_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_size_tokens
          if not documents:
              return

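Callers that previously relied on the hard-coded chunk_size_in_tokens=512 default now get the value from the distribution's vector-stores configuration. A minimal sketch of the None-sentinel pattern this hunk introduces (the FileIngestionParams stand-in below is illustrative; the real attributes live under config.vector_stores_config.file_ingestion_params):

from dataclasses import dataclass


@dataclass
class FileIngestionParams:
    # Illustrative defaults; the real values come from the stack config.
    default_chunk_size_tokens: int = 512
    default_chunk_overlap_tokens: int = 128


def resolve_chunk_size(params: FileIngestionParams, chunk_size_in_tokens: int | None = None) -> int:
    # None now means "defer to configuration" rather than a baked-in 512.
    return params.default_chunk_size_tokens if chunk_size_in_tokens is None else chunk_size_in_tokens


assert resolve_chunk_size(FileIngestionParams()) == 512
assert resolve_chunk_size(FileIngestionParams(), 256) == 256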
@@ -143,29 +142,31 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti

              try:
                  created_file = await self.files_api.openai_upload_file(
-                     file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
+                     request=UploadFileRequest(purpose=OpenAIFilePurpose.ASSISTANTS),
+                     file=upload_file,
                  )
              except Exception as e:
                  log.error(f"Failed to upload file for document {doc.document_id}: {e}")
                  continue

+             overlap_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_overlap_tokens
              chunking_strategy = VectorStoreChunkingStrategyStatic(
                  static=VectorStoreChunkingStrategyStaticConfig(
                      max_chunk_size_tokens=chunk_size_in_tokens,
-                     chunk_overlap_tokens=chunk_size_in_tokens // 4,
+                     chunk_overlap_tokens=overlap_tokens,
                  )
              )

              try:
                  await self.vector_io_api.openai_attach_file_to_vector_store(
-                     vector_store_id=vector_db_id,
+                     vector_store_id=vector_store_id,
                      file_id=created_file.id,
                      attributes=doc.metadata,
                      chunking_strategy=chunking_strategy,
                  )
              except Exception as e:
                  log.error(
-                     f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
+                     f"Failed to attach file {created_file.id} to vector store {vector_store_id} for document {doc.document_id}: {e}"
                  )
                  continue

@@ -176,15 +177,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
      async def query(
          self,
          content: InterleavedContent,
-         vector_db_ids: list[str],
+         vector_store_ids: list[str],
          query_config: RAGQueryConfig | None = None,
      ) -> RAGQueryResult:
-         if not vector_db_ids:
+         if not vector_store_ids:
              raise ValueError(
                  "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID."
              )

-         query_config = query_config or RAGQueryConfig()
+         query_config = query_config or RAGQueryConfig(
+             max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
+         )
          query = await generate_rag_query(
              query_config.query_generator_config,
              content,
@@ -192,7 +195,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
          )
          tasks = [
              self.vector_io_api.query_chunks(
-                 vector_db_id=vector_db_id,
+                 vector_store_id=vector_store_id,
                  query=query,
                  params={
                      "mode": query_config.mode,
@@ -201,18 +204,20 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                      "ranker": query_config.ranker,
                  },
              )
-             for vector_db_id in vector_db_ids
+             for vector_store_id in vector_store_ids
          ]
          results: list[QueryChunksResponse] = await asyncio.gather(*tasks)

          chunks = []
          scores = []

-         for vector_db_id, result in zip(vector_db_ids, results, strict=False):
-             for chunk, score in zip(result.chunks, result.scores, strict=False):
-                 if not hasattr(chunk, "metadata") or chunk.metadata is None:
+         for vector_store_id, result in zip(vector_store_ids, results, strict=False):
+             for embedded_chunk, score in zip(result.chunks, result.scores, strict=False):
+                 # EmbeddedChunk inherits from Chunk, so use it directly
+                 chunk = embedded_chunk
+                 if chunk.metadata is None:
                      chunk.metadata = {}
-                 chunk.metadata["vector_db_id"] = vector_db_id
+                 chunk.metadata["vector_store_id"] = vector_store_id

                  chunks.append(chunk)
                  scores.append(score)
@@ -225,13 +230,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
          chunks = chunks[: query_config.max_chunks]

          tokens = 0
-         picked: list[InterleavedContentItem] = [
-             TextContentItem(
-                 text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n"
-             )
-         ]
-         for i, chunk in enumerate(chunks):
-             metadata = chunk.metadata
+
+         # Get templates from vector stores config
+         vector_stores_config = self.config.vector_stores_config
+         header_template = vector_stores_config.file_search_params.header_template
+         footer_template = vector_stores_config.file_search_params.footer_template
+         chunk_template = vector_stores_config.context_prompt_params.chunk_annotation_template
+         context_template = vector_stores_config.context_prompt_params.context_template
+
+         picked: list[InterleavedContentItem] = [TextContentItem(text=header_template.format(num_chunks=len(chunks)))]
+         for i, embedded_chunk in enumerate(chunks):
+             metadata = embedded_chunk.metadata
              tokens += metadata.get("token_count", 0)
              tokens += metadata.get("metadata_token_count", 0)

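The BEGIN/END framing strings and per-chunk annotation are now str.format templates read from configuration instead of literals. A hedged sketch of how they compose (the header and footer texts are taken from the removed literals above; the chunk_template body is an assumption, since only its placeholder names index, chunk, and metadata appear in this diff):

# Hypothetical template values; the real ones come from vector_stores_config.
header_template = "knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n"
chunk_template = "Result {index}\nContent: {chunk}\nMetadata: {metadata}\n"
footer_template = "END of knowledge_search tool results.\n"

chunks = ["first chunk text", "second chunk text"]
parts = [header_template.format(num_chunks=len(chunks))]
for i, chunk in enumerate(chunks):
    parts.append(chunk_template.format(index=i + 1, chunk=chunk, metadata={}))
parts.append(footer_template)
print("".join(parts))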
@@ -250,22 +259,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
              metadata_keys_to_exclude_from_context = [
                  "token_count",
                  "metadata_token_count",
-                 "vector_db_id",
+                 "vector_store_id",
              ]
              metadata_for_context = {}
              for k in chunk_metadata_keys_to_include_from_context:
-                 metadata_for_context[k] = getattr(chunk.chunk_metadata, k)
+                 metadata_for_context[k] = getattr(embedded_chunk.chunk_metadata, k)
              for k in metadata:
                  if k not in metadata_keys_to_exclude_from_context:
                      metadata_for_context[k] = metadata[k]

-             text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context)
+             text_content = chunk_template.format(index=i + 1, chunk=embedded_chunk, metadata=metadata_for_context)
              picked.append(TextContentItem(text=text_content))

-         picked.append(TextContentItem(text="END of knowledge_search tool results.\n"))
+         picked.append(TextContentItem(text=footer_template))
          picked.append(
              TextContentItem(
-                 text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n',
+                 text=context_template.format(query=interleaved_content_as_str(content), annotation_instruction="")
              )
          )

@@ -275,12 +284,15 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                  "document_ids": [c.document_id for c in chunks[: len(picked)]],
                  "chunks": [c.content for c in chunks[: len(picked)]],
                  "scores": scores[: len(picked)],
-                 "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
+                 "vector_store_ids": [c.metadata["vector_store_id"] for c in chunks[: len(picked)]],
              },
          )

      async def list_runtime_tools(
-         self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
+         self,
+         tool_group_id: str | None = None,
+         mcp_endpoint: URL | None = None,
+         authorization: str | None = None,
      ) -> ListToolDefsResponse:
          # Parameters are not listed since these methods are not yet invoked automatically
          # by the LLM. The method is only implemented so things like /tools can list without
@@ -308,18 +320,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
              ]
          )

-     async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
-         vector_db_ids = kwargs.get("vector_db_ids", [])
+     async def invoke_tool(
+         self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+     ) -> ToolInvocationResult:
+         vector_store_ids = kwargs.get("vector_store_ids", [])
          query_config = kwargs.get("query_config")
          if query_config:
              query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
          else:
-             query_config = RAGQueryConfig()
+             query_config = RAGQueryConfig(
+                 max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
+             )

          query = kwargs["query"]
          result = await self.query(
              content=query,
-             vector_db_ids=vector_db_ids,
+             vector_store_ids=vector_store_ids,
              query_config=query_config,
          )

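For callers, the tool arguments change shape: the kwargs key vector_db_ids becomes vector_store_ids, and invoke_tool grows an optional authorization parameter. A hedged usage sketch, assuming impl is a constructed MemoryToolRuntimeImpl:

from typing import Any


async def run_knowledge_search(impl: Any) -> None:
    # 0.3.5: kwargs={"query": ..., "vector_db_ids": [...]}
    # 0.4.0: kwargs={"query": ..., "vector_store_ids": [...]}, plus an optional
    # authorization token that can be forwarded to downstream providers.
    result = await impl.invoke_tool(
        tool_name="knowledge_search",
        kwargs={"query": "what is RAG?", "vector_store_ids": ["vs_123"]},
        authorization=None,
    )
    print(result)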
llama_stack/providers/inline/vector_io/chroma/__init__.py
@@ -6,7 +6,7 @@

  from typing import Any

- from llama_stack.providers.datatypes import Api
+ from llama_stack_api import Api

  from .config import ChromaVectorIOConfig

llama_stack/providers/inline/vector_io/chroma/config.py
@@ -9,7 +9,7 @@ from typing import Any
  from pydantic import BaseModel, Field

  from llama_stack.core.storage.datatypes import KVStoreReference
- from llama_stack.schema_utils import json_schema_type
+ from llama_stack_api import json_schema_type


  @json_schema_type
llama_stack/providers/inline/vector_io/faiss/__init__.py
@@ -6,7 +6,7 @@

  from typing import Any

- from llama_stack.providers.datatypes import Api
+ from llama_stack_api import Api

  from .config import FaissVectorIOConfig

llama_stack/providers/inline/vector_io/faiss/config.py
@@ -9,7 +9,7 @@ from typing import Any
  from pydantic import BaseModel

  from llama_stack.core.storage.datatypes import KVStoreReference
- from llama_stack.schema_utils import json_schema_type
+ from llama_stack_api import json_schema_type


  @json_schema_type
llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -10,21 +10,28 @@ import io
  import json
  from typing import Any

- import faiss
+ import faiss  # type: ignore[import-untyped]
  import numpy as np
  from numpy.typing import NDArray

- from llama_stack.apis.common.errors import VectorStoreNotFoundError
- from llama_stack.apis.files import Files
- from llama_stack.apis.inference import Inference, InterleavedContent
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
- from llama_stack.apis.vector_stores import VectorStore
+ from llama_stack.core.storage.kvstore import kvstore_impl
  from llama_stack.log import get_logger
- from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
- from llama_stack.providers.utils.kvstore import kvstore_impl
- from llama_stack.providers.utils.kvstore.api import KVStore
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
  from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
+ from llama_stack_api import (
+     EmbeddedChunk,
+     Files,
+     HealthResponse,
+     HealthStatus,
+     Inference,
+     InterleavedContent,
+     QueryChunksResponse,
+     VectorIO,
+     VectorStore,
+     VectorStoreNotFoundError,
+     VectorStoresProtocolPrivate,
+ )
+ from llama_stack_api.internal.kvstore import KVStore

  from .config import FaissVectorIOConfig

@@ -41,7 +48,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_conten
  class FaissIndex(EmbeddingIndex):
      def __init__(self, dimension: int, kvstore: KVStore | None = None, bank_id: str | None = None):
          self.index = faiss.IndexFlatL2(dimension)
-         self.chunk_by_index: dict[int, Chunk] = {}
+         self.chunk_by_index: dict[int, EmbeddedChunk] = {}
          self.kvstore = kvstore
          self.bank_id = bank_id

@@ -65,12 +72,14 @@ class FaissIndex(EmbeddingIndex):

          if stored_data:
              data = json.loads(stored_data)
-             self.chunk_by_index = {int(k): Chunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()}
+             self.chunk_by_index = {
+                 int(k): EmbeddedChunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()
+             }

              buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
              try:
                  self.index = faiss.deserialize_index(np.load(buffer, allow_pickle=False))
-                 self.chunk_ids = [chunk.chunk_id for chunk in self.chunk_by_index.values()]
+                 self.chunk_ids = [embedded_chunk.chunk_id for embedded_chunk in self.chunk_by_index.values()]
              except Exception as e:
                  logger.debug(e, exc_info=True)
                  raise ValueError(
@@ -100,19 +109,24 @@ class FaissIndex(EmbeddingIndex):

          await self.kvstore.delete(f"{FAISS_INDEX_PREFIX}{self.bank_id}")

-     async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
-         # Add dimension check
+     async def add_chunks(self, embedded_chunks: list[EmbeddedChunk]):
+         if not embedded_chunks:
+             return
+
+         # Extract embeddings and validate dimensions
+         embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
          embedding_dim = embeddings.shape[1] if len(embeddings.shape) > 1 else embeddings.shape[0]
          if embedding_dim != self.index.d:
              raise ValueError(f"Embedding dimension mismatch. Expected {self.index.d}, got {embedding_dim}")

+         # Store chunks by index
          indexlen = len(self.chunk_by_index)
-         for i, chunk in enumerate(chunks):
-             self.chunk_by_index[indexlen + i] = chunk
+         for i, embedded_chunk in enumerate(embedded_chunks):
+             self.chunk_by_index[indexlen + i] = embedded_chunk

          async with self.chunk_id_lock:
-             self.index.add(np.array(embeddings).astype(np.float32))
-             self.chunk_ids.extend([chunk.chunk_id for chunk in chunks])
+             self.index.add(embeddings)
+             self.chunk_ids.extend([ec.chunk_id for ec in embedded_chunks])  # EmbeddedChunk inherits from Chunk

          # Save updated index
          await self._save_index()
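For context, the FAISS calls the reworked add_chunks depends on: embeddings must be a float32 matrix whose column count equals index.d. A self-contained sketch with synthetic data (no llama-stack types; assumes the faiss-cpu package is installed):

import faiss
import numpy as np

dim = 8
index = faiss.IndexFlatL2(dim)

# add_chunks now derives this matrix from EmbeddedChunk.embedding values
# instead of receiving a separate NDArray argument.
embeddings = np.random.rand(3, dim).astype(np.float32)
assert embeddings.shape[1] == index.d  # the dimension check from the hunk above

index.add(embeddings)
distances, indices = index.search(embeddings[:1], 2)
print(indices[0], distances[0])  # nearest neighbor of vector 0 is itself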
@@ -144,8 +158,8 @@ class FaissIndex(EmbeddingIndex):

      async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
          distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
-         chunks = []
-         scores = []
+         chunks: list[EmbeddedChunk] = []
+         scores: list[float] = []
          for d, i in zip(distances[0], indices[0], strict=False):
              if i < 0:
                  continue
@@ -178,9 +192,8 @@ class FaissIndex(EmbeddingIndex):

  class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
      def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
-         super().__init__(files_api=files_api, kvstore=None)
+         super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
          self.config = config
-         self.inference_api = inference_api
          self.cache: dict[str, VectorStoreWithIndex] = {}

      async def initialize(self) -> None:
@@ -271,19 +284,21 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
          self.cache[vector_store_id] = index
          return index

-     async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-         index = self.cache.get(vector_db_id)
+     async def insert_chunks(
+         self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+     ) -> None:
+         index = self.cache.get(vector_store_id)
          if index is None:
-             raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
+             raise ValueError(f"Vector DB {vector_store_id} not found. found: {self.cache.keys()}")

          await index.insert_chunks(chunks)

      async def query_chunks(
-         self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+         self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
      ) -> QueryChunksResponse:
-         index = self.cache.get(vector_db_id)
+         index = self.cache.get(vector_store_id)
          if index is None:
-             raise VectorStoreNotFoundError(vector_db_id)
+             raise VectorStoreNotFoundError(vector_store_id)

          return await index.query_chunks(query, params)

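The same rename ripples into the adapter's public surface: vector_db_id parameters become vector_store_id, and insert_chunks now expects chunks that already carry embeddings. A hedged before/after sketch, assuming adapter is a constructed FaissVectorIOAdapter and embedded_chunks is a list of EmbeddedChunk:

from typing import Any


async def reindex(adapter: Any, embedded_chunks: list[Any]) -> None:
    # 0.3.5: await adapter.insert_chunks(vector_db_id="vs_123", chunks=plain_chunks)
    # 0.4.0: chunks arrive pre-embedded, keyed by vector_store_id.
    await adapter.insert_chunks(vector_store_id="vs_123", chunks=embedded_chunks)
    response = await adapter.query_chunks(vector_store_id="vs_123", query="what changed?")
    print(response.scores)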
llama_stack/providers/inline/vector_io/milvus/__init__.py
@@ -6,7 +6,7 @@

  from typing import Any

- from llama_stack.providers.datatypes import Api
+ from llama_stack_api import Api

  from .config import MilvusVectorIOConfig

llama_stack/providers/inline/vector_io/milvus/config.py
@@ -9,7 +9,7 @@ from typing import Any
  from pydantic import BaseModel, Field

  from llama_stack.core.storage.datatypes import KVStoreReference
- from llama_stack.schema_utils import json_schema_type
+ from llama_stack_api import json_schema_type


  @json_schema_type
llama_stack/providers/inline/vector_io/qdrant/__init__.py
@@ -6,7 +6,7 @@

  from typing import Any

- from llama_stack.providers.datatypes import Api
+ from llama_stack_api import Api

  from .config import QdrantVectorIOConfig

llama_stack/providers/inline/vector_io/qdrant/config.py
@@ -10,7 +10,7 @@ from typing import Any
  from pydantic import BaseModel

  from llama_stack.core.storage.datatypes import KVStoreReference
- from llama_stack.schema_utils import json_schema_type
+ from llama_stack_api import json_schema_type


  @json_schema_type
llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
@@ -6,7 +6,7 @@

  from typing import Any

- from llama_stack.providers.datatypes import Api
+ from llama_stack_api import Api

  from .config import SQLiteVectorIOConfig

llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -11,18 +11,11 @@ import struct
  from typing import Any

  import numpy as np
- import sqlite_vec
+ import sqlite_vec  # type: ignore[import-untyped]
  from numpy.typing import NDArray

- from llama_stack.apis.common.errors import VectorStoreNotFoundError
- from llama_stack.apis.files import Files
- from llama_stack.apis.inference import Inference
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
- from llama_stack.apis.vector_stores import VectorStore
+ from llama_stack.core.storage.kvstore import kvstore_impl
  from llama_stack.log import get_logger
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
- from llama_stack.providers.utils.kvstore import kvstore_impl
- from llama_stack.providers.utils.kvstore.api import KVStore
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
  from llama_stack.providers.utils.memory.vector_store import (
      RERANKER_TYPE_RRF,
@@ -31,6 +24,17 @@ from llama_stack.providers.utils.memory.vector_store import (
      VectorStoreWithIndex,
  )
  from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
+ from llama_stack_api import (
+     EmbeddedChunk,
+     Files,
+     Inference,
+     QueryChunksResponse,
+     VectorIO,
+     VectorStore,
+     VectorStoreNotFoundError,
+     VectorStoresProtocolPrivate,
+ )
+ from llama_stack_api.internal.kvstore import KVStore

  logger = get_logger(name=__name__, category="vector_io")

@@ -137,14 +141,16 @@ class SQLiteVecIndex(EmbeddingIndex):

          await asyncio.to_thread(_drop_tables)

-     async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray, batch_size: int = 500):
+     async def add_chunks(self, embedded_chunks: list[EmbeddedChunk], batch_size: int = 500):
          """
-         Add new chunks along with their embeddings using batch inserts.
-         For each chunk, we insert its JSON into the metadata table and then insert its
+         Add new embedded chunks using batch inserts.
+         For each embedded chunk, we insert the chunk JSON into the metadata table and then insert its
          embedding (serialized to raw bytes) into the virtual table using the assigned rowid.
          If any insert fails, the transaction is rolled back to maintain consistency.
          Also inserts chunk content into FTS table for keyword search support.
          """
+         chunks = embedded_chunks  # EmbeddedChunk now inherits from Chunk
+         embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
          assert all(isinstance(chunk.content, str) for chunk in chunks), "SQLiteVecIndex only supports text chunks"

          def _execute_all_batch_inserts():
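The docstring's "serialized to raw bytes" step is the packed-float32 encoding that sqlite-vec virtual tables store; this file already imports struct for it. A minimal standalone sketch of that round trip (independent of the provider code):

import struct

embedding = [0.1, 0.2, 0.3]
# Pack the vector as consecutive 4-byte floats, the blob form sqlite-vec stores.
blob = struct.pack(f"{len(embedding)}f", *embedding)
assert len(blob) == 4 * len(embedding)

restored = struct.unpack(f"{len(blob) // 4}f", blob)
print([round(x, 3) for x in restored])  # [0.1, 0.2, 0.3], modulo float32 precision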
@@ -229,11 +235,11 @@ class SQLiteVecIndex(EmbeddingIndex):
                  if score < score_threshold:
                      continue
                  try:
-                     chunk = Chunk.model_validate_json(chunk_json)
+                     embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
                  except Exception as e:
                      logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                      continue
-                 chunks.append(chunk)
+                 chunks.append(embedded_chunk)
                  scores.append(score)
          return QueryChunksResponse(chunks=chunks, scores=scores)

@@ -270,11 +276,11 @@ class SQLiteVecIndex(EmbeddingIndex):
                  if score > -score_threshold:
                      continue
                  try:
-                     chunk = Chunk.model_validate_json(chunk_json)
+                     embedded_chunk = EmbeddedChunk.model_validate_json(chunk_json)
                  except Exception as e:
                      logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
                      continue
-                 chunks.append(chunk)
+                 chunks.append(embedded_chunk)
                  scores.append(score)
          return QueryChunksResponse(chunks=chunks, scores=scores)

@@ -308,13 +314,14 @@ class SQLiteVecIndex(EmbeddingIndex):
          vector_response = await self.query_vector(embedding, k, score_threshold)
          keyword_response = await self.query_keyword(query_string, k, score_threshold)

-         # Convert responses to score dictionaries using chunk_id
+         # Convert responses to score dictionaries using chunk_id (EmbeddedChunk inherits from Chunk)
          vector_scores = {
-             chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
+             embedded_chunk.chunk_id: score
+             for embedded_chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
          }
          keyword_scores = {
-             chunk.chunk_id: score
-             for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
+             embedded_chunk.chunk_id: score
+             for embedded_chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
          }

          # Combine scores using the reranking utility
@@ -329,10 +336,10 @@ class SQLiteVecIndex(EmbeddingIndex):
          # Filter by score threshold
          filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]

-         # Create a map of chunk_id to chunk for both responses
-         chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
+         # Create a map of chunk_id to embedded_chunk for both responses
+         chunk_map = {ec.chunk_id: ec for ec in vector_response.chunks + keyword_response.chunks}

-         # Use the map to look up chunks by their IDs
+         # Use the map to look up embedded chunks by their IDs
          chunks = []
          scores = []
          for doc_id, score in filtered_items:
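The two score maps are then fused by the imported WeightedInMemoryAggregator, with RRF (RERANKER_TYPE_RRF) as one available strategy. A hedged sketch of plain reciprocal rank fusion over chunk_id-to-score maps; the damping constant 60 is the conventional RRF default, not necessarily what the aggregator uses:

def rrf_combine(vector_scores: dict[str, float], keyword_scores: dict[str, float], k: int = 60) -> dict[str, float]:
    # Each result list contributes 1 / (k + rank) for every chunk it contains.
    combined: dict[str, float] = {}
    for scores in (vector_scores, keyword_scores):
        ranked = sorted(scores, key=scores.get, reverse=True)
        for rank, chunk_id in enumerate(ranked, start=1):
            combined[chunk_id] = combined.get(chunk_id, 0.0) + 1.0 / (k + rank)
    return combined


fused = rrf_combine({"c1": 0.9, "c2": 0.4}, {"c2": 7.0, "c3": 3.0})
top_k_items = sorted(fused.items(), key=lambda kv: kv[1], reverse=True)
print(top_k_items)  # c2 ranks first: it appears in both result lists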
@@ -382,9 +389,8 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
      """

      def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
-         super().__init__(files_api=files_api, kvstore=None)
+         super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
          self.config = config
-         self.inference_api = inference_api
          self.cache: dict[str, VectorStoreWithIndex] = {}
          self.vector_store_table = None

@@ -458,20 +464,21 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
          await self.cache[vector_store_id].index.delete()
          del self.cache[vector_store_id]

-     async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-         index = await self._get_and_cache_vector_store_index(vector_db_id)
+     async def insert_chunks(
+         self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+     ) -> None:
+         index = await self._get_and_cache_vector_store_index(vector_store_id)
          if not index:
-             raise VectorStoreNotFoundError(vector_db_id)
-         # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
-         # and then call our index's add_chunks.
+             raise VectorStoreNotFoundError(vector_store_id)
+         # The VectorStoreWithIndex helper validates embeddings and calls the index's add_chunks method
          await index.insert_chunks(chunks)

      async def query_chunks(
-         self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
+         self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
      ) -> QueryChunksResponse:
-         index = await self._get_and_cache_vector_store_index(vector_db_id)
+         index = await self._get_and_cache_vector_store_index(vector_store_id)
          if not index:
-             raise VectorStoreNotFoundError(vector_db_id)
+             raise VectorStoreNotFoundError(vector_store_id)
          return await index.query_chunks(query, params)

      async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: