llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -14,35 +14,32 @@ import httpx
14
14
  from fastapi import UploadFile
15
15
  from pydantic import TypeAdapter
16
16
 
17
- from llama_stack.apis.common.content_types import (
17
+ from llama_stack.log import get_logger
18
+ from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
19
+ from llama_stack.providers.utils.memory.vector_store import parse_data_url
20
+ from llama_stack_api import (
18
21
  URL,
22
+ Files,
23
+ Inference,
19
24
  InterleavedContent,
20
25
  InterleavedContentItem,
21
- TextContentItem,
22
- )
23
- from llama_stack.apis.files import Files, OpenAIFilePurpose
24
- from llama_stack.apis.inference import Inference
25
- from llama_stack.apis.tools import (
26
26
  ListToolDefsResponse,
27
+ OpenAIFilePurpose,
28
+ QueryChunksResponse,
27
29
  RAGDocument,
28
30
  RAGQueryConfig,
29
31
  RAGQueryResult,
30
- RAGToolRuntime,
32
+ TextContentItem,
31
33
  ToolDef,
32
34
  ToolGroup,
35
+ ToolGroupsProtocolPrivate,
33
36
  ToolInvocationResult,
34
37
  ToolRuntime,
35
- )
36
- from llama_stack.apis.vector_io import (
37
- QueryChunksResponse,
38
+ UploadFileRequest,
38
39
  VectorIO,
39
40
  VectorStoreChunkingStrategyStatic,
40
41
  VectorStoreChunkingStrategyStaticConfig,
41
42
  )
42
- from llama_stack.log import get_logger
43
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
44
- from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
45
- from llama_stack.providers.utils.memory.vector_store import parse_data_url
46
43
 
47
44
  from .config import RagToolRuntimeConfig
48
45
  from .context_retriever import generate_rag_query
@@ -91,7 +88,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
91
88
  return content_str.encode("utf-8"), "text/plain"
92
89
 
93
90
 
94
- class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
91
+ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
95
92
  def __init__(
96
93
  self,
97
94
  config: RagToolRuntimeConfig,
@@ -119,9 +116,11 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
119
116
  async def insert(
120
117
  self,
121
118
  documents: list[RAGDocument],
122
- vector_db_id: str,
123
- chunk_size_in_tokens: int = 512,
119
+ vector_store_id: str,
120
+ chunk_size_in_tokens: int | None = None,
124
121
  ) -> None:
122
+ if chunk_size_in_tokens is None:
123
+ chunk_size_in_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_size_tokens
125
124
  if not documents:
126
125
  return
127
126
 
@@ -143,29 +142,31 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
143
142
 
144
143
  try:
145
144
  created_file = await self.files_api.openai_upload_file(
146
- file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
145
+ request=UploadFileRequest(purpose=OpenAIFilePurpose.ASSISTANTS),
146
+ file=upload_file,
147
147
  )
148
148
  except Exception as e:
149
149
  log.error(f"Failed to upload file for document {doc.document_id}: {e}")
150
150
  continue
151
151
 
152
+ overlap_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_overlap_tokens
152
153
  chunking_strategy = VectorStoreChunkingStrategyStatic(
153
154
  static=VectorStoreChunkingStrategyStaticConfig(
154
155
  max_chunk_size_tokens=chunk_size_in_tokens,
155
- chunk_overlap_tokens=chunk_size_in_tokens // 4,
156
+ chunk_overlap_tokens=overlap_tokens,
156
157
  )
157
158
  )
158
159
 
159
160
  try:
160
161
  await self.vector_io_api.openai_attach_file_to_vector_store(
161
- vector_store_id=vector_db_id,
162
+ vector_store_id=vector_store_id,
162
163
  file_id=created_file.id,
163
164
  attributes=doc.metadata,
164
165
  chunking_strategy=chunking_strategy,
165
166
  )
166
167
  except Exception as e:
167
168
  log.error(
168
- f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
169
+ f"Failed to attach file {created_file.id} to vector store {vector_store_id} for document {doc.document_id}: {e}"
169
170
  )
170
171
  continue
171
172
 
@@ -176,15 +177,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
176
177
  async def query(
177
178
  self,
178
179
  content: InterleavedContent,
179
- vector_db_ids: list[str],
180
+ vector_store_ids: list[str],
180
181
  query_config: RAGQueryConfig | None = None,
181
182
  ) -> RAGQueryResult:
182
- if not vector_db_ids:
183
+ if not vector_store_ids:
183
184
  raise ValueError(
184
185
  "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID."
185
186
  )
186
187
 
187
- query_config = query_config or RAGQueryConfig()
188
+ query_config = query_config or RAGQueryConfig(
189
+ max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
190
+ )
188
191
  query = await generate_rag_query(
189
192
  query_config.query_generator_config,
190
193
  content,
@@ -192,7 +195,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
192
195
  )
193
196
  tasks = [
194
197
  self.vector_io_api.query_chunks(
195
- vector_db_id=vector_db_id,
198
+ vector_store_id=vector_store_id,
196
199
  query=query,
197
200
  params={
198
201
  "mode": query_config.mode,
@@ -201,18 +204,20 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
201
204
  "ranker": query_config.ranker,
202
205
  },
203
206
  )
204
- for vector_db_id in vector_db_ids
207
+ for vector_store_id in vector_store_ids
205
208
  ]
206
209
  results: list[QueryChunksResponse] = await asyncio.gather(*tasks)
207
210
 
208
211
  chunks = []
209
212
  scores = []
210
213
 
211
- for vector_db_id, result in zip(vector_db_ids, results, strict=False):
212
- for chunk, score in zip(result.chunks, result.scores, strict=False):
213
- if not hasattr(chunk, "metadata") or chunk.metadata is None:
214
+ for vector_store_id, result in zip(vector_store_ids, results, strict=False):
215
+ for embedded_chunk, score in zip(result.chunks, result.scores, strict=False):
216
+ # EmbeddedChunk inherits from Chunk, so use it directly
217
+ chunk = embedded_chunk
218
+ if chunk.metadata is None:
214
219
  chunk.metadata = {}
215
- chunk.metadata["vector_db_id"] = vector_db_id
220
+ chunk.metadata["vector_store_id"] = vector_store_id
216
221
 
217
222
  chunks.append(chunk)
218
223
  scores.append(score)
@@ -225,13 +230,17 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
225
230
  chunks = chunks[: query_config.max_chunks]
226
231
 
227
232
  tokens = 0
228
- picked: list[InterleavedContentItem] = [
229
- TextContentItem(
230
- text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n"
231
- )
232
- ]
233
- for i, chunk in enumerate(chunks):
234
- metadata = chunk.metadata
233
+
234
+ # Get templates from vector stores config
235
+ vector_stores_config = self.config.vector_stores_config
236
+ header_template = vector_stores_config.file_search_params.header_template
237
+ footer_template = vector_stores_config.file_search_params.footer_template
238
+ chunk_template = vector_stores_config.context_prompt_params.chunk_annotation_template
239
+ context_template = vector_stores_config.context_prompt_params.context_template
240
+
241
+ picked: list[InterleavedContentItem] = [TextContentItem(text=header_template.format(num_chunks=len(chunks)))]
242
+ for i, embedded_chunk in enumerate(chunks):
243
+ metadata = embedded_chunk.metadata
235
244
  tokens += metadata.get("token_count", 0)
236
245
  tokens += metadata.get("metadata_token_count", 0)
237
246
 
@@ -250,22 +259,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
250
259
  metadata_keys_to_exclude_from_context = [
251
260
  "token_count",
252
261
  "metadata_token_count",
253
- "vector_db_id",
262
+ "vector_store_id",
254
263
  ]
255
264
  metadata_for_context = {}
256
265
  for k in chunk_metadata_keys_to_include_from_context:
257
- metadata_for_context[k] = getattr(chunk.chunk_metadata, k)
266
+ metadata_for_context[k] = getattr(embedded_chunk.chunk_metadata, k)
258
267
  for k in metadata:
259
268
  if k not in metadata_keys_to_exclude_from_context:
260
269
  metadata_for_context[k] = metadata[k]
261
270
 
262
- text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context)
271
+ text_content = chunk_template.format(index=i + 1, chunk=embedded_chunk, metadata=metadata_for_context)
263
272
  picked.append(TextContentItem(text=text_content))
264
273
 
265
- picked.append(TextContentItem(text="END of knowledge_search tool results.\n"))
274
+ picked.append(TextContentItem(text=footer_template))
266
275
  picked.append(
267
276
  TextContentItem(
268
- text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n',
277
+ text=context_template.format(query=interleaved_content_as_str(content), annotation_instruction="")
269
278
  )
270
279
  )
271
280
 
@@ -275,12 +284,15 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
275
284
  "document_ids": [c.document_id for c in chunks[: len(picked)]],
276
285
  "chunks": [c.content for c in chunks[: len(picked)]],
277
286
  "scores": scores[: len(picked)],
278
- "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
287
+ "vector_store_ids": [c.metadata["vector_store_id"] for c in chunks[: len(picked)]],
279
288
  },
280
289
  )
281
290
 
282
291
  async def list_runtime_tools(
283
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
292
+ self,
293
+ tool_group_id: str | None = None,
294
+ mcp_endpoint: URL | None = None,
295
+ authorization: str | None = None,
284
296
  ) -> ListToolDefsResponse:
285
297
  # Parameters are not listed since these methods are not yet invoked automatically
286
298
  # by the LLM. The method is only implemented so things like /tools can list without
@@ -308,18 +320,22 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
308
320
  ]
309
321
  )
310
322
 
311
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
312
- vector_db_ids = kwargs.get("vector_db_ids", [])
323
+ async def invoke_tool(
324
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
325
+ ) -> ToolInvocationResult:
326
+ vector_store_ids = kwargs.get("vector_store_ids", [])
313
327
  query_config = kwargs.get("query_config")
314
328
  if query_config:
315
329
  query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
316
330
  else:
317
- query_config = RAGQueryConfig()
331
+ query_config = RAGQueryConfig(
332
+ max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context
333
+ )
318
334
 
319
335
  query = kwargs["query"]
320
336
  result = await self.query(
321
337
  content=query,
322
- vector_db_ids=vector_db_ids,
338
+ vector_store_ids=vector_store_ids,
323
339
  query_config=query_config,
324
340
  )
325
341
 
@@ -6,7 +6,7 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.providers.datatypes import Api
9
+ from llama_stack_api import Api
10
10
 
11
11
  from .config import ChromaVectorIOConfig
12
12
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -6,7 +6,7 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.providers.datatypes import Api
9
+ from llama_stack_api import Api
10
10
 
11
11
  from .config import FaissVectorIOConfig
12
12
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -10,21 +10,29 @@ import io
10
10
  import json
11
11
  from typing import Any
12
12
 
13
- import faiss
13
+ import faiss # type: ignore[import-untyped]
14
14
  import numpy as np
15
15
  from numpy.typing import NDArray
16
16
 
17
- from llama_stack.apis.common.errors import VectorStoreNotFoundError
18
- from llama_stack.apis.files import Files
19
- from llama_stack.apis.inference import Inference, InterleavedContent
20
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
21
- from llama_stack.apis.vector_stores import VectorStore
17
+ from llama_stack.core.storage.kvstore import kvstore_impl
22
18
  from llama_stack.log import get_logger
23
- from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
24
- from llama_stack.providers.utils.kvstore import kvstore_impl
25
- from llama_stack.providers.utils.kvstore.api import KVStore
26
19
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
27
20
  from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
21
+ from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
22
+ from llama_stack_api import (
23
+ EmbeddedChunk,
24
+ Files,
25
+ HealthResponse,
26
+ HealthStatus,
27
+ Inference,
28
+ InterleavedContent,
29
+ QueryChunksResponse,
30
+ VectorIO,
31
+ VectorStore,
32
+ VectorStoreNotFoundError,
33
+ VectorStoresProtocolPrivate,
34
+ )
35
+ from llama_stack_api.internal.kvstore import KVStore
28
36
 
29
37
  from .config import FaissVectorIOConfig
30
38
 
@@ -41,7 +49,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_conten
41
49
  class FaissIndex(EmbeddingIndex):
42
50
  def __init__(self, dimension: int, kvstore: KVStore | None = None, bank_id: str | None = None):
43
51
  self.index = faiss.IndexFlatL2(dimension)
44
- self.chunk_by_index: dict[int, Chunk] = {}
52
+ self.chunk_by_index: dict[int, EmbeddedChunk] = {}
45
53
  self.kvstore = kvstore
46
54
  self.bank_id = bank_id
47
55
 
@@ -65,12 +73,16 @@ class FaissIndex(EmbeddingIndex):
65
73
 
66
74
  if stored_data:
67
75
  data = json.loads(stored_data)
68
- self.chunk_by_index = {int(k): Chunk.model_validate_json(v) for k, v in data["chunk_by_index"].items()}
76
+ self.chunk_by_index = {}
77
+ for k, v in data["chunk_by_index"].items():
78
+ chunk_data = json.loads(v)
79
+ # Use generic backward compatibility utility
80
+ self.chunk_by_index[int(k)] = load_embedded_chunk_with_backward_compat(chunk_data)
69
81
 
70
82
  buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
71
83
  try:
72
84
  self.index = faiss.deserialize_index(np.load(buffer, allow_pickle=False))
73
- self.chunk_ids = [chunk.chunk_id for chunk in self.chunk_by_index.values()]
85
+ self.chunk_ids = [embedded_chunk.chunk_id for embedded_chunk in self.chunk_by_index.values()]
74
86
  except Exception as e:
75
87
  logger.debug(e, exc_info=True)
76
88
  raise ValueError(
@@ -100,19 +112,24 @@ class FaissIndex(EmbeddingIndex):
100
112
 
101
113
  await self.kvstore.delete(f"{FAISS_INDEX_PREFIX}{self.bank_id}")
102
114
 
103
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
104
- # Add dimension check
115
+ async def add_chunks(self, embedded_chunks: list[EmbeddedChunk]):
116
+ if not embedded_chunks:
117
+ return
118
+
119
+ # Extract embeddings and validate dimensions
120
+ embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
105
121
  embedding_dim = embeddings.shape[1] if len(embeddings.shape) > 1 else embeddings.shape[0]
106
122
  if embedding_dim != self.index.d:
107
123
  raise ValueError(f"Embedding dimension mismatch. Expected {self.index.d}, got {embedding_dim}")
108
124
 
125
+ # Store chunks by index
109
126
  indexlen = len(self.chunk_by_index)
110
- for i, chunk in enumerate(chunks):
111
- self.chunk_by_index[indexlen + i] = chunk
127
+ for i, embedded_chunk in enumerate(embedded_chunks):
128
+ self.chunk_by_index[indexlen + i] = embedded_chunk
112
129
 
113
130
  async with self.chunk_id_lock:
114
- self.index.add(np.array(embeddings).astype(np.float32))
115
- self.chunk_ids.extend([chunk.chunk_id for chunk in chunks])
131
+ self.index.add(embeddings)
132
+ self.chunk_ids.extend([ec.chunk_id for ec in embedded_chunks]) # EmbeddedChunk inherits from Chunk
116
133
 
117
134
  # Save updated index
118
135
  await self._save_index()
@@ -144,8 +161,8 @@ class FaissIndex(EmbeddingIndex):
144
161
 
145
162
  async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
146
163
  distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
147
- chunks = []
148
- scores = []
164
+ chunks: list[EmbeddedChunk] = []
165
+ scores: list[float] = []
149
166
  for d, i in zip(distances[0], indices[0], strict=False):
150
167
  if i < 0:
151
168
  continue
@@ -178,9 +195,8 @@ class FaissIndex(EmbeddingIndex):
178
195
 
179
196
  class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
180
197
  def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
181
- super().__init__(files_api=files_api, kvstore=None)
198
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
182
199
  self.config = config
183
- self.inference_api = inference_api
184
200
  self.cache: dict[str, VectorStoreWithIndex] = {}
185
201
 
186
202
  async def initialize(self) -> None:
@@ -271,19 +287,21 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
271
287
  self.cache[vector_store_id] = index
272
288
  return index
273
289
 
274
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
275
- index = self.cache.get(vector_db_id)
290
+ async def insert_chunks(
291
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
292
+ ) -> None:
293
+ index = self.cache.get(vector_store_id)
276
294
  if index is None:
277
- raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
295
+ raise ValueError(f"Vector DB {vector_store_id} not found. found: {self.cache.keys()}")
278
296
 
279
297
  await index.insert_chunks(chunks)
280
298
 
281
299
  async def query_chunks(
282
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
300
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
283
301
  ) -> QueryChunksResponse:
284
- index = self.cache.get(vector_db_id)
302
+ index = self.cache.get(vector_store_id)
285
303
  if index is None:
286
- raise VectorStoreNotFoundError(vector_db_id)
304
+ raise VectorStoreNotFoundError(vector_store_id)
287
305
 
288
306
  return await index.query_chunks(query, params)
289
307
 
@@ -6,7 +6,7 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.providers.datatypes import Api
9
+ from llama_stack_api import Api
10
10
 
11
11
  from .config import MilvusVectorIOConfig
12
12
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -6,7 +6,7 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.providers.datatypes import Api
9
+ from llama_stack_api import Api
10
10
 
11
11
  from .config import QdrantVectorIOConfig
12
12
 
@@ -10,7 +10,7 @@ from typing import Any
10
10
  from pydantic import BaseModel
11
11
 
12
12
  from llama_stack.core.storage.datatypes import KVStoreReference
13
- from llama_stack.schema_utils import json_schema_type
13
+ from llama_stack_api import json_schema_type
14
14
 
15
15
 
16
16
  @json_schema_type
@@ -6,7 +6,7 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.providers.datatypes import Api
9
+ from llama_stack_api import Api
10
10
 
11
11
  from .config import SQLiteVectorIOConfig
12
12
 
@@ -5,24 +5,18 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
  import asyncio
8
+ import json
8
9
  import re
9
10
  import sqlite3
10
11
  import struct
11
12
  from typing import Any
12
13
 
13
14
  import numpy as np
14
- import sqlite_vec
15
+ import sqlite_vec # type: ignore[import-untyped]
15
16
  from numpy.typing import NDArray
16
17
 
17
- from llama_stack.apis.common.errors import VectorStoreNotFoundError
18
- from llama_stack.apis.files import Files
19
- from llama_stack.apis.inference import Inference
20
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
21
- from llama_stack.apis.vector_stores import VectorStore
18
+ from llama_stack.core.storage.kvstore import kvstore_impl
22
19
  from llama_stack.log import get_logger
23
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
24
- from llama_stack.providers.utils.kvstore import kvstore_impl
25
- from llama_stack.providers.utils.kvstore.api import KVStore
26
20
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
27
21
  from llama_stack.providers.utils.memory.vector_store import (
28
22
  RERANKER_TYPE_RRF,
@@ -30,7 +24,19 @@ from llama_stack.providers.utils.memory.vector_store import (
30
24
  EmbeddingIndex,
31
25
  VectorStoreWithIndex,
32
26
  )
27
+ from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
33
28
  from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
29
+ from llama_stack_api import (
30
+ EmbeddedChunk,
31
+ Files,
32
+ Inference,
33
+ QueryChunksResponse,
34
+ VectorIO,
35
+ VectorStore,
36
+ VectorStoreNotFoundError,
37
+ VectorStoresProtocolPrivate,
38
+ )
39
+ from llama_stack_api.internal.kvstore import KVStore
34
40
 
35
41
  logger = get_logger(name=__name__, category="vector_io")
36
42
 
@@ -137,14 +143,16 @@ class SQLiteVecIndex(EmbeddingIndex):
137
143
 
138
144
  await asyncio.to_thread(_drop_tables)
139
145
 
140
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray, batch_size: int = 500):
146
+ async def add_chunks(self, embedded_chunks: list[EmbeddedChunk], batch_size: int = 500):
141
147
  """
142
- Add new chunks along with their embeddings using batch inserts.
143
- For each chunk, we insert its JSON into the metadata table and then insert its
148
+ Add new embedded chunks using batch inserts.
149
+ For each embedded chunk, we insert the chunk JSON into the metadata table and then insert its
144
150
  embedding (serialized to raw bytes) into the virtual table using the assigned rowid.
145
151
  If any insert fails, the transaction is rolled back to maintain consistency.
146
152
  Also inserts chunk content into FTS table for keyword search support.
147
153
  """
154
+ chunks = embedded_chunks # EmbeddedChunk now inherits from Chunk
155
+ embeddings = np.array([ec.embedding for ec in embedded_chunks], dtype=np.float32)
148
156
  assert all(isinstance(chunk.content, str) for chunk in chunks), "SQLiteVecIndex only supports text chunks"
149
157
 
150
158
  def _execute_all_batch_inserts():
@@ -229,11 +237,12 @@ class SQLiteVecIndex(EmbeddingIndex):
229
237
  if score < score_threshold:
230
238
  continue
231
239
  try:
232
- chunk = Chunk.model_validate_json(chunk_json)
240
+ chunk_data = json.loads(chunk_json)
241
+ embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
233
242
  except Exception as e:
234
243
  logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
235
244
  continue
236
- chunks.append(chunk)
245
+ chunks.append(embedded_chunk)
237
246
  scores.append(score)
238
247
  return QueryChunksResponse(chunks=chunks, scores=scores)
239
248
 
@@ -270,11 +279,12 @@ class SQLiteVecIndex(EmbeddingIndex):
270
279
  if score > -score_threshold:
271
280
  continue
272
281
  try:
273
- chunk = Chunk.model_validate_json(chunk_json)
282
+ chunk_data = json.loads(chunk_json)
283
+ embedded_chunk = load_embedded_chunk_with_backward_compat(chunk_data)
274
284
  except Exception as e:
275
285
  logger.error(f"Error parsing chunk JSON for id {_id}: {e}")
276
286
  continue
277
- chunks.append(chunk)
287
+ chunks.append(embedded_chunk)
278
288
  scores.append(score)
279
289
  return QueryChunksResponse(chunks=chunks, scores=scores)
280
290
 
@@ -308,13 +318,14 @@ class SQLiteVecIndex(EmbeddingIndex):
308
318
  vector_response = await self.query_vector(embedding, k, score_threshold)
309
319
  keyword_response = await self.query_keyword(query_string, k, score_threshold)
310
320
 
311
- # Convert responses to score dictionaries using chunk_id
321
+ # Convert responses to score dictionaries using chunk_id (EmbeddedChunk inherits from Chunk)
312
322
  vector_scores = {
313
- chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
323
+ embedded_chunk.chunk_id: score
324
+ for embedded_chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
314
325
  }
315
326
  keyword_scores = {
316
- chunk.chunk_id: score
317
- for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
327
+ embedded_chunk.chunk_id: score
328
+ for embedded_chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
318
329
  }
319
330
 
320
331
  # Combine scores using the reranking utility
@@ -329,10 +340,10 @@ class SQLiteVecIndex(EmbeddingIndex):
329
340
  # Filter by score threshold
330
341
  filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
331
342
 
332
- # Create a map of chunk_id to chunk for both responses
333
- chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
343
+ # Create a map of chunk_id to embedded_chunk for both responses
344
+ chunk_map = {ec.chunk_id: ec for ec in vector_response.chunks + keyword_response.chunks}
334
345
 
335
- # Use the map to look up chunks by their IDs
346
+ # Use the map to look up embedded chunks by their IDs
336
347
  chunks = []
337
348
  scores = []
338
349
  for doc_id, score in filtered_items:
@@ -382,9 +393,8 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
382
393
  """
383
394
 
384
395
  def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
385
- super().__init__(files_api=files_api, kvstore=None)
396
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
386
397
  self.config = config
387
- self.inference_api = inference_api
388
398
  self.cache: dict[str, VectorStoreWithIndex] = {}
389
399
  self.vector_store_table = None
390
400
 
@@ -458,20 +468,21 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
458
468
  await self.cache[vector_store_id].index.delete()
459
469
  del self.cache[vector_store_id]
460
470
 
461
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
462
- index = await self._get_and_cache_vector_store_index(vector_db_id)
471
+ async def insert_chunks(
472
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
473
+ ) -> None:
474
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
463
475
  if not index:
464
- raise VectorStoreNotFoundError(vector_db_id)
465
- # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
466
- # and then call our index's add_chunks.
476
+ raise VectorStoreNotFoundError(vector_store_id)
477
+ # The VectorStoreWithIndex helper validates embeddings and calls the index's add_chunks method
467
478
  await index.insert_chunks(chunks)
468
479
 
469
480
  async def query_chunks(
470
- self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
481
+ self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
471
482
  ) -> QueryChunksResponse:
472
- index = await self._get_and_cache_vector_store_index(vector_db_id)
483
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
473
484
  if not index:
474
- raise VectorStoreNotFoundError(vector_db_id)
485
+ raise VectorStoreNotFoundError(vector_store_id)
475
486
  return await index.query_chunks(query, params)
476
487
 
477
488
  async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: