llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -8,19 +8,15 @@
8
8
  #
9
9
  # This source code is licensed under the terms described in the LICENSE file in
10
10
  # the root directory of this source tree.
11
- import uuid
12
11
  from typing import Annotated, Any, Literal, Protocol, runtime_checkable
13
12
 
14
- from fastapi import Body
15
- from pydantic import BaseModel, Field
13
+ from fastapi import Body, Query
14
+ from pydantic import BaseModel, Field, field_validator
16
15
 
17
- from llama_stack.apis.inference import InterleavedContent
18
- from llama_stack.apis.vector_stores import VectorStore
19
- from llama_stack.apis.version import LLAMA_STACK_API_V1
20
- from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
21
- from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
22
- from llama_stack.schema_utils import json_schema_type, webmethod
23
- from llama_stack.strong_typing.schema import register_schema
16
+ from llama_stack_api.inference import InterleavedContent
17
+ from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
18
+ from llama_stack_api.vector_stores import VectorStore
19
+ from llama_stack_api.version import LLAMA_STACK_API_V1
24
20
 
25
21
 
26
22
  @json_schema_type
@@ -37,8 +33,6 @@ class ChunkMetadata(BaseModel):
37
33
  :param updated_timestamp: An optional timestamp indicating when the chunk was last updated.
38
34
  :param chunk_window: The window of the chunk, which can be used to group related chunks together.
39
35
  :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken.
40
- :param chunk_embedding_model: The embedding model used to create the chunk's embedding.
41
- :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk.
42
36
  :param content_token_count: The number of tokens in the content of the chunk.
43
37
  :param metadata_token_count: The number of tokens in the metadata of the chunk.
44
38
  """
@@ -50,8 +44,6 @@ class ChunkMetadata(BaseModel):
50
44
  updated_timestamp: int | None = None
51
45
  chunk_window: str | None = None
52
46
  chunk_tokenizer: str | None = None
53
- chunk_embedding_model: str | None = None
54
- chunk_embedding_dimension: int | None = None
55
47
  content_token_count: int | None = None
56
48
  metadata_token_count: int | None = None
57
49
 
@@ -59,39 +51,18 @@ class ChunkMetadata(BaseModel):
59
51
  @json_schema_type
60
52
  class Chunk(BaseModel):
61
53
  """
62
- A chunk of content that can be inserted into a vector database.
54
+ A chunk of content from file processing.
63
55
  :param content: The content of the chunk, which can be interleaved text, images, or other types.
64
- :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
56
+ :param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
65
57
  :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
66
- :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality.
67
58
  :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
68
59
  The `chunk_metadata` is required for backend functionality.
69
60
  """
70
61
 
71
62
  content: InterleavedContent
63
+ chunk_id: str
72
64
  metadata: dict[str, Any] = Field(default_factory=dict)
73
- embedding: list[float] | None = None
74
- # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
75
- stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
76
- chunk_metadata: ChunkMetadata | None = None
77
-
78
- model_config = {"populate_by_name": True}
79
-
80
- def model_post_init(self, __context):
81
- # Extract chunk_id from metadata if present
82
- if self.metadata and "chunk_id" in self.metadata:
83
- self.stored_chunk_id = self.metadata.pop("chunk_id")
84
-
85
- @property
86
- def chunk_id(self) -> str:
87
- """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
88
- if self.stored_chunk_id:
89
- return self.stored_chunk_id
90
-
91
- if "document_id" in self.metadata:
92
- return generate_chunk_id(self.metadata["document_id"], str(self.content))
93
-
94
- return generate_chunk_id(str(uuid.uuid4()), str(self.content))
65
+ chunk_metadata: ChunkMetadata
95
66
 
96
67
  @property
97
68
  def document_id(self) -> str | None:
@@ -110,15 +81,30 @@ class Chunk(BaseModel):
110
81
  return None
111
82
 
112
83
 
84
+ @json_schema_type
85
+ class EmbeddedChunk(Chunk):
86
+ """
87
+ A chunk of content with its embedding vector for vector database operations.
88
+ Inherits all fields from Chunk and adds embedding-related fields.
89
+ :param embedding: The embedding vector for the chunk content.
90
+ :param embedding_model: The model used to generate the embedding (e.g., 'openai/text-embedding-3-small').
91
+ :param embedding_dimension: The dimension of the embedding vector.
92
+ """
93
+
94
+ embedding: list[float]
95
+ embedding_model: str
96
+ embedding_dimension: int
97
+
98
+
113
99
  @json_schema_type
114
100
  class QueryChunksResponse(BaseModel):
115
101
  """Response from querying chunks in a vector database.
116
102
 
117
- :param chunks: List of content chunks returned from the query
103
+ :param chunks: List of embedded chunks returned from the query
118
104
  :param scores: Relevance scores corresponding to each returned chunk
119
105
  """
120
106
 
121
- chunks: list[Chunk]
107
+ chunks: list[EmbeddedChunk]
122
108
  scores: list[float]
123
109
 
124
110
 
@@ -245,10 +231,16 @@ class VectorStoreContent(BaseModel):
245
231
 
246
232
  :param type: Content type, currently only "text" is supported
247
233
  :param text: The actual text content
234
+ :param embedding: Optional embedding vector for this content chunk
235
+ :param chunk_metadata: Optional chunk metadata
236
+ :param metadata: Optional user-defined metadata
248
237
  """
249
238
 
250
239
  type: Literal["text"]
251
240
  text: str
241
+ embedding: list[float] | None = None
242
+ chunk_metadata: ChunkMetadata | None = None
243
+ metadata: dict[str, Any] | None = None
252
244
 
253
245
 
254
246
  @json_schema_type
@@ -281,7 +273,7 @@ class VectorStoreSearchResponsePage(BaseModel):
281
273
  """
282
274
 
283
275
  object: str = "vector_store.search_results.page"
284
- search_query: str
276
+ search_query: list[str]
285
277
  data: list[VectorStoreSearchResponse]
286
278
  has_more: bool = False
287
279
  next_page: str | None = None
@@ -301,6 +293,22 @@ class VectorStoreDeleteResponse(BaseModel):
301
293
  deleted: bool = True
302
294
 
303
295
 
296
+ @json_schema_type
297
+ class VectorStoreFileContentResponse(BaseModel):
298
+ """Represents the parsed content of a vector store file.
299
+
300
+ :param object: The object type, which is always `vector_store.file_content.page`
301
+ :param data: Parsed content of the file
302
+ :param has_more: Indicates if there are more content pages to fetch
303
+ :param next_page: The token for the next page, if any
304
+ """
305
+
306
+ object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
307
+ data: list[VectorStoreContent]
308
+ has_more: bool = False
309
+ next_page: str | None = None
310
+
311
+
304
312
  @json_schema_type
305
313
  class VectorStoreChunkingStrategyAuto(BaseModel):
306
314
  """Automatic chunking strategy for vector store files.
@@ -372,6 +380,65 @@ VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal[
372
380
  register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
373
381
 
374
382
 
383
+ # VectorStoreFileAttributes type with OpenAPI constraints
384
+ VectorStoreFileAttributes = Annotated[
385
+ dict[str, Annotated[str, Field(max_length=512)] | float | bool],
386
+ Field(
387
+ max_length=16,
388
+ json_schema_extra={
389
+ "propertyNames": {"type": "string", "maxLength": 64},
390
+ "x-oaiTypeLabel": "map",
391
+ },
392
+ description=(
393
+ "Set of 16 key-value pairs that can be attached to an object. This can be "
394
+ "useful for storing additional information about the object in a structured "
395
+ "format, and querying for objects via API or the dashboard. Keys are strings "
396
+ "with a maximum length of 64 characters. Values are strings with a maximum "
397
+ "length of 512 characters, booleans, or numbers."
398
+ ),
399
+ ),
400
+ ]
401
+
402
+
403
+ def _sanitize_vector_store_attributes(metadata: dict[str, Any] | None) -> dict[str, str | float | bool]:
404
+ """
405
+ Sanitize metadata to VectorStoreFileAttributes spec (max 16 properties, primitives only).
406
+
407
+ Converts dict[str, Any] to dict[str, str | float | bool]:
408
+ - Preserves: str (truncated to 512 chars), bool, int/float (as float)
409
+ - Converts: list -> comma-separated string
410
+ - Filters: dict, None, other types
411
+ - Enforces: max 16 properties, max 64 char keys, max 512 char string values
412
+ """
413
+ if not metadata:
414
+ return {}
415
+
416
+ sanitized: dict[str, str | float | bool] = {}
417
+ for key, value in metadata.items():
418
+ # Enforce max 16 properties
419
+ if len(sanitized) >= 16:
420
+ break
421
+
422
+ # Enforce max 64 char keys
423
+ if len(key) > 64:
424
+ continue
425
+
426
+ # Convert to supported primitive types
427
+ if isinstance(value, bool):
428
+ sanitized[key] = value
429
+ elif isinstance(value, int | float):
430
+ sanitized[key] = float(value)
431
+ elif isinstance(value, str):
432
+ # Enforce max 512 char string values
433
+ sanitized[key] = value[:512] if len(value) > 512 else value
434
+ elif isinstance(value, list):
435
+ # Convert lists to comma-separated strings (max 512 chars)
436
+ list_str = ", ".join(str(item) for item in value)
437
+ sanitized[key] = list_str[:512] if len(list_str) > 512 else list_str
438
+
439
+ return sanitized
440
+
441
+
375
442
  @json_schema_type
376
443
  class VectorStoreFileObject(BaseModel):
377
444
  """OpenAI Vector Store File object.
@@ -389,7 +456,7 @@ class VectorStoreFileObject(BaseModel):
389
456
 
390
457
  id: str
391
458
  object: str = "vector_store.file"
392
- attributes: dict[str, Any] = Field(default_factory=dict)
459
+ attributes: VectorStoreFileAttributes = Field(default_factory=dict)
393
460
  chunking_strategy: VectorStoreChunkingStrategy
394
461
  created_at: int
395
462
  last_error: VectorStoreFileLastError | None = None
@@ -397,6 +464,12 @@ class VectorStoreFileObject(BaseModel):
397
464
  usage_bytes: int = 0
398
465
  vector_store_id: str
399
466
 
467
+ @field_validator("attributes", mode="before")
468
+ @classmethod
469
+ def _validate_attributes(cls, v: dict[str, Any] | None) -> dict[str, str | float | bool]:
470
+ """Sanitize attributes to match VectorStoreFileAttributes OpenAPI spec."""
471
+ return _sanitize_vector_store_attributes(v)
472
+
400
473
 
401
474
  @json_schema_type
402
475
  class VectorStoreListFilesResponse(BaseModel):
@@ -416,22 +489,6 @@ class VectorStoreListFilesResponse(BaseModel):
416
489
  has_more: bool = False
417
490
 
418
491
 
419
- @json_schema_type
420
- class VectorStoreFileContentsResponse(BaseModel):
421
- """Response from retrieving the contents of a vector store file.
422
-
423
- :param file_id: Unique identifier for the file
424
- :param filename: Name of the file
425
- :param attributes: Key-value attributes associated with the file
426
- :param content: List of content items from the file
427
- """
428
-
429
- file_id: str
430
- filename: str
431
- attributes: dict[str, Any]
432
- content: list[VectorStoreContent]
433
-
434
-
435
492
  @json_schema_type
436
493
  class VectorStoreFileDeleteResponse(BaseModel):
437
494
  """Response from deleting a vector store file.
@@ -499,7 +556,7 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
499
556
  name: str | None = None
500
557
  file_ids: list[str] | None = None
501
558
  expires_after: dict[str, Any] | None = None
502
- chunking_strategy: dict[str, Any] | None = None
559
+ chunking_strategy: VectorStoreChunkingStrategy | None = None
503
560
  metadata: dict[str, Any] | None = None
504
561
 
505
562
 
@@ -523,42 +580,39 @@ class VectorStoreTable(Protocol):
523
580
 
524
581
 
525
582
  @runtime_checkable
526
- @trace_protocol
527
583
  class VectorIO(Protocol):
528
584
  vector_store_table: VectorStoreTable | None = None
529
585
 
530
586
  # this will just block now until chunks are inserted, but it should
531
587
  # probably return a Job instance which can be polled for completion
532
- # TODO: rename vector_db_id to vector_store_id once Stainless is working
588
+ # NOTE: the vector_db_id parameter has been renamed to vector_store_id
533
589
  @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
534
590
  async def insert_chunks(
535
591
  self,
536
- vector_db_id: str,
537
- chunks: list[Chunk],
592
+ vector_store_id: str,
593
+ chunks: list[EmbeddedChunk],
538
594
  ttl_seconds: int | None = None,
539
595
  ) -> None:
540
- """Insert chunks into a vector database.
596
+ """Insert embedded chunks into a vector database.
541
597
 
542
- :param vector_db_id: The identifier of the vector database to insert the chunks into.
543
- :param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types.
544
- `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional.
545
- If `metadata` is provided, you configure how Llama Stack formats the chunk during generation.
546
- If `embedding` is not provided, it will be computed later.
598
+ :param vector_store_id: The identifier of the vector database to insert the chunks into.
599
+ :param chunks: The embedded chunks to insert. Each `EmbeddedChunk` contains the content, metadata,
600
+ and embedding vector ready for storage.
547
601
  :param ttl_seconds: The time to live of the chunks.
548
602
  """
549
603
  ...
550
604
 
551
- # TODO: rename vector_db_id to vector_store_id once Stainless is working
605
+ # NOTE: the vector_db_id parameter has been renamed to vector_store_id
552
606
  @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
553
607
  async def query_chunks(
554
608
  self,
555
- vector_db_id: str,
609
+ vector_store_id: str,
556
610
  query: InterleavedContent,
557
611
  params: dict[str, Any] | None = None,
558
612
  ) -> QueryChunksResponse:
559
613
  """Query chunks from a vector database.
560
614
 
561
- :param vector_db_id: The identifier of the vector database to query.
615
+ :param vector_store_id: The identifier of the vector database to query.
562
616
  :param query: The query to search for.
563
617
  :param params: The parameters of the query.
564
618
  :returns: A QueryChunksResponse.
@@ -566,7 +620,6 @@ class VectorIO(Protocol):
566
620
  ...
567
621
 
568
622
  # OpenAI Vector Stores API endpoints
569
- @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
570
623
  @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
571
624
  async def openai_create_vector_store(
572
625
  self,
@@ -579,7 +632,6 @@ class VectorIO(Protocol):
579
632
  """
580
633
  ...
581
634
 
582
- @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
583
635
  @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
584
636
  async def openai_list_vector_stores(
585
637
  self,
@@ -598,9 +650,6 @@ class VectorIO(Protocol):
598
650
  """
599
651
  ...
600
652
 
601
- @webmethod(
602
- route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
603
- )
604
653
  @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
605
654
  async def openai_retrieve_vector_store(
606
655
  self,
@@ -613,9 +662,6 @@ class VectorIO(Protocol):
613
662
  """
614
663
  ...
615
664
 
616
- @webmethod(
617
- route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
618
- )
619
665
  @webmethod(
620
666
  route="/vector_stores/{vector_store_id}",
621
667
  method="POST",
@@ -638,9 +684,6 @@ class VectorIO(Protocol):
638
684
  """
639
685
  ...
640
686
 
641
- @webmethod(
642
- route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
643
- )
644
687
  @webmethod(
645
688
  route="/vector_stores/{vector_store_id}",
646
689
  method="DELETE",
@@ -657,12 +700,6 @@ class VectorIO(Protocol):
657
700
  """
658
701
  ...
659
702
 
660
- @webmethod(
661
- route="/openai/v1/vector_stores/{vector_store_id}/search",
662
- method="POST",
663
- level=LLAMA_STACK_API_V1,
664
- deprecated=True,
665
- )
666
703
  @webmethod(
667
704
  route="/vector_stores/{vector_store_id}/search",
668
705
  method="POST",
@@ -695,12 +732,6 @@ class VectorIO(Protocol):
695
732
  """
696
733
  ...
697
734
 
698
- @webmethod(
699
- route="/openai/v1/vector_stores/{vector_store_id}/files",
700
- method="POST",
701
- level=LLAMA_STACK_API_V1,
702
- deprecated=True,
703
- )
704
735
  @webmethod(
705
736
  route="/vector_stores/{vector_store_id}/files",
706
737
  method="POST",
@@ -723,12 +754,6 @@ class VectorIO(Protocol):
723
754
  """
724
755
  ...
725
756
 
726
- @webmethod(
727
- route="/openai/v1/vector_stores/{vector_store_id}/files",
728
- method="GET",
729
- level=LLAMA_STACK_API_V1,
730
- deprecated=True,
731
- )
732
757
  @webmethod(
733
758
  route="/vector_stores/{vector_store_id}/files",
734
759
  method="GET",
@@ -755,12 +780,6 @@ class VectorIO(Protocol):
755
780
  """
756
781
  ...
757
782
 
758
- @webmethod(
759
- route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
760
- method="GET",
761
- level=LLAMA_STACK_API_V1,
762
- deprecated=True,
763
- )
764
783
  @webmethod(
765
784
  route="/vector_stores/{vector_store_id}/files/{file_id}",
766
785
  method="GET",
@@ -779,12 +798,6 @@ class VectorIO(Protocol):
779
798
  """
780
799
  ...
781
800
 
782
- @webmethod(
783
- route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
784
- method="GET",
785
- level=LLAMA_STACK_API_V1,
786
- deprecated=True,
787
- )
788
801
  @webmethod(
789
802
  route="/vector_stores/{vector_store_id}/files/{file_id}/content",
790
803
  method="GET",
@@ -794,21 +807,19 @@ class VectorIO(Protocol):
794
807
  self,
795
808
  vector_store_id: str,
796
809
  file_id: str,
797
- ) -> VectorStoreFileContentsResponse:
810
+ include_embeddings: Annotated[bool | None, Query()] = False,
811
+ include_metadata: Annotated[bool | None, Query()] = False,
812
+ ) -> VectorStoreFileContentResponse:
798
813
  """Retrieves the contents of a vector store file.
799
814
 
800
815
  :param vector_store_id: The ID of the vector store containing the file to retrieve.
801
816
  :param file_id: The ID of the file to retrieve.
802
- :returns: A list of InterleavedContent representing the file contents.
817
+ :param include_embeddings: Whether to include embedding vectors in the response.
818
+ :param include_metadata: Whether to include chunk metadata in the response.
819
+ :returns: File contents, optionally with embeddings and metadata based on query parameters.
803
820
  """
804
821
  ...
805
822
 
806
- @webmethod(
807
- route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
808
- method="POST",
809
- level=LLAMA_STACK_API_V1,
810
- deprecated=True,
811
- )
812
823
  @webmethod(
813
824
  route="/vector_stores/{vector_store_id}/files/{file_id}",
814
825
  method="POST",
@@ -829,12 +840,6 @@ class VectorIO(Protocol):
829
840
  """
830
841
  ...
831
842
 
832
- @webmethod(
833
- route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
834
- method="DELETE",
835
- level=LLAMA_STACK_API_V1,
836
- deprecated=True,
837
- )
838
843
  @webmethod(
839
844
  route="/vector_stores/{vector_store_id}/files/{file_id}",
840
845
  method="DELETE",
@@ -858,12 +863,6 @@ class VectorIO(Protocol):
858
863
  method="POST",
859
864
  level=LLAMA_STACK_API_V1,
860
865
  )
861
- @webmethod(
862
- route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
863
- method="POST",
864
- level=LLAMA_STACK_API_V1,
865
- deprecated=True,
866
- )
867
866
  async def openai_create_vector_store_file_batch(
868
867
  self,
869
868
  vector_store_id: str,
@@ -882,12 +881,6 @@ class VectorIO(Protocol):
882
881
  method="GET",
883
882
  level=LLAMA_STACK_API_V1,
884
883
  )
885
- @webmethod(
886
- route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
887
- method="GET",
888
- level=LLAMA_STACK_API_V1,
889
- deprecated=True,
890
- )
891
884
  async def openai_retrieve_vector_store_file_batch(
892
885
  self,
893
886
  batch_id: str,
@@ -901,12 +894,6 @@ class VectorIO(Protocol):
901
894
  """
902
895
  ...
903
896
 
904
- @webmethod(
905
- route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
906
- method="GET",
907
- level=LLAMA_STACK_API_V1,
908
- deprecated=True,
909
- )
910
897
  @webmethod(
911
898
  route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
912
899
  method="GET",
@@ -935,12 +922,6 @@ class VectorIO(Protocol):
935
922
  """
936
923
  ...
937
924
 
938
- @webmethod(
939
- route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
940
- method="POST",
941
- level=LLAMA_STACK_API_V1,
942
- deprecated=True,
943
- )
944
925
  @webmethod(
945
926
  route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
946
927
  method="POST",
@@ -8,7 +8,7 @@ from typing import Literal
8
8
 
9
9
  from pydantic import BaseModel
10
10
 
11
- from llama_stack.apis.resource import Resource, ResourceType
11
+ from llama_stack_api.resource import Resource, ResourceType
12
12
 
13
13
 
14
14
  # Internal resource type for storing the vector store routing and other information