llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -10,19 +10,31 @@ from typing import Annotated, Any
10
10
 
11
11
  from fastapi import Body
12
12
 
13
- from llama_stack.apis.common.content_types import InterleavedContent
14
- from llama_stack.apis.models import ModelType
15
- from llama_stack.apis.vector_io import (
16
- Chunk,
13
+ from llama_stack.core.datatypes import VectorStoresConfig
14
+ from llama_stack.log import get_logger
15
+ from llama_stack_api import (
16
+ EmbeddedChunk,
17
+ HealthResponse,
18
+ HealthStatus,
19
+ Inference,
20
+ InterleavedContent,
21
+ ModelNotFoundError,
22
+ ModelType,
23
+ ModelTypeError,
24
+ OpenAIChatCompletionRequestWithExtraBody,
17
25
  OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
18
26
  OpenAICreateVectorStoreRequestWithExtraBody,
27
+ OpenAIUserMessageParam,
19
28
  QueryChunksResponse,
29
+ RoutingTable,
20
30
  SearchRankingOptions,
21
31
  VectorIO,
22
32
  VectorStoreChunkingStrategy,
33
+ VectorStoreChunkingStrategyStatic,
34
+ VectorStoreChunkingStrategyStaticConfig,
23
35
  VectorStoreDeleteResponse,
24
36
  VectorStoreFileBatchObject,
25
- VectorStoreFileContentsResponse,
37
+ VectorStoreFileContentResponse,
26
38
  VectorStoreFileDeleteResponse,
27
39
  VectorStoreFileObject,
28
40
  VectorStoreFilesListInBatchResponse,
@@ -31,9 +43,6 @@ from llama_stack.apis.vector_io import (
31
43
  VectorStoreObject,
32
44
  VectorStoreSearchResponsePage,
33
45
  )
34
- from llama_stack.core.datatypes import VectorStoresConfig
35
- from llama_stack.log import get_logger
36
- from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
37
46
 
38
47
  logger = get_logger(name=__name__, category="core::routers")
39
48
 
@@ -45,10 +54,11 @@ class VectorIORouter(VectorIO):
45
54
  self,
46
55
  routing_table: RoutingTable,
47
56
  vector_stores_config: VectorStoresConfig | None = None,
57
+ inference_api: Inference | None = None,
48
58
  ) -> None:
49
- logger.debug("Initializing VectorIORouter")
50
59
  self.routing_table = routing_table
51
60
  self.vector_stores_config = vector_stores_config
61
+ self.inference_api = inference_api
52
62
 
53
63
  async def initialize(self) -> None:
54
64
  logger.debug("VectorIORouter.initialize")
@@ -58,6 +68,46 @@ class VectorIORouter(VectorIO):
58
68
  logger.debug("VectorIORouter.shutdown")
59
69
  pass
60
70
 
71
+ async def _rewrite_query_for_search(self, query: str) -> str:
72
+ """Rewrite a search query using the configured LLM model for better retrieval results."""
73
+ if (
74
+ not self.vector_stores_config
75
+ or not self.vector_stores_config.rewrite_query_params
76
+ or not self.vector_stores_config.rewrite_query_params.model
77
+ ):
78
+ logger.warning(
79
+ "User is trying to use vector_store query rewriting, but it is not configured. Please configure rewrite_query_params.model in vector_stores config."
80
+ )
81
+ raise ValueError("Query rewriting is not available")
82
+
83
+ if not self.inference_api:
84
+ logger.warning("Query rewriting requires inference API but it is not available")
85
+ raise ValueError("Query rewriting is not available")
86
+
87
+ model = self.vector_stores_config.rewrite_query_params.model
88
+ model_id = f"{model.provider_id}/{model.model_id}"
89
+
90
+ prompt = self.vector_stores_config.rewrite_query_params.prompt.format(query=query)
91
+
92
+ request = OpenAIChatCompletionRequestWithExtraBody(
93
+ model=model_id,
94
+ messages=[OpenAIUserMessageParam(role="user", content=prompt)],
95
+ max_tokens=self.vector_stores_config.rewrite_query_params.max_tokens or 100,
96
+ temperature=self.vector_stores_config.rewrite_query_params.temperature or 0.3,
97
+ )
98
+
99
+ try:
100
+ response = await self.inference_api.openai_chat_completion(request)
101
+ content = response.choices[0].message.content
102
+ if content is None:
103
+ logger.error(f"LLM returned None content for query rewriting. Model: {model_id}")
104
+ raise RuntimeError("Query rewrite failed due to an internal error")
105
+ rewritten_query: str = content.strip()
106
+ return rewritten_query
107
+ except Exception as e:
108
+ logger.error(f"Query rewrite failed with LLM call error. Model: {model_id}, Error: {e}")
109
+ raise RuntimeError("Query rewrite failed due to an internal error") from e
110
+
61
111
  async def _get_embedding_model_dimension(self, embedding_model_id: str) -> int:
62
112
  """Get the embedding dimension for a specific embedding model."""
63
113
  all_models = await self.routing_table.get_all_with_type("model")
@@ -73,27 +123,25 @@ class VectorIORouter(VectorIO):
73
123
 
74
124
  async def insert_chunks(
75
125
  self,
76
- vector_db_id: str,
77
- chunks: list[Chunk],
126
+ vector_store_id: str,
127
+ chunks: list[EmbeddedChunk],
78
128
  ttl_seconds: int | None = None,
79
129
  ) -> None:
80
130
  doc_ids = [chunk.document_id for chunk in chunks[:3]]
81
131
  logger.debug(
82
- f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, "
132
+ f"VectorIORouter.insert_chunks: {vector_store_id}, {len(chunks)} chunks, "
83
133
  f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}"
84
134
  )
85
- provider = await self.routing_table.get_provider_impl(vector_db_id)
86
- return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)
135
+ return await self.routing_table.insert_chunks(vector_store_id, chunks, ttl_seconds)
87
136
 
88
137
  async def query_chunks(
89
138
  self,
90
- vector_db_id: str,
139
+ vector_store_id: str,
91
140
  query: InterleavedContent,
92
141
  params: dict[str, Any] | None = None,
93
142
  ) -> QueryChunksResponse:
94
- logger.debug(f"VectorIORouter.query_chunks: {vector_db_id}")
95
- provider = await self.routing_table.get_provider_impl(vector_db_id)
96
- return await provider.query_chunks(vector_db_id, query, params)
143
+ logger.debug(f"VectorIORouter.query_chunks: {vector_store_id}")
144
+ return await self.routing_table.query_chunks(vector_store_id, query, params)
97
145
 
98
146
  # OpenAI Vector Stores API endpoints
99
147
  async def openai_create_vector_store(
@@ -120,6 +168,14 @@ class VectorIORouter(VectorIO):
120
168
  if embedding_model is not None and embedding_dimension is None:
121
169
  embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
122
170
 
171
+ # Validate that embedding model exists and is of the correct type
172
+ if embedding_model is not None:
173
+ model = await self.routing_table.get_object_by_identifier("model", embedding_model)
174
+ if model is None:
175
+ raise ModelNotFoundError(embedding_model)
176
+ if model.model_type != ModelType.embedding:
177
+ raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
178
+
123
179
  # Auto-select provider if not specified
124
180
  if provider_id is None:
125
181
  num_providers = len(self.routing_table.impls_by_provider_id)
@@ -167,6 +223,13 @@ class VectorIORouter(VectorIO):
167
223
  if embedding_dimension is not None:
168
224
  params.model_extra["embedding_dimension"] = embedding_dimension
169
225
 
226
+ # Set chunking strategy explicitly if not provided
227
+ if params.chunking_strategy is None or params.chunking_strategy.type == "auto":
228
+ # actualize the chunking strategy to static
229
+ params.chunking_strategy = VectorStoreChunkingStrategyStatic(
230
+ static=VectorStoreChunkingStrategyStaticConfig()
231
+ )
232
+
170
233
  return await provider.openai_create_vector_store(params)
171
234
 
172
235
  async def openai_list_vector_stores(
@@ -183,9 +246,8 @@ class VectorIORouter(VectorIO):
183
246
  all_stores = []
184
247
  for vector_store in vector_stores:
185
248
  try:
186
- provider = await self.routing_table.get_provider_impl(vector_store.identifier)
187
- vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier)
188
- all_stores.append(vector_store)
249
+ vector_store_obj = await self.routing_table.openai_retrieve_vector_store(vector_store.identifier)
250
+ all_stores.append(vector_store_obj)
189
251
  except Exception as e:
190
252
  logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}")
191
253
  continue
@@ -227,8 +289,7 @@ class VectorIORouter(VectorIO):
227
289
  vector_store_id: str,
228
290
  ) -> VectorStoreObject:
229
291
  logger.debug(f"VectorIORouter.openai_retrieve_vector_store: {vector_store_id}")
230
- provider = await self.routing_table.get_provider_impl(vector_store_id)
231
- return await provider.openai_retrieve_vector_store(vector_store_id)
292
+ return await self.routing_table.openai_retrieve_vector_store(vector_store_id)
232
293
 
233
294
  async def openai_update_vector_store(
234
295
  self,
@@ -238,8 +299,14 @@ class VectorIORouter(VectorIO):
238
299
  metadata: dict[str, Any] | None = None,
239
300
  ) -> VectorStoreObject:
240
301
  logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}")
241
- provider = await self.routing_table.get_provider_impl(vector_store_id)
242
- return await provider.openai_update_vector_store(
302
+
303
+ # Check if provider_id is being changed (not supported)
304
+ if metadata and "provider_id" in metadata:
305
+ current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id)
306
+ if current_store and current_store.provider_id != metadata["provider_id"]:
307
+ raise ValueError("provider_id cannot be changed after vector store creation")
308
+
309
+ return await self.routing_table.openai_update_vector_store(
243
310
  vector_store_id=vector_store_id,
244
311
  name=name,
245
312
  expires_after=expires_after,
@@ -264,14 +331,23 @@ class VectorIORouter(VectorIO):
264
331
  search_mode: str | None = "vector",
265
332
  ) -> VectorStoreSearchResponsePage:
266
333
  logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
267
- provider = await self.routing_table.get_provider_impl(vector_store_id)
268
- return await provider.openai_search_vector_store(
334
+
335
+ # Handle query rewriting at the router level
336
+ search_query = query
337
+ if rewrite_query:
338
+ if isinstance(query, list):
339
+ original_query = " ".join(query)
340
+ else:
341
+ original_query = query
342
+ search_query = await self._rewrite_query_for_search(original_query)
343
+
344
+ return await self.routing_table.openai_search_vector_store(
269
345
  vector_store_id=vector_store_id,
270
- query=query,
346
+ query=search_query,
271
347
  filters=filters,
272
348
  max_num_results=max_num_results,
273
349
  ranking_options=ranking_options,
274
- rewrite_query=rewrite_query,
350
+ rewrite_query=False, # Already handled at router level
275
351
  search_mode=search_mode,
276
352
  )
277
353
 
@@ -283,8 +359,9 @@ class VectorIORouter(VectorIO):
283
359
  chunking_strategy: VectorStoreChunkingStrategy | None = None,
284
360
  ) -> VectorStoreFileObject:
285
361
  logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
286
- provider = await self.routing_table.get_provider_impl(vector_store_id)
287
- return await provider.openai_attach_file_to_vector_store(
362
+ if chunking_strategy is None or chunking_strategy.type == "auto":
363
+ chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
364
+ return await self.routing_table.openai_attach_file_to_vector_store(
288
365
  vector_store_id=vector_store_id,
289
366
  file_id=file_id,
290
367
  attributes=attributes,
@@ -301,8 +378,7 @@ class VectorIORouter(VectorIO):
301
378
  filter: VectorStoreFileStatus | None = None,
302
379
  ) -> list[VectorStoreFileObject]:
303
380
  logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}")
304
- provider = await self.routing_table.get_provider_impl(vector_store_id)
305
- return await provider.openai_list_files_in_vector_store(
381
+ return await self.routing_table.openai_list_files_in_vector_store(
306
382
  vector_store_id=vector_store_id,
307
383
  limit=limit,
308
384
  order=order,
@@ -317,8 +393,7 @@ class VectorIORouter(VectorIO):
317
393
  file_id: str,
318
394
  ) -> VectorStoreFileObject:
319
395
  logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file: {vector_store_id}, {file_id}")
320
- provider = await self.routing_table.get_provider_impl(vector_store_id)
321
- return await provider.openai_retrieve_vector_store_file(
396
+ return await self.routing_table.openai_retrieve_vector_store_file(
322
397
  vector_store_id=vector_store_id,
323
398
  file_id=file_id,
324
399
  )
@@ -327,12 +402,19 @@ class VectorIORouter(VectorIO):
327
402
  self,
328
403
  vector_store_id: str,
329
404
  file_id: str,
330
- ) -> VectorStoreFileContentsResponse:
331
- logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
332
- provider = await self.routing_table.get_provider_impl(vector_store_id)
333
- return await provider.openai_retrieve_vector_store_file_contents(
405
+ include_embeddings: bool | None = False,
406
+ include_metadata: bool | None = False,
407
+ ) -> VectorStoreFileContentResponse:
408
+ logger.debug(
409
+ f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, "
410
+ f"include_embeddings={include_embeddings}, include_metadata={include_metadata}"
411
+ )
412
+
413
+ return await self.routing_table.openai_retrieve_vector_store_file_contents(
334
414
  vector_store_id=vector_store_id,
335
415
  file_id=file_id,
416
+ include_embeddings=include_embeddings,
417
+ include_metadata=include_metadata,
336
418
  )
337
419
 
338
420
  async def openai_update_vector_store_file(
@@ -342,8 +424,7 @@ class VectorIORouter(VectorIO):
342
424
  attributes: dict[str, Any],
343
425
  ) -> VectorStoreFileObject:
344
426
  logger.debug(f"VectorIORouter.openai_update_vector_store_file: {vector_store_id}, {file_id}")
345
- provider = await self.routing_table.get_provider_impl(vector_store_id)
346
- return await provider.openai_update_vector_store_file(
427
+ return await self.routing_table.openai_update_vector_store_file(
347
428
  vector_store_id=vector_store_id,
348
429
  file_id=file_id,
349
430
  attributes=attributes,
@@ -355,8 +436,7 @@ class VectorIORouter(VectorIO):
355
436
  file_id: str,
356
437
  ) -> VectorStoreFileDeleteResponse:
357
438
  logger.debug(f"VectorIORouter.openai_delete_vector_store_file: {vector_store_id}, {file_id}")
358
- provider = await self.routing_table.get_provider_impl(vector_store_id)
359
- return await provider.openai_delete_vector_store_file(
439
+ return await self.routing_table.openai_delete_vector_store_file(
360
440
  vector_store_id=vector_store_id,
361
441
  file_id=file_id,
362
442
  )
@@ -392,8 +472,10 @@ class VectorIORouter(VectorIO):
392
472
  logger.debug(
393
473
  f"VectorIORouter.openai_create_vector_store_file_batch: {vector_store_id}, {len(params.file_ids)} files"
394
474
  )
395
- provider = await self.routing_table.get_provider_impl(vector_store_id)
396
- return await provider.openai_create_vector_store_file_batch(vector_store_id, params)
475
+ return await self.routing_table.openai_create_vector_store_file_batch(
476
+ vector_store_id=vector_store_id,
477
+ params=params,
478
+ )
397
479
 
398
480
  async def openai_retrieve_vector_store_file_batch(
399
481
  self,
@@ -401,8 +483,7 @@ class VectorIORouter(VectorIO):
401
483
  vector_store_id: str,
402
484
  ) -> VectorStoreFileBatchObject:
403
485
  logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_batch: {batch_id}, {vector_store_id}")
404
- provider = await self.routing_table.get_provider_impl(vector_store_id)
405
- return await provider.openai_retrieve_vector_store_file_batch(
486
+ return await self.routing_table.openai_retrieve_vector_store_file_batch(
406
487
  batch_id=batch_id,
407
488
  vector_store_id=vector_store_id,
408
489
  )
@@ -418,8 +499,7 @@ class VectorIORouter(VectorIO):
418
499
  order: str | None = "desc",
419
500
  ) -> VectorStoreFilesListInBatchResponse:
420
501
  logger.debug(f"VectorIORouter.openai_list_files_in_vector_store_file_batch: {batch_id}, {vector_store_id}")
421
- provider = await self.routing_table.get_provider_impl(vector_store_id)
422
- return await provider.openai_list_files_in_vector_store_file_batch(
502
+ return await self.routing_table.openai_list_files_in_vector_store_file_batch(
423
503
  batch_id=batch_id,
424
504
  vector_store_id=vector_store_id,
425
505
  after=after,
@@ -435,8 +515,7 @@ class VectorIORouter(VectorIO):
435
515
  vector_store_id: str,
436
516
  ) -> VectorStoreFileBatchObject:
437
517
  logger.debug(f"VectorIORouter.openai_cancel_vector_store_file_batch: {batch_id}, {vector_store_id}")
438
- provider = await self.routing_table.get_provider_impl(vector_store_id)
439
- return await provider.openai_cancel_vector_store_file_batch(
518
+ return await self.routing_table.openai_cancel_vector_store_file_batch(
440
519
  batch_id=batch_id,
441
520
  vector_store_id=vector_store_id,
442
521
  )
@@ -4,13 +4,20 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from typing import Any
8
7
 
9
- from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
10
8
  from llama_stack.core.datatypes import (
11
9
  BenchmarkWithOwner,
12
10
  )
13
11
  from llama_stack.log import get_logger
12
+ from llama_stack_api import (
13
+ Benchmark,
14
+ Benchmarks,
15
+ GetBenchmarkRequest,
16
+ ListBenchmarksRequest,
17
+ ListBenchmarksResponse,
18
+ RegisterBenchmarkRequest,
19
+ UnregisterBenchmarkRequest,
20
+ )
14
21
 
15
22
  from .common import CommonRoutingTableImpl
16
23
 
@@ -18,26 +25,21 @@ logger = get_logger(name=__name__, category="core::routing_tables")
18
25
 
19
26
 
20
27
  class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
21
- async def list_benchmarks(self) -> ListBenchmarksResponse:
28
+ async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
22
29
  return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark"))
23
30
 
24
- async def get_benchmark(self, benchmark_id: str) -> Benchmark:
25
- benchmark = await self.get_object_by_identifier("benchmark", benchmark_id)
31
+ async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
32
+ benchmark = await self.get_object_by_identifier("benchmark", request.benchmark_id)
26
33
  if benchmark is None:
27
- raise ValueError(f"Benchmark '{benchmark_id}' not found")
34
+ raise ValueError(f"Benchmark '{request.benchmark_id}' not found")
28
35
  return benchmark
29
36
 
30
37
  async def register_benchmark(
31
38
  self,
32
- benchmark_id: str,
33
- dataset_id: str,
34
- scoring_functions: list[str],
35
- metadata: dict[str, Any] | None = None,
36
- provider_benchmark_id: str | None = None,
37
- provider_id: str | None = None,
39
+ request: RegisterBenchmarkRequest,
38
40
  ) -> None:
39
- if metadata is None:
40
- metadata = {}
41
+ metadata = request.metadata if request.metadata is not None else {}
42
+ provider_id = request.provider_id
41
43
  if provider_id is None:
42
44
  if len(self.impls_by_provider_id) == 1:
43
45
  provider_id = list(self.impls_by_provider_id.keys())[0]
@@ -45,18 +47,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
45
47
  raise ValueError(
46
48
  "No provider specified and multiple providers available. Please specify a provider_id."
47
49
  )
50
+ provider_benchmark_id = request.provider_benchmark_id
48
51
  if provider_benchmark_id is None:
49
- provider_benchmark_id = benchmark_id
52
+ provider_benchmark_id = request.benchmark_id
50
53
  benchmark = BenchmarkWithOwner(
51
- identifier=benchmark_id,
52
- dataset_id=dataset_id,
53
- scoring_functions=scoring_functions,
54
+ identifier=request.benchmark_id,
55
+ dataset_id=request.dataset_id,
56
+ scoring_functions=request.scoring_functions,
54
57
  metadata=metadata,
55
58
  provider_id=provider_id,
56
59
  provider_resource_id=provider_benchmark_id,
57
60
  )
58
61
  await self.register_object(benchmark)
59
62
 
60
- async def unregister_benchmark(self, benchmark_id: str) -> None:
61
- existing_benchmark = await self.get_benchmark(benchmark_id)
63
+ async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
64
+ get_request = GetBenchmarkRequest(benchmark_id=request.benchmark_id)
65
+ existing_benchmark = await self.get_benchmark(get_request)
62
66
  await self.unregister_object(existing_benchmark)
@@ -6,9 +6,6 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.apis.common.errors import ModelNotFoundError
10
- from llama_stack.apis.models import Model
11
- from llama_stack.apis.resource import ResourceType
12
9
  from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
13
10
  from llama_stack.core.access_control.datatypes import Action
14
11
  from llama_stack.core.datatypes import (
@@ -21,7 +18,7 @@ from llama_stack.core.datatypes import (
21
18
  from llama_stack.core.request_headers import get_authenticated_user
22
19
  from llama_stack.core.store import DistributionRegistry
23
20
  from llama_stack.log import get_logger
24
- from llama_stack.providers.datatypes import Api, RoutingTable
21
+ from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
25
22
 
26
23
  logger = get_logger(name=__name__, category="core::routing_tables")
27
24
 
@@ -5,24 +5,26 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
  import uuid
8
- from typing import Any
9
8
 
10
- from llama_stack.apis.common.errors import DatasetNotFoundError
11
- from llama_stack.apis.datasets import (
9
+ from llama_stack.core.datatypes import (
10
+ DatasetWithOwner,
11
+ )
12
+ from llama_stack.log import get_logger
13
+ from llama_stack_api import (
12
14
  Dataset,
13
- DatasetPurpose,
14
- Datasets,
15
+ DatasetNotFoundError,
15
16
  DatasetType,
16
- DataSource,
17
17
  ListDatasetsResponse,
18
+ ResourceType,
18
19
  RowsDataSource,
19
20
  URIDataSource,
20
21
  )
21
- from llama_stack.apis.resource import ResourceType
22
- from llama_stack.core.datatypes import (
23
- DatasetWithOwner,
22
+ from llama_stack_api.datasets.api import (
23
+ Datasets,
24
+ GetDatasetRequest,
25
+ RegisterDatasetRequest,
26
+ UnregisterDatasetRequest,
24
27
  )
25
- from llama_stack.log import get_logger
26
28
 
27
29
  from .common import CommonRoutingTableImpl
28
30
 
@@ -33,19 +35,17 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
33
35
  async def list_datasets(self) -> ListDatasetsResponse:
34
36
  return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value))
35
37
 
36
- async def get_dataset(self, dataset_id: str) -> Dataset:
37
- dataset = await self.get_object_by_identifier("dataset", dataset_id)
38
+ async def get_dataset(self, request: GetDatasetRequest) -> Dataset:
39
+ dataset = await self.get_object_by_identifier("dataset", request.dataset_id)
38
40
  if dataset is None:
39
- raise DatasetNotFoundError(dataset_id)
41
+ raise DatasetNotFoundError(request.dataset_id)
40
42
  return dataset
41
43
 
42
- async def register_dataset(
43
- self,
44
- purpose: DatasetPurpose,
45
- source: DataSource,
46
- metadata: dict[str, Any] | None = None,
47
- dataset_id: str | None = None,
48
- ) -> Dataset:
44
+ async def register_dataset(self, request: RegisterDatasetRequest) -> Dataset:
45
+ purpose = request.purpose
46
+ source = request.source
47
+ metadata = request.metadata
48
+ dataset_id = request.dataset_id
49
49
  if isinstance(source, dict):
50
50
  if source["type"] == "uri":
51
51
  source = URIDataSource.parse_obj(source)
@@ -86,6 +86,6 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
86
86
  await self.register_object(dataset)
87
87
  return dataset
88
88
 
89
- async def unregister_dataset(self, dataset_id: str) -> None:
90
- dataset = await self.get_dataset(dataset_id)
89
+ async def unregister_dataset(self, request: UnregisterDatasetRequest) -> None:
90
+ dataset = await self.get_dataset(GetDatasetRequest(dataset_id=request.dataset_id))
91
91
  await self.unregister_object(dataset)