llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -9,16 +9,16 @@ from typing import Any
9
9
 
10
10
  import httpx
11
11
 
12
- from llama_stack.apis.common.content_types import URL
13
- from llama_stack.apis.tools import (
12
+ from llama_stack.core.request_headers import NeedsRequestProviderData
13
+ from llama_stack_api import (
14
+ URL,
14
15
  ListToolDefsResponse,
15
16
  ToolDef,
16
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
17
19
  ToolInvocationResult,
18
20
  ToolRuntime,
19
21
  )
20
- from llama_stack.core.request_headers import NeedsRequestProviderData
21
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
22
22
 
23
23
  from .config import BingSearchToolConfig
24
24
 
@@ -49,7 +49,10 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq
49
49
  return provider_data.bing_search_api_key
50
50
 
51
51
  async def list_runtime_tools(
52
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
52
+ self,
53
+ tool_group_id: str | None = None,
54
+ mcp_endpoint: URL | None = None,
55
+ authorization: str | None = None,
53
56
  ) -> ListToolDefsResponse:
54
57
  return ListToolDefsResponse(
55
58
  data=[
@@ -70,7 +73,9 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq
70
73
  ]
71
74
  )
72
75
 
73
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
76
+ async def invoke_tool(
77
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
78
+ ) -> ToolInvocationResult:
74
79
  api_key = self._get_api_key()
75
80
  headers = {
76
81
  "Ocp-Apim-Subscription-Key": api_key,
@@ -8,17 +8,17 @@ from typing import Any
8
8
 
9
9
  import httpx
10
10
 
11
- from llama_stack.apis.common.content_types import URL
12
- from llama_stack.apis.tools import (
11
+ from llama_stack.core.request_headers import NeedsRequestProviderData
12
+ from llama_stack.models.llama.datatypes import BuiltinTool
13
+ from llama_stack_api import (
14
+ URL,
13
15
  ListToolDefsResponse,
14
16
  ToolDef,
15
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
16
19
  ToolInvocationResult,
17
20
  ToolRuntime,
18
21
  )
19
- from llama_stack.core.request_headers import NeedsRequestProviderData
20
- from llama_stack.models.llama.datatypes import BuiltinTool
21
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
22
22
 
23
23
  from .config import BraveSearchToolConfig
24
24
 
@@ -48,7 +48,10 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe
48
48
  return provider_data.brave_search_api_key
49
49
 
50
50
  async def list_runtime_tools(
51
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
51
+ self,
52
+ tool_group_id: str | None = None,
53
+ mcp_endpoint: URL | None = None,
54
+ authorization: str | None = None,
52
55
  ) -> ListToolDefsResponse:
53
56
  return ListToolDefsResponse(
54
57
  data=[
@@ -70,7 +73,9 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe
70
73
  ]
71
74
  )
72
75
 
73
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
76
+ async def invoke_tool(
77
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
78
+ ) -> ToolInvocationResult:
74
79
  api_key = self._get_api_key()
75
80
  url = "https://api.search.brave.com/res/v1/web/search"
76
81
  headers = {
@@ -10,8 +10,14 @@ from pydantic import BaseModel
10
10
 
11
11
 
12
12
  class MCPProviderDataValidator(BaseModel):
13
- # mcp_endpoint => dict of headers to send
14
- mcp_headers: dict[str, dict[str, str]] | None = None
13
+ """
14
+ Validator for MCP provider-specific data passed via request headers.
15
+
16
+ Phase 1: Support old header-based authentication for backward compatibility.
17
+ In Phase 2, this will be deprecated in favor of the authorization parameter.
18
+ """
19
+
20
+ mcp_headers: dict[str, dict[str, str]] | None = None # Map of URI -> headers dict
15
21
 
16
22
 
17
23
  class MCPProviderConfig(BaseModel):
@@ -7,18 +7,18 @@
7
7
  from typing import Any
8
8
  from urllib.parse import urlparse
9
9
 
10
- from llama_stack.apis.common.content_types import URL
11
- from llama_stack.apis.datatypes import Api
12
- from llama_stack.apis.tools import (
10
+ from llama_stack.core.request_headers import NeedsRequestProviderData
11
+ from llama_stack.log import get_logger
12
+ from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
13
+ from llama_stack_api import (
14
+ URL,
15
+ Api,
13
16
  ListToolDefsResponse,
14
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
15
19
  ToolInvocationResult,
16
20
  ToolRuntime,
17
21
  )
18
- from llama_stack.core.request_headers import NeedsRequestProviderData
19
- from llama_stack.log import get_logger
20
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
21
- from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
22
22
 
23
23
  from .config import MCPProviderConfig
24
24
 
@@ -39,15 +39,23 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
39
39
  return
40
40
 
41
41
  async def list_runtime_tools(
42
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
42
+ self,
43
+ tool_group_id: str | None = None,
44
+ mcp_endpoint: URL | None = None,
45
+ authorization: str | None = None,
43
46
  ) -> ListToolDefsResponse:
44
47
  # this endpoint should be retrieved by getting the tool group right?
45
48
  if mcp_endpoint is None:
46
49
  raise ValueError("mcp_endpoint is required")
47
- headers = await self.get_headers_from_request(mcp_endpoint.uri)
48
- return await list_mcp_tools(mcp_endpoint.uri, headers)
49
50
 
50
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
51
+ # Get other headers from provider data (but NOT authorization)
52
+ provider_headers = await self.get_headers_from_request(mcp_endpoint.uri)
53
+
54
+ return await list_mcp_tools(endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=authorization)
55
+
56
+ async def invoke_tool(
57
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
58
+ ) -> ToolInvocationResult:
51
59
  tool = await self.tool_store.get_tool(tool_name)
52
60
  if tool.metadata is None or tool.metadata.get("endpoint") is None:
53
61
  raise ValueError(f"Tool {tool_name} does not have metadata")
@@ -55,19 +63,53 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
55
63
  if urlparse(endpoint).scheme not in ("http", "https"):
56
64
  raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL")
57
65
 
58
- headers = await self.get_headers_from_request(endpoint)
59
- return await invoke_mcp_tool(endpoint, headers, tool_name, kwargs)
66
+ # Get other headers from provider data (but NOT authorization)
67
+ provider_headers = await self.get_headers_from_request(endpoint)
68
+
69
+ return await invoke_mcp_tool(
70
+ endpoint=endpoint,
71
+ tool_name=tool_name,
72
+ kwargs=kwargs,
73
+ headers=provider_headers,
74
+ authorization=authorization,
75
+ )
60
76
 
61
77
  async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]:
78
+ """
79
+ Extract headers from request provider data, excluding authorization.
80
+
81
+ Authorization must be provided via the dedicated authorization parameter.
82
+ If Authorization is found in mcp_headers, raise an error to guide users to the correct approach.
83
+
84
+ Args:
85
+ mcp_endpoint_uri: The MCP endpoint URI to match against provider data
86
+
87
+ Returns:
88
+ dict[str, str]: Headers dictionary (without Authorization)
89
+
90
+ Raises:
91
+ ValueError: If Authorization header is found in mcp_headers
92
+ """
93
+
62
94
  def canonicalize_uri(uri: str) -> str:
63
95
  return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}"
64
96
 
65
97
  headers = {}
66
98
 
67
99
  provider_data = self.get_request_provider_data()
68
- if provider_data and provider_data.mcp_headers:
100
+ if provider_data and hasattr(provider_data, "mcp_headers") and provider_data.mcp_headers:
69
101
  for uri, values in provider_data.mcp_headers.items():
70
102
  if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri):
71
103
  continue
72
- headers.update(values)
104
+
105
+ # Reject Authorization in mcp_headers - must use authorization parameter
106
+ for key in values.keys():
107
+ if key.lower() == "authorization":
108
+ raise ValueError(
109
+ "Authorization cannot be provided via mcp_headers in provider_data. "
110
+ "Please use the dedicated 'authorization' parameter instead. "
111
+ "Example: tool_runtime.invoke_tool(..., authorization='your-token')"
112
+ )
113
+ headers[key] = values[key]
114
+
73
115
  return headers
@@ -9,16 +9,16 @@ from typing import Any
9
9
 
10
10
  import httpx
11
11
 
12
- from llama_stack.apis.common.content_types import URL
13
- from llama_stack.apis.tools import (
12
+ from llama_stack.core.request_headers import NeedsRequestProviderData
13
+ from llama_stack_api import (
14
+ URL,
14
15
  ListToolDefsResponse,
15
16
  ToolDef,
16
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
17
19
  ToolInvocationResult,
18
20
  ToolRuntime,
19
21
  )
20
- from llama_stack.core.request_headers import NeedsRequestProviderData
21
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
22
22
 
23
23
  from .config import TavilySearchToolConfig
24
24
 
@@ -48,7 +48,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
48
48
  return provider_data.tavily_search_api_key
49
49
 
50
50
  async def list_runtime_tools(
51
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
51
+ self,
52
+ tool_group_id: str | None = None,
53
+ mcp_endpoint: URL | None = None,
54
+ authorization: str | None = None,
52
55
  ) -> ListToolDefsResponse:
53
56
  return ListToolDefsResponse(
54
57
  data=[
@@ -69,7 +72,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
69
72
  ]
70
73
  )
71
74
 
72
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
75
+ async def invoke_tool(
76
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
77
+ ) -> ToolInvocationResult:
73
78
  api_key = self._get_api_key()
74
79
  async with httpx.AsyncClient() as client:
75
80
  response = await client.post(
@@ -9,16 +9,16 @@ from typing import Any
9
9
 
10
10
  import httpx
11
11
 
12
- from llama_stack.apis.common.content_types import URL
13
- from llama_stack.apis.tools import (
12
+ from llama_stack.core.request_headers import NeedsRequestProviderData
13
+ from llama_stack_api import (
14
+ URL,
14
15
  ListToolDefsResponse,
15
16
  ToolDef,
16
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
17
19
  ToolInvocationResult,
18
20
  ToolRuntime,
19
21
  )
20
- from llama_stack.core.request_headers import NeedsRequestProviderData
21
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
22
22
 
23
23
  from .config import WolframAlphaToolConfig
24
24
 
@@ -49,7 +49,10 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
49
49
  return provider_data.wolfram_alpha_api_key
50
50
 
51
51
  async def list_runtime_tools(
52
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
52
+ self,
53
+ tool_group_id: str | None = None,
54
+ mcp_endpoint: URL | None = None,
55
+ authorization: str | None = None,
53
56
  ) -> ListToolDefsResponse:
54
57
  return ListToolDefsResponse(
55
58
  data=[
@@ -70,7 +73,9 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
70
73
  ]
71
74
  )
72
75
 
73
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
76
+ async def invoke_tool(
77
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
78
+ ) -> ToolInvocationResult:
74
79
  api_key = self._get_api_key()
75
80
  params = {
76
81
  "input": kwargs["query"],
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import ChromaVectorIOConfig
10
10
 
@@ -4,6 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
  import asyncio
7
+ import heapq
7
8
  import json
8
9
  from typing import Any
9
10
  from urllib.parse import urlparse
@@ -11,17 +12,24 @@ from urllib.parse import urlparse
11
12
  import chromadb
12
13
  from numpy.typing import NDArray
13
14
 
14
- from llama_stack.apis.files import Files
15
- from llama_stack.apis.inference import Inference, InterleavedContent
16
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
17
- from llama_stack.apis.vector_stores import VectorStore
15
+ from llama_stack.core.storage.kvstore import kvstore_impl
18
16
  from llama_stack.log import get_logger
19
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
20
17
  from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
21
- from llama_stack.providers.utils.kvstore import kvstore_impl
22
- from llama_stack.providers.utils.kvstore.api import KVStore
23
18
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
24
19
  from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
20
+ from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
21
+ from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
22
+ from llama_stack_api import (
23
+ EmbeddedChunk,
24
+ Files,
25
+ Inference,
26
+ InterleavedContent,
27
+ QueryChunksResponse,
28
+ VectorIO,
29
+ VectorStore,
30
+ VectorStoresProtocolPrivate,
31
+ )
32
+ from llama_stack_api.internal.kvstore import KVStore
25
33
 
26
34
  from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
27
35
 
@@ -53,10 +61,12 @@ class ChromaIndex(EmbeddingIndex):
53
61
  async def initialize(self):
54
62
  pass
55
63
 
56
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
57
- assert len(chunks) == len(embeddings), (
58
- f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
59
- )
64
+ async def add_chunks(self, chunks: list[EmbeddedChunk]):
65
+ if not chunks:
66
+ return
67
+
68
+ # Extract embeddings directly from chunks (already list[float])
69
+ embeddings = [chunk.embedding for chunk in chunks]
60
70
 
61
71
  ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
62
72
  await maybe_await(
@@ -77,7 +87,7 @@ class ChromaIndex(EmbeddingIndex):
77
87
  for dist, doc in zip(distances, documents, strict=False):
78
88
  try:
79
89
  doc = json.loads(doc)
80
- chunk = Chunk(**doc)
90
+ chunk = load_embedded_chunk_with_backward_compat(doc)
81
91
  except Exception:
82
92
  log.exception(f"Failed to parse document: {doc}")
83
93
  continue
@@ -94,8 +104,55 @@ class ChromaIndex(EmbeddingIndex):
94
104
  async def delete(self):
95
105
  await maybe_await(self.client.delete_collection(self.collection.name))
96
106
 
97
- async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
98
- raise NotImplementedError("Keyword search is not supported in Chroma")
107
+ async def query_keyword(
108
+ self,
109
+ query_string: str,
110
+ k: int,
111
+ score_threshold: float,
112
+ ) -> QueryChunksResponse:
113
+ """
114
+ Perform keyword search using Chroma's built-in where_document feature.
115
+
116
+ Args:
117
+ query_string: The text query for keyword search
118
+ k: Number of results to return
119
+ score_threshold: Minimum similarity score threshold
120
+
121
+ Returns:
122
+ QueryChunksResponse with combined results
123
+ """
124
+ try:
125
+ results = await maybe_await(
126
+ self.collection.query(
127
+ query_texts=[query_string],
128
+ where_document={"$contains": query_string},
129
+ n_results=k,
130
+ include=["documents", "distances"],
131
+ )
132
+ )
133
+ except Exception as e:
134
+ log.error(f"Chroma client keyword search failed: {e}")
135
+ raise
136
+
137
+ distances = results["distances"][0] if results["distances"] else []
138
+ documents = results["documents"][0] if results["documents"] else []
139
+
140
+ chunks = []
141
+ scores = []
142
+
143
+ for dist, doc in zip(distances, documents, strict=False):
144
+ doc_data = json.loads(doc)
145
+ chunk = load_embedded_chunk_with_backward_compat(doc_data)
146
+
147
+ score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0
148
+
149
+ if score < score_threshold:
150
+ continue
151
+
152
+ chunks.append(chunk)
153
+ scores.append(score)
154
+
155
+ return QueryChunksResponse(chunks=chunks, scores=scores)
99
156
 
100
157
  async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
101
158
  """Delete a single chunk from the Chroma collection by its ID."""
@@ -111,7 +168,57 @@ class ChromaIndex(EmbeddingIndex):
111
168
  reranker_type: str,
112
169
  reranker_params: dict[str, Any] | None = None,
113
170
  ) -> QueryChunksResponse:
114
- raise NotImplementedError("Hybrid search is not supported in Chroma")
171
+ """
172
+ Hybrid search combining vector similarity and keyword search using configurable reranking.
173
+ Args:
174
+ embedding: The query embedding vector
175
+ query_string: The text query for keyword search
176
+ k: Number of results to return
177
+ score_threshold: Minimum similarity score threshold
178
+ reranker_type: Type of reranker to use ("rrf" or "weighted")
179
+ reranker_params: Parameters for the reranker
180
+ Returns:
181
+ QueryChunksResponse with combined results
182
+ """
183
+ if reranker_params is None:
184
+ reranker_params = {}
185
+
186
+ # Get results from both search methods
187
+ vector_response = await self.query_vector(embedding, k, score_threshold)
188
+ keyword_response = await self.query_keyword(query_string, k, score_threshold)
189
+
190
+ # Convert responses to score dictionaries using chunk_id
191
+ vector_scores = {
192
+ chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
193
+ }
194
+ keyword_scores = {
195
+ chunk.chunk_id: score
196
+ for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
197
+ }
198
+
199
+ # Combine scores using the reranking utility
200
+ combined_scores = WeightedInMemoryAggregator.combine_search_results(
201
+ vector_scores, keyword_scores, reranker_type, reranker_params
202
+ )
203
+
204
+ # Efficient top-k selection because it only tracks the k best candidates it's seen so far
205
+ top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])
206
+
207
+ # Filter by score threshold
208
+ filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
209
+
210
+ # Create a map of chunk_id to chunk for both responses
211
+ chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
212
+
213
+ # Use the map to look up chunks by their IDs
214
+ chunks = []
215
+ scores = []
216
+ for doc_id, score in filtered_items:
217
+ if doc_id in chunk_map:
218
+ chunks.append(chunk_map[doc_id])
219
+ scores.append(score)
220
+
221
+ return QueryChunksResponse(chunks=chunks, scores=scores)
115
222
 
116
223
 
117
224
  class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
@@ -121,10 +228,9 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
121
228
  inference_api: Inference,
122
229
  files_api: Files | None,
123
230
  ) -> None:
124
- super().__init__(files_api=files_api, kvstore=None)
231
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
125
232
  log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
126
233
  self.config = config
127
- self.inference_api = inference_api
128
234
  self.client = None
129
235
  self.cache = {}
130
236
  self.vector_store_table = None
@@ -168,20 +274,22 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
168
274
  await self.cache[vector_store_id].index.delete()
169
275
  del self.cache[vector_store_id]
170
276
 
171
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
172
- index = await self._get_and_cache_vector_store_index(vector_db_id)
277
+ async def insert_chunks(
278
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
279
+ ) -> None:
280
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
173
281
  if index is None:
174
- raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
282
+ raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
175
283
 
176
284
  await index.insert_chunks(chunks)
177
285
 
178
286
  async def query_chunks(
179
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
287
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
180
288
  ) -> QueryChunksResponse:
181
- index = await self._get_and_cache_vector_store_index(vector_db_id)
289
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
182
290
 
183
291
  if index is None:
184
- raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
292
+ raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
185
293
 
186
294
  return await index.query_chunks(query, params)
187
295
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import MilvusVectorIOConfig
10
10
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, ConfigDict, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type