llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -9,16 +9,16 @@ from typing import Any
9
9
 
10
10
  import httpx
11
11
 
12
- from llama_stack.apis.common.content_types import URL
13
- from llama_stack.apis.tools import (
12
+ from llama_stack.core.request_headers import NeedsRequestProviderData
13
+ from llama_stack_api import (
14
+ URL,
14
15
  ListToolDefsResponse,
15
16
  ToolDef,
16
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
17
19
  ToolInvocationResult,
18
20
  ToolRuntime,
19
21
  )
20
- from llama_stack.core.request_headers import NeedsRequestProviderData
21
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
22
22
 
23
23
  from .config import BingSearchToolConfig
24
24
 
@@ -49,7 +49,10 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq
49
49
  return provider_data.bing_search_api_key
50
50
 
51
51
  async def list_runtime_tools(
52
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
52
+ self,
53
+ tool_group_id: str | None = None,
54
+ mcp_endpoint: URL | None = None,
55
+ authorization: str | None = None,
53
56
  ) -> ListToolDefsResponse:
54
57
  return ListToolDefsResponse(
55
58
  data=[
@@ -70,7 +73,9 @@ class BingSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsReq
70
73
  ]
71
74
  )
72
75
 
73
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
76
+ async def invoke_tool(
77
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
78
+ ) -> ToolInvocationResult:
74
79
  api_key = self._get_api_key()
75
80
  headers = {
76
81
  "Ocp-Apim-Subscription-Key": api_key,
@@ -8,17 +8,17 @@ from typing import Any
8
8
 
9
9
  import httpx
10
10
 
11
- from llama_stack.apis.common.content_types import URL
12
- from llama_stack.apis.tools import (
11
+ from llama_stack.core.request_headers import NeedsRequestProviderData
12
+ from llama_stack.models.llama.datatypes import BuiltinTool
13
+ from llama_stack_api import (
14
+ URL,
13
15
  ListToolDefsResponse,
14
16
  ToolDef,
15
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
16
19
  ToolInvocationResult,
17
20
  ToolRuntime,
18
21
  )
19
- from llama_stack.core.request_headers import NeedsRequestProviderData
20
- from llama_stack.models.llama.datatypes import BuiltinTool
21
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
22
22
 
23
23
  from .config import BraveSearchToolConfig
24
24
 
@@ -48,7 +48,10 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe
48
48
  return provider_data.brave_search_api_key
49
49
 
50
50
  async def list_runtime_tools(
51
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
51
+ self,
52
+ tool_group_id: str | None = None,
53
+ mcp_endpoint: URL | None = None,
54
+ authorization: str | None = None,
52
55
  ) -> ListToolDefsResponse:
53
56
  return ListToolDefsResponse(
54
57
  data=[
@@ -70,7 +73,9 @@ class BraveSearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsRe
70
73
  ]
71
74
  )
72
75
 
73
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
76
+ async def invoke_tool(
77
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
78
+ ) -> ToolInvocationResult:
74
79
  api_key = self._get_api_key()
75
80
  url = "https://api.search.brave.com/res/v1/web/search"
76
81
  headers = {
@@ -10,8 +10,14 @@ from pydantic import BaseModel
10
10
 
11
11
 
12
12
  class MCPProviderDataValidator(BaseModel):
13
- # mcp_endpoint => dict of headers to send
14
- mcp_headers: dict[str, dict[str, str]] | None = None
13
+ """
14
+ Validator for MCP provider-specific data passed via request headers.
15
+
16
+ Phase 1: Support old header-based authentication for backward compatibility.
17
+ In Phase 2, this will be deprecated in favor of the authorization parameter.
18
+ """
19
+
20
+ mcp_headers: dict[str, dict[str, str]] | None = None # Map of URI -> headers dict
15
21
 
16
22
 
17
23
  class MCPProviderConfig(BaseModel):
@@ -7,18 +7,18 @@
7
7
  from typing import Any
8
8
  from urllib.parse import urlparse
9
9
 
10
- from llama_stack.apis.common.content_types import URL
11
- from llama_stack.apis.datatypes import Api
12
- from llama_stack.apis.tools import (
10
+ from llama_stack.core.request_headers import NeedsRequestProviderData
11
+ from llama_stack.log import get_logger
12
+ from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
13
+ from llama_stack_api import (
14
+ URL,
15
+ Api,
13
16
  ListToolDefsResponse,
14
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
15
19
  ToolInvocationResult,
16
20
  ToolRuntime,
17
21
  )
18
- from llama_stack.core.request_headers import NeedsRequestProviderData
19
- from llama_stack.log import get_logger
20
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
21
- from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
22
22
 
23
23
  from .config import MCPProviderConfig
24
24
 
@@ -39,15 +39,23 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
39
39
  return
40
40
 
41
41
  async def list_runtime_tools(
42
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
42
+ self,
43
+ tool_group_id: str | None = None,
44
+ mcp_endpoint: URL | None = None,
45
+ authorization: str | None = None,
43
46
  ) -> ListToolDefsResponse:
44
47
  # this endpoint should be retrieved by getting the tool group right?
45
48
  if mcp_endpoint is None:
46
49
  raise ValueError("mcp_endpoint is required")
47
- headers = await self.get_headers_from_request(mcp_endpoint.uri)
48
- return await list_mcp_tools(mcp_endpoint.uri, headers)
49
50
 
50
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
51
+ # Get other headers from provider data (but NOT authorization)
52
+ provider_headers = await self.get_headers_from_request(mcp_endpoint.uri)
53
+
54
+ return await list_mcp_tools(endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=authorization)
55
+
56
+ async def invoke_tool(
57
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
58
+ ) -> ToolInvocationResult:
51
59
  tool = await self.tool_store.get_tool(tool_name)
52
60
  if tool.metadata is None or tool.metadata.get("endpoint") is None:
53
61
  raise ValueError(f"Tool {tool_name} does not have metadata")
@@ -55,19 +63,53 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
55
63
  if urlparse(endpoint).scheme not in ("http", "https"):
56
64
  raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL")
57
65
 
58
- headers = await self.get_headers_from_request(endpoint)
59
- return await invoke_mcp_tool(endpoint, headers, tool_name, kwargs)
66
+ # Get other headers from provider data (but NOT authorization)
67
+ provider_headers = await self.get_headers_from_request(endpoint)
68
+
69
+ return await invoke_mcp_tool(
70
+ endpoint=endpoint,
71
+ tool_name=tool_name,
72
+ kwargs=kwargs,
73
+ headers=provider_headers,
74
+ authorization=authorization,
75
+ )
60
76
 
61
77
  async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]:
78
+ """
79
+ Extract headers from request provider data, excluding authorization.
80
+
81
+ Authorization must be provided via the dedicated authorization parameter.
82
+ If Authorization is found in mcp_headers, raise an error to guide users to the correct approach.
83
+
84
+ Args:
85
+ mcp_endpoint_uri: The MCP endpoint URI to match against provider data
86
+
87
+ Returns:
88
+ dict[str, str]: Headers dictionary (without Authorization)
89
+
90
+ Raises:
91
+ ValueError: If Authorization header is found in mcp_headers
92
+ """
93
+
62
94
  def canonicalize_uri(uri: str) -> str:
63
95
  return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}"
64
96
 
65
97
  headers = {}
66
98
 
67
99
  provider_data = self.get_request_provider_data()
68
- if provider_data and provider_data.mcp_headers:
100
+ if provider_data and hasattr(provider_data, "mcp_headers") and provider_data.mcp_headers:
69
101
  for uri, values in provider_data.mcp_headers.items():
70
102
  if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri):
71
103
  continue
72
- headers.update(values)
104
+
105
+ # Reject Authorization in mcp_headers - must use authorization parameter
106
+ for key in values.keys():
107
+ if key.lower() == "authorization":
108
+ raise ValueError(
109
+ "Authorization cannot be provided via mcp_headers in provider_data. "
110
+ "Please use the dedicated 'authorization' parameter instead. "
111
+ "Example: tool_runtime.invoke_tool(..., authorization='your-token')"
112
+ )
113
+ headers[key] = values[key]
114
+
73
115
  return headers
@@ -9,16 +9,16 @@ from typing import Any
9
9
 
10
10
  import httpx
11
11
 
12
- from llama_stack.apis.common.content_types import URL
13
- from llama_stack.apis.tools import (
12
+ from llama_stack.core.request_headers import NeedsRequestProviderData
13
+ from llama_stack_api import (
14
+ URL,
14
15
  ListToolDefsResponse,
15
16
  ToolDef,
16
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
17
19
  ToolInvocationResult,
18
20
  ToolRuntime,
19
21
  )
20
- from llama_stack.core.request_headers import NeedsRequestProviderData
21
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
22
22
 
23
23
  from .config import TavilySearchToolConfig
24
24
 
@@ -48,7 +48,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
48
48
  return provider_data.tavily_search_api_key
49
49
 
50
50
  async def list_runtime_tools(
51
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
51
+ self,
52
+ tool_group_id: str | None = None,
53
+ mcp_endpoint: URL | None = None,
54
+ authorization: str | None = None,
52
55
  ) -> ListToolDefsResponse:
53
56
  return ListToolDefsResponse(
54
57
  data=[
@@ -69,7 +72,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
69
72
  ]
70
73
  )
71
74
 
72
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
75
+ async def invoke_tool(
76
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
77
+ ) -> ToolInvocationResult:
73
78
  api_key = self._get_api_key()
74
79
  async with httpx.AsyncClient() as client:
75
80
  response = await client.post(
@@ -9,16 +9,16 @@ from typing import Any
9
9
 
10
10
  import httpx
11
11
 
12
- from llama_stack.apis.common.content_types import URL
13
- from llama_stack.apis.tools import (
12
+ from llama_stack.core.request_headers import NeedsRequestProviderData
13
+ from llama_stack_api import (
14
+ URL,
14
15
  ListToolDefsResponse,
15
16
  ToolDef,
16
17
  ToolGroup,
18
+ ToolGroupsProtocolPrivate,
17
19
  ToolInvocationResult,
18
20
  ToolRuntime,
19
21
  )
20
- from llama_stack.core.request_headers import NeedsRequestProviderData
21
- from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
22
22
 
23
23
  from .config import WolframAlphaToolConfig
24
24
 
@@ -49,7 +49,10 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
49
49
  return provider_data.wolfram_alpha_api_key
50
50
 
51
51
  async def list_runtime_tools(
52
- self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
52
+ self,
53
+ tool_group_id: str | None = None,
54
+ mcp_endpoint: URL | None = None,
55
+ authorization: str | None = None,
53
56
  ) -> ListToolDefsResponse:
54
57
  return ListToolDefsResponse(
55
58
  data=[
@@ -70,7 +73,9 @@ class WolframAlphaToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR
70
73
  ]
71
74
  )
72
75
 
73
- async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
76
+ async def invoke_tool(
77
+ self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
78
+ ) -> ToolInvocationResult:
74
79
  api_key = self._get_api_key()
75
80
  params = {
76
81
  "input": kwargs["query"],
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import ChromaVectorIOConfig
10
10
 
@@ -4,6 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
  import asyncio
7
+ import heapq
7
8
  import json
8
9
  from typing import Any
9
10
  from urllib.parse import urlparse
@@ -11,17 +12,23 @@ from urllib.parse import urlparse
11
12
  import chromadb
12
13
  from numpy.typing import NDArray
13
14
 
14
- from llama_stack.apis.files import Files
15
- from llama_stack.apis.inference import Inference, InterleavedContent
16
- from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
17
- from llama_stack.apis.vector_stores import VectorStore
15
+ from llama_stack.core.storage.kvstore import kvstore_impl
18
16
  from llama_stack.log import get_logger
19
- from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
20
17
  from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
21
- from llama_stack.providers.utils.kvstore import kvstore_impl
22
- from llama_stack.providers.utils.kvstore.api import KVStore
23
18
  from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
24
19
  from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
20
+ from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
21
+ from llama_stack_api import (
22
+ EmbeddedChunk,
23
+ Files,
24
+ Inference,
25
+ InterleavedContent,
26
+ QueryChunksResponse,
27
+ VectorIO,
28
+ VectorStore,
29
+ VectorStoresProtocolPrivate,
30
+ )
31
+ from llama_stack_api.internal.kvstore import KVStore
25
32
 
26
33
  from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
27
34
 
@@ -53,7 +60,7 @@ class ChromaIndex(EmbeddingIndex):
53
60
  async def initialize(self):
54
61
  pass
55
62
 
56
- async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
63
+ async def add_chunks(self, chunks: list[EmbeddedChunk], embeddings: NDArray):
57
64
  assert len(chunks) == len(embeddings), (
58
65
  f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
59
66
  )
@@ -77,7 +84,7 @@ class ChromaIndex(EmbeddingIndex):
77
84
  for dist, doc in zip(distances, documents, strict=False):
78
85
  try:
79
86
  doc = json.loads(doc)
80
- chunk = Chunk(**doc)
87
+ chunk = EmbeddedChunk(**doc)
81
88
  except Exception:
82
89
  log.exception(f"Failed to parse document: {doc}")
83
90
  continue
@@ -94,8 +101,55 @@ class ChromaIndex(EmbeddingIndex):
94
101
  async def delete(self):
95
102
  await maybe_await(self.client.delete_collection(self.collection.name))
96
103
 
97
- async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
98
- raise NotImplementedError("Keyword search is not supported in Chroma")
104
+ async def query_keyword(
105
+ self,
106
+ query_string: str,
107
+ k: int,
108
+ score_threshold: float,
109
+ ) -> QueryChunksResponse:
110
+ """
111
+ Perform keyword search using Chroma's built-in where_document feature.
112
+
113
+ Args:
114
+ query_string: The text query for keyword search
115
+ k: Number of results to return
116
+ score_threshold: Minimum similarity score threshold
117
+
118
+ Returns:
119
+ QueryChunksResponse with combined results
120
+ """
121
+ try:
122
+ results = await maybe_await(
123
+ self.collection.query(
124
+ query_texts=[query_string],
125
+ where_document={"$contains": query_string},
126
+ n_results=k,
127
+ include=["documents", "distances"],
128
+ )
129
+ )
130
+ except Exception as e:
131
+ log.error(f"Chroma client keyword search failed: {e}")
132
+ raise
133
+
134
+ distances = results["distances"][0] if results["distances"] else []
135
+ documents = results["documents"][0] if results["documents"] else []
136
+
137
+ chunks = []
138
+ scores = []
139
+
140
+ for dist, doc in zip(distances, documents, strict=False):
141
+ doc_data = json.loads(doc)
142
+ chunk = EmbeddedChunk(**doc_data)
143
+
144
+ score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0
145
+
146
+ if score < score_threshold:
147
+ continue
148
+
149
+ chunks.append(chunk)
150
+ scores.append(score)
151
+
152
+ return QueryChunksResponse(chunks=chunks, scores=scores)
99
153
 
100
154
  async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
101
155
  """Delete a single chunk from the Chroma collection by its ID."""
@@ -111,7 +165,57 @@ class ChromaIndex(EmbeddingIndex):
111
165
  reranker_type: str,
112
166
  reranker_params: dict[str, Any] | None = None,
113
167
  ) -> QueryChunksResponse:
114
- raise NotImplementedError("Hybrid search is not supported in Chroma")
168
+ """
169
+ Hybrid search combining vector similarity and keyword search using configurable reranking.
170
+ Args:
171
+ embedding: The query embedding vector
172
+ query_string: The text query for keyword search
173
+ k: Number of results to return
174
+ score_threshold: Minimum similarity score threshold
175
+ reranker_type: Type of reranker to use ("rrf" or "weighted")
176
+ reranker_params: Parameters for the reranker
177
+ Returns:
178
+ QueryChunksResponse with combined results
179
+ """
180
+ if reranker_params is None:
181
+ reranker_params = {}
182
+
183
+ # Get results from both search methods
184
+ vector_response = await self.query_vector(embedding, k, score_threshold)
185
+ keyword_response = await self.query_keyword(query_string, k, score_threshold)
186
+
187
+ # Convert responses to score dictionaries using chunk_id
188
+ vector_scores = {
189
+ chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
190
+ }
191
+ keyword_scores = {
192
+ chunk.chunk_id: score
193
+ for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
194
+ }
195
+
196
+ # Combine scores using the reranking utility
197
+ combined_scores = WeightedInMemoryAggregator.combine_search_results(
198
+ vector_scores, keyword_scores, reranker_type, reranker_params
199
+ )
200
+
201
+ # Efficient top-k selection because it only tracks the k best candidates it's seen so far
202
+ top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])
203
+
204
+ # Filter by score threshold
205
+ filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
206
+
207
+ # Create a map of chunk_id to chunk for both responses
208
+ chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
209
+
210
+ # Use the map to look up chunks by their IDs
211
+ chunks = []
212
+ scores = []
213
+ for doc_id, score in filtered_items:
214
+ if doc_id in chunk_map:
215
+ chunks.append(chunk_map[doc_id])
216
+ scores.append(score)
217
+
218
+ return QueryChunksResponse(chunks=chunks, scores=scores)
115
219
 
116
220
 
117
221
  class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
@@ -121,10 +225,9 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
121
225
  inference_api: Inference,
122
226
  files_api: Files | None,
123
227
  ) -> None:
124
- super().__init__(files_api=files_api, kvstore=None)
228
+ super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
125
229
  log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
126
230
  self.config = config
127
- self.inference_api = inference_api
128
231
  self.client = None
129
232
  self.cache = {}
130
233
  self.vector_store_table = None
@@ -168,20 +271,22 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
168
271
  await self.cache[vector_store_id].index.delete()
169
272
  del self.cache[vector_store_id]
170
273
 
171
- async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
172
- index = await self._get_and_cache_vector_store_index(vector_db_id)
274
+ async def insert_chunks(
275
+ self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
276
+ ) -> None:
277
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
173
278
  if index is None:
174
- raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
279
+ raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
175
280
 
176
281
  await index.insert_chunks(chunks)
177
282
 
178
283
  async def query_chunks(
179
- self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
284
+ self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
180
285
  ) -> QueryChunksResponse:
181
- index = await self._get_and_cache_vector_store_index(vector_db_id)
286
+ index = await self._get_and_cache_vector_store_index(vector_store_id)
182
287
 
183
288
  if index is None:
184
- raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
289
+ raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
185
290
 
186
291
  return await index.query_chunks(query, params)
187
292
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type
@@ -4,7 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, ProviderSpec
7
+ from llama_stack_api import Api, ProviderSpec
8
8
 
9
9
  from .config import MilvusVectorIOConfig
10
10
 
@@ -9,7 +9,7 @@ from typing import Any
9
9
  from pydantic import BaseModel, ConfigDict, Field
10
10
 
11
11
  from llama_stack.core.storage.datatypes import KVStoreReference
12
- from llama_stack.schema_utils import json_schema_type
12
+ from llama_stack_api import json_schema_type
13
13
 
14
14
 
15
15
  @json_schema_type