llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -7,13 +7,23 @@
 import time
 from typing import Any
 
-from llama_stack.apis.common.errors import ModelNotFoundError
-from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
+from llama_stack.core.access_control.access_control import is_action_allowed
 from llama_stack.core.datatypes import (
     ModelWithOwner,
     RegistryEntrySource,
 )
+from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData, get_authenticated_user
+from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
+from llama_stack_api import (
+    ListModelsResponse,
+    Model,
+    ModelNotFoundError,
+    Models,
+    ModelType,
+    OpenAIListModelsResponse,
+    OpenAIModel,
+)
 
 from .common import CommonRoutingTableImpl, lookup_model
 
@@ -42,19 +52,122 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
42
52
 
43
53
  await self.update_registered_models(provider_id, models)
44
54
 
55
+ async def _get_dynamic_models_from_provider_data(self) -> list[Model]:
56
+ """
57
+ Fetch models from providers that have credentials in the current request's provider_data.
58
+
59
+ This allows users to see models available to them from providers that require
60
+ per-request API keys (via X-LlamaStack-Provider-Data header).
61
+
62
+ Returns models with fully qualified identifiers (provider_id/model_id) but does NOT
63
+ cache them in the registry since they are user-specific.
64
+ """
65
+ provider_data = PROVIDER_DATA_VAR.get()
66
+ if not provider_data:
67
+ return []
68
+
69
+ dynamic_models = []
70
+ user = get_authenticated_user()
71
+
72
+ for provider_id, provider in self.impls_by_provider_id.items():
73
+ # Check if this provider supports provider_data
74
+ if not isinstance(provider, NeedsRequestProviderData):
75
+ continue
76
+
77
+ # Check if provider has a validator (some providers like ollama don't need per-request credentials)
78
+ spec = getattr(provider, "__provider_spec__", None)
79
+ if not spec or not getattr(spec, "provider_data_validator", None):
80
+ continue
81
+
82
+ # Validate provider_data silently - we're speculatively checking all providers
83
+ # so validation failures are expected when user didn't provide keys for this provider
84
+ try:
85
+ validator = instantiate_class_type(spec.provider_data_validator)
86
+ validator(**provider_data)
87
+ except Exception:
88
+ # User didn't provide credentials for this provider - skip silently
89
+ continue
90
+
91
+ # Validation succeeded! User has credentials for this provider
92
+ # Now try to list models
93
+ try:
94
+ models = await provider.list_models()
95
+ if not models:
96
+ continue
97
+
98
+ # Ensure models have fully qualified identifiers and apply RBAC filtering
99
+ for model in models:
100
+ # Only add prefix if model identifier doesn't already have it
101
+ if not model.identifier.startswith(f"{provider_id}/"):
102
+ model.identifier = f"{provider_id}/{model.provider_resource_id}"
103
+
104
+ # Convert to ModelWithOwner for RBAC check
105
+ temp_model = ModelWithOwner(
106
+ identifier=model.identifier,
107
+ provider_id=provider_id,
108
+ provider_resource_id=model.provider_resource_id,
109
+ model_type=model.model_type,
110
+ metadata=model.metadata,
111
+ )
112
+
113
+ # Apply RBAC check - only include models user has read permission for
114
+ if is_action_allowed(self.policy, "read", temp_model, user):
115
+ dynamic_models.append(model)
116
+ else:
117
+ logger.debug(
118
+ f"Access denied to dynamic model '{model.identifier}' for user {user.principal if user else 'anonymous'}"
119
+ )
120
+
121
+ logger.debug(
122
+ f"Fetched {len(dynamic_models)} accessible models from provider {provider_id} using provider_data"
123
+ )
124
+
125
+ except Exception as e:
126
+ logger.debug(f"Failed to list models from provider {provider_id} with provider_data: {e}")
127
+ continue
128
+
129
+ return dynamic_models
130
+
45
131
  async def list_models(self) -> ListModelsResponse:
46
- return ListModelsResponse(data=await self.get_all_with_type("model"))
132
+ # Get models from registry
133
+ registry_models = await self.get_all_with_type("model")
134
+
135
+ # Get additional models available via provider_data (user-specific, not cached)
136
+ dynamic_models = await self._get_dynamic_models_from_provider_data()
137
+
138
+ # Combine, avoiding duplicates (registry takes precedence)
139
+ registry_identifiers = {m.identifier for m in registry_models}
140
+ unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
141
+
142
+ return ListModelsResponse(data=registry_models + unique_dynamic_models)
47
143
 
48
144
  async def openai_list_models(self) -> OpenAIListModelsResponse:
49
- models = await self.get_all_with_type("model")
145
+ # Get models from registry
146
+ registry_models = await self.get_all_with_type("model")
147
+
148
+ # Get additional models available via provider_data (user-specific, not cached)
149
+ dynamic_models = await self._get_dynamic_models_from_provider_data()
150
+
151
+ # Combine, avoiding duplicates (registry takes precedence)
152
+ registry_identifiers = {m.identifier for m in registry_models}
153
+ unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
154
+
155
+ all_models = registry_models + unique_dynamic_models
156
+
50
157
  openai_models = [
51
158
  OpenAIModel(
52
159
  id=model.identifier,
53
160
  object="model",
54
161
  created=int(time.time()),
55
162
  owned_by="llama_stack",
163
+ custom_metadata={
164
+ "model_type": model.model_type,
165
+ "provider_id": model.provider_id,
166
+ "provider_resource_id": model.provider_resource_id,
167
+ **model.metadata,
168
+ },
56
169
  )
57
- for model in models
170
+ for model in all_models
58
171
  ]
59
172
  return OpenAIListModelsResponse(data=openai_models)
60
173
 
@@ -130,7 +243,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
130
243
  existing_models = await self.get_all_with_type("model")
131
244
 
132
245
  # we may have an alias for the model registered by the user (or during initialization
133
- # from run.yaml) that we need to keep track of
246
+ # from config.yaml) that we need to keep track of
134
247
  model_ids = {}
135
248
  for model in existing_models:
136
249
  if model.provider_id != provider_id:
@@ -4,18 +4,18 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.apis.common.type_system import ParamType
8
- from llama_stack.apis.resource import ResourceType
9
- from llama_stack.apis.scoring_functions import (
7
+ from llama_stack.core.datatypes import (
8
+ ScoringFnWithOwner,
9
+ )
10
+ from llama_stack.log import get_logger
11
+ from llama_stack_api import (
10
12
  ListScoringFunctionsResponse,
13
+ ParamType,
14
+ ResourceType,
11
15
  ScoringFn,
12
16
  ScoringFnParams,
13
17
  ScoringFunctions,
14
18
  )
15
- from llama_stack.core.datatypes import (
16
- ScoringFnWithOwner,
17
- )
18
- from llama_stack.log import get_logger
19
19
 
20
20
  from .common import CommonRoutingTableImpl
21
21
 
@@ -6,12 +6,11 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.apis.resource import ResourceType
10
- from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
11
9
  from llama_stack.core.datatypes import (
12
10
  ShieldWithOwner,
13
11
  )
14
12
  from llama_stack.log import get_logger
13
+ from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
15
14
 
16
15
  from .common import CommonRoutingTableImpl
17
16
 
@@ -6,11 +6,17 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.apis.common.content_types import URL
10
- from llama_stack.apis.common.errors import ToolGroupNotFoundError
11
- from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
12
9
  from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
13
10
  from llama_stack.log import get_logger
11
+ from llama_stack_api import (
12
+ URL,
13
+ ListToolDefsResponse,
14
+ ListToolGroupsResponse,
15
+ ToolDef,
16
+ ToolGroup,
17
+ ToolGroupNotFoundError,
18
+ ToolGroups,
19
+ )
14
20
 
15
21
  from .common import CommonRoutingTableImpl
16
22
 
@@ -43,7 +49,9 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
43
49
  routing_key = self.tool_to_toolgroup[routing_key]
44
50
  return await super().get_provider_impl(routing_key, provider_id)
45
51
 
46
- async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse:
52
+ async def list_tools(
53
+ self, toolgroup_id: str | None = None, authorization: str | None = None
54
+ ) -> ListToolDefsResponse:
47
55
  if toolgroup_id:
48
56
  if group_id := parse_toolgroup_from_toolgroup_name_pair(toolgroup_id):
49
57
  toolgroup_id = group_id
@@ -55,7 +63,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
55
63
  for toolgroup in toolgroups:
56
64
  if toolgroup.identifier not in self.toolgroups_to_tools:
57
65
  try:
58
- await self._index_tools(toolgroup)
66
+ await self._index_tools(toolgroup, authorization=authorization)
59
67
  except AuthenticationRequiredError:
60
68
  # Send authentication errors back to the client so it knows
61
69
  # that it needs to supply credentials for remote MCP servers.
@@ -70,9 +78,11 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
70
78
 
71
79
  return ListToolDefsResponse(data=all_tools)
72
80
 
73
- async def _index_tools(self, toolgroup: ToolGroup):
81
+ async def _index_tools(self, toolgroup: ToolGroup, authorization: str | None = None):
74
82
  provider_impl = await super().get_provider_impl(toolgroup.identifier, toolgroup.provider_id)
75
- tooldefs_response = await provider_impl.list_runtime_tools(toolgroup.identifier, toolgroup.mcp_endpoint)
83
+ tooldefs_response = await provider_impl.list_runtime_tools(
84
+ toolgroup.identifier, toolgroup.mcp_endpoint, authorization=authorization
85
+ )
76
86
 
77
87
  tooldefs = tooldefs_response.data
78
88
  for t in tooldefs:
@@ -6,26 +6,31 @@
6
6
 
7
7
  from typing import Any
8
8
 
9
- from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
10
- from llama_stack.apis.models import ModelType
11
- from llama_stack.apis.resource import ResourceType
9
+ from llama_stack.core.datatypes import (
10
+ VectorStoreWithOwner,
11
+ )
12
+ from llama_stack.log import get_logger
12
13
 
13
14
  # Removed VectorStores import to avoid exposing public API
14
- from llama_stack.apis.vector_io.vector_io import (
15
+ from llama_stack_api import (
16
+ EmbeddedChunk,
17
+ InterleavedContent,
18
+ ModelNotFoundError,
19
+ ModelType,
20
+ ModelTypeError,
21
+ OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
22
+ QueryChunksResponse,
23
+ ResourceType,
15
24
  SearchRankingOptions,
16
25
  VectorStoreChunkingStrategy,
17
26
  VectorStoreDeleteResponse,
18
- VectorStoreFileContentsResponse,
27
+ VectorStoreFileContentResponse,
19
28
  VectorStoreFileDeleteResponse,
20
29
  VectorStoreFileObject,
21
30
  VectorStoreFileStatus,
22
31
  VectorStoreObject,
23
32
  VectorStoreSearchResponsePage,
24
33
  )
25
- from llama_stack.core.datatypes import (
26
- VectorStoreWithOwner,
27
- )
28
- from llama_stack.log import get_logger
29
34
 
30
35
  from .common import CommonRoutingTableImpl, lookup_model
31
36
 
@@ -39,6 +44,15 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
39
44
  Only provides internal routing functionality for VectorIORouter.
40
45
  """
41
46
 
47
+ def __init__(
48
+ self,
49
+ impls_by_provider_id: dict[str, Any],
50
+ dist_registry: Any,
51
+ policy: list[Any],
52
+ ) -> None:
53
+ super().__init__(impls_by_provider_id, dist_registry, policy)
54
+ self.vector_io_router = None # Will be set post-instantiation
55
+
42
56
  # Internal methods only - no public API exposure
43
57
 
44
58
  async def register_vector_store(
@@ -77,6 +91,26 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
77
91
  await self.register_object(vector_store)
78
92
  return vector_store
79
93
 
94
+ async def insert_chunks(
95
+ self,
96
+ vector_store_id: str,
97
+ chunks: list[EmbeddedChunk],
98
+ ttl_seconds: int | None = None,
99
+ ) -> None:
100
+ await self.assert_action_allowed("update", "vector_store", vector_store_id)
101
+ provider = await self.get_provider_impl(vector_store_id)
102
+ return await provider.insert_chunks(vector_store_id, chunks, ttl_seconds)
103
+
104
+ async def query_chunks(
105
+ self,
106
+ vector_store_id: str,
107
+ query: InterleavedContent,
108
+ params: dict[str, Any] | None = None,
109
+ ) -> QueryChunksResponse:
110
+ await self.assert_action_allowed("read", "vector_store", vector_store_id)
111
+ provider = await self.get_provider_impl(vector_store_id)
112
+ return await provider.query_chunks(vector_store_id, query, params)
113
+
80
114
  async def openai_retrieve_vector_store(
81
115
  self,
82
116
  vector_store_id: str,
@@ -195,12 +229,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
195
229
  self,
196
230
  vector_store_id: str,
197
231
  file_id: str,
198
- ) -> VectorStoreFileContentsResponse:
232
+ include_embeddings: bool | None = False,
233
+ include_metadata: bool | None = False,
234
+ ) -> VectorStoreFileContentResponse:
199
235
  await self.assert_action_allowed("read", "vector_store", vector_store_id)
236
+
200
237
  provider = await self.get_provider_impl(vector_store_id)
201
238
  return await provider.openai_retrieve_vector_store_file_contents(
202
239
  vector_store_id=vector_store_id,
203
240
  file_id=file_id,
241
+ include_embeddings=include_embeddings,
242
+ include_metadata=include_metadata,
204
243
  )
205
244
 
206
245
  async def openai_update_vector_store_file(
@@ -232,17 +271,13 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
232
271
  async def openai_create_vector_store_file_batch(
233
272
  self,
234
273
  vector_store_id: str,
235
- file_ids: list[str],
236
- attributes: dict[str, Any] | None = None,
237
- chunking_strategy: Any | None = None,
274
+ params: OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
238
275
  ):
239
276
  await self.assert_action_allowed("update", "vector_store", vector_store_id)
240
277
  provider = await self.get_provider_impl(vector_store_id)
241
278
  return await provider.openai_create_vector_store_file_batch(
242
279
  vector_store_id=vector_store_id,
243
- file_ids=file_ids,
244
- attributes=attributes,
245
- chunking_strategy=chunking_strategy,
280
+ params=params,
246
281
  )
247
282
 
248
283
  async def openai_retrieve_vector_store_file_batch(
@@ -28,9 +28,11 @@ class AuthenticationMiddleware:
28
28
  4. Makes these attributes available to the route handlers for access control
29
29
 
30
30
  Unauthenticated Access:
31
- Endpoints can opt out of authentication by setting require_authentication=False
32
- in their @webmethod decorator. This is typically used for operational endpoints
33
- like /health and /version to support monitoring, load balancers, and observability tools.
31
+ Endpoints can opt out of authentication by:
32
+ - For legacy @webmethod routes: setting require_authentication=False in the decorator
33
+ - For FastAPI router routes: setting openapi_extra={PUBLIC_ROUTE_KEY: True}
34
+ This is typically used for operational endpoints like /health and /version to support
35
+ monitoring, load balancers, and observability tools.
34
36
 
35
37
  The middleware supports multiple authentication providers through the AuthProvider interface:
36
38
  - Kubernetes: Validates tokens against the Kubernetes API server
@@ -6,13 +6,13 @@
6
6
 
7
7
  import ssl
8
8
  from abc import ABC, abstractmethod
9
+ from typing import Any
9
10
  from urllib.parse import parse_qs, urljoin, urlparse
10
11
 
11
12
  import httpx
12
13
  import jwt
13
14
  from pydantic import BaseModel, Field
14
15
 
15
- from llama_stack.apis.common.errors import TokenValidationError
16
16
  from llama_stack.core.datatypes import (
17
17
  AuthenticationConfig,
18
18
  CustomAuthConfig,
@@ -22,6 +22,7 @@ from llama_stack.core.datatypes import (
22
22
  User,
23
23
  )
24
24
  from llama_stack.log import get_logger
25
+ from llama_stack_api import TokenValidationError
25
26
 
26
27
  logger = get_logger(name=__name__, category="core::auth")
27
28
 
@@ -143,14 +144,21 @@ class OAuth2TokenAuthProvider(AuthProvider):
143
144
  if self.config.jwks and self.config.jwks.token:
144
145
  headers["Authorization"] = f"Bearer {self.config.jwks.token}"
145
146
 
146
- self._jwks_client = jwt.PyJWKClient(
147
- self.config.jwks.uri if self.config.jwks else None,
148
- cache_keys=True,
149
- max_cached_keys=10,
150
- lifespan=self.config.jwks.key_recheck_period if self.config.jwks else None,
151
- headers=headers,
152
- ssl_context=ssl_context,
153
- )
147
+ # Ensure uri is not None for PyJWKClient
148
+ if not self.config.jwks or not self.config.jwks.uri:
149
+ raise ValueError("JWKS configuration requires a valid URI")
150
+
151
+ # Build kwargs conditionally to avoid passing None values
152
+ jwks_kwargs: dict[str, Any] = {
153
+ "cache_keys": True,
154
+ "max_cached_keys": 10,
155
+ "headers": headers,
156
+ "ssl_context": ssl_context,
157
+ }
158
+ if self.config.jwks.key_recheck_period is not None:
159
+ jwks_kwargs["lifespan"] = self.config.jwks.key_recheck_period
160
+
161
+ self._jwks_client = jwt.PyJWKClient(self.config.jwks.uri, **jwks_kwargs)
154
162
  return self._jwks_client
155
163
 
156
164
  async def validate_jwt_token(self, token: str, scope: dict | None = None) -> User:
@@ -197,23 +205,31 @@ class OAuth2TokenAuthProvider(AuthProvider):
197
205
  if self.config.introspection is None:
198
206
  raise ValueError("Introspection is not configured")
199
207
 
208
+ # ssl_ctxt can be None, bool, str, or SSLContext - httpx accepts all
209
+ ssl_ctxt: ssl.SSLContext | bool = False # Default to no verification if no cafile
210
+ if self.config.tls_cafile:
211
+ ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix())
212
+
213
+ # Build post kwargs conditionally based on auth method
214
+ post_kwargs: dict[str, Any] = {
215
+ "url": self.config.introspection.url,
216
+ "data": form,
217
+ "timeout": 10.0,
218
+ }
219
+
200
220
  if self.config.introspection.send_secret_in_body:
201
221
  form["client_id"] = self.config.introspection.client_id
202
222
  form["client_secret"] = self.config.introspection.client_secret
203
- auth = None
204
223
  else:
205
- auth = (self.config.introspection.client_id, self.config.introspection.client_secret)
206
- ssl_ctxt = None
207
- if self.config.tls_cafile:
208
- ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix())
224
+ # httpx auth parameter expects tuple[str | bytes, str | bytes]
225
+ post_kwargs["auth"] = (
226
+ self.config.introspection.client_id,
227
+ self.config.introspection.client_secret,
228
+ )
229
+
209
230
  try:
210
231
  async with httpx.AsyncClient(verify=ssl_ctxt) as client:
211
- response = await client.post(
212
- self.config.introspection.url,
213
- data=form,
214
- auth=auth,
215
- timeout=10.0, # Add a reasonable timeout
216
- )
232
+ response = await client.post(**post_kwargs)
217
233
  if response.status_code != httpx.codes.OK:
218
234
  logger.warning(f"Token introspection failed with status code: {response.status_code}")
219
235
  raise ValueError(f"Token introspection failed: {response.status_code}")
@@ -0,0 +1,84 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """Router utilities for FastAPI routers.
8
+
9
+ This module provides utilities to create FastAPI routers from API packages.
10
+ APIs with routers are explicitly listed here.
11
+ """
12
+
13
+ from collections.abc import Callable
14
+ from typing import Any, cast
15
+
16
+ from fastapi import APIRouter
17
+ from fastapi.routing import APIRoute
18
+
19
+ from llama_stack_api import admin, batches, benchmarks, datasets, files, inspect_api, providers
20
+
21
+ # Router factories for APIs that have FastAPI routers
22
+ # Add new APIs here as they are migrated to the router system
23
+ from llama_stack_api.datatypes import Api
24
+
25
+ _ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
26
+ "admin": admin.fastapi_routes.create_router,
27
+ "batches": batches.fastapi_routes.create_router,
28
+ "benchmarks": benchmarks.fastapi_routes.create_router,
29
+ "datasets": datasets.fastapi_routes.create_router,
30
+ "providers": providers.fastapi_routes.create_router,
31
+ "inspect": inspect_api.fastapi_routes.create_router,
32
+ "files": files.fastapi_routes.create_router,
33
+ }
34
+
35
+
36
+ def has_router(api: "Api") -> bool:
37
+ """Check if an API has a router factory.
38
+
39
+ Args:
40
+ api: The API enum value
41
+
42
+ Returns:
43
+ True if the API has a router factory, False otherwise
44
+ """
45
+ return api.value in _ROUTER_FACTORIES
46
+
47
+
48
+ def build_fastapi_router(api: "Api", impl: Any) -> APIRouter | None:
49
+ """Build a router for an API by combining its router factory with the implementation.
50
+
51
+ Args:
52
+ api: The API enum value
53
+ impl: The implementation instance for the API
54
+
55
+ Returns:
56
+ APIRouter if the API has a router factory, None otherwise
57
+ """
58
+ router_factory = _ROUTER_FACTORIES.get(api.value)
59
+ if router_factory is None:
60
+ return None
61
+
62
+ # cast is safe here: all router factories in API packages are required to return APIRouter.
63
+ # If a router factory returns the wrong type, it will fail at runtime when
64
+ # app.include_router(router) is called
65
+ return cast(APIRouter, router_factory(impl))
66
+
67
+
68
+ def get_router_routes(router: APIRouter) -> list[APIRoute]:
69
+ """Extract APIRoute objects from a FastAPI router.
70
+
71
+ Args:
72
+ router: The FastAPI router to extract routes from
73
+
74
+ Returns:
75
+ List of APIRoute objects from the router (preserves tags and other metadata)
76
+ """
77
+ routes = []
78
+
79
+ for route in router.routes:
80
+ # FastAPI routers use APIRoute objects, which have path, methods, tags, etc.
81
+ if isinstance(route, APIRoute):
82
+ routes.append(route)
83
+
84
+ return routes
@@ -11,9 +11,9 @@ from datetime import UTC, datetime, timedelta
11
11
  from starlette.types import ASGIApp, Receive, Scope, Send
12
12
 
13
13
  from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
14
+ from llama_stack.core.storage.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
14
15
  from llama_stack.log import get_logger
15
- from llama_stack.providers.utils.kvstore.api import KVStore
16
- from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
16
+ from llama_stack_api.internal.kvstore import KVStore
17
17
 
18
18
  logger = get_logger(name=__name__, category="core::server")
19
19