llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/core/server/routes.py

@@ -12,10 +12,14 @@ from typing import Any
 from aiohttp import hdrs
 from starlette.routing import Route
 
-from llama_stack.apis.datatypes import Api, ExternalApiSpec
-from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
 from llama_stack.core.resolver import api_protocol_map
-from llama_stack.schema_utils import WebMethod
+from llama_stack.core.server.fastapi_router_registry import (
+    _ROUTER_FACTORIES,
+    build_fastapi_router,
+    get_router_routes,
+)
+from llama_stack_api import Api, ExternalApiSpec, WebMethod
+from llama_stack_api.router_utils import PUBLIC_ROUTE_KEY
 
 EndpointFunc = Callable[..., Any]
 PathParams = dict[str, str]
@@ -25,33 +29,28 @@ RouteImpls = dict[str, PathImpl]
 RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod]
 
 
-def toolgroup_protocol_map():
-    return {
-        SpecialToolGroup.rag_tool: RAGToolRuntime,
-    }
-
-
 def get_all_api_routes(
     external_apis: dict[Api, ExternalApiSpec] | None = None,
 ) -> dict[Api, list[tuple[Route, WebMethod]]]:
+    """Get all API routes from webmethod-based protocols.
+
+    This function only returns routes from APIs that use the legacy @webmethod
+    decorator system. For APIs that have been migrated to FastAPI routers,
+    use the router registry (fastapi_router_registry.has_router() and fastapi_router_registry.build_fastapi_router()).
+
+    Args:
+        external_apis: Optional dictionary of external API specifications
+
+    Returns:
+        Dictionary mapping API to list of (Route, WebMethod) tuples
+    """
     apis = {}
 
     protocols = api_protocol_map(external_apis)
-    toolgroup_protocols = toolgroup_protocol_map()
     for api, protocol in protocols.items():
         routes = []
         protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction)
 
-        # HACK ALERT
-        if api == Api.tool_runtime:
-            for tool_group in SpecialToolGroup:
-                sub_protocol = toolgroup_protocols[tool_group]
-                sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction)
-                for name, method in sub_protocol_methods:
-                    if not hasattr(method, "__webmethod__"):
-                        continue
-                    protocol_methods.append((f"{tool_group.value}.{name}", method))
-
         for name, method in protocol_methods:
             # Get all webmethods for this method (supports multiple decorators)
             webmethods = getattr(method, "__webmethods__", [])
@@ -68,8 +67,9 @@ def get_all_api_routes(
                 else:
                     http_method = hdrs.METH_POST
                 routes.append(
-                    (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)
-                )  # setting endpoint to None since don't use a Router object
+                    # setting endpoint to None since don't use a Router object
+                    (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)  # type: ignore[arg-type]
+                )
 
         apis[api] = routes
 
@@ -91,22 +91,74 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No
 
         return f"^{pattern}$"
 
+    # Process routes from FastAPI routers
+    for api_name in _ROUTER_FACTORIES.keys():
+        api = Api(api_name)
+        if api not in impls:
+            continue
+        impl = impls[api]
+        router = build_fastapi_router(api, impl)
+        if router:
+            router_routes = get_router_routes(router)
+            for route in router_routes:
+                # Get the endpoint function from the route
+                # For FastAPI routes, the endpoint is the actual function
+                func = route.endpoint
+                if func is None:
+                    continue
+
+                # Get the first (and typically only) method from the set, filtering out HEAD
+                available_methods = [m for m in (route.methods or []) if m != "HEAD"]
+                if not available_methods:
+                    continue  # Skip if only HEAD method is available
+                method = available_methods[0].lower()
+
+                if method not in route_impls:
+                    route_impls[method] = {}
+
+                # Create a minimal WebMethod for router routes (needed for RouteMatch tuple)
+                # We don't have webmethod metadata for router routes, so create a minimal one
+                # that has the attributes used by the library client (descriptive_name for tracing)
+                #
+                # TODO: Long-term migration plan (once all APIs are migrated to FastAPI routers):
+                # - Extract summary from APIRoute: route.summary (available on FastAPI APIRoute objects)
+                # - Pass summary directly in RouteMatch instead of WebMethod
+                # - Remove this WebMethod() instantiation entirely
+                # - Update library_client.py to use the extracted summary instead of webmethod.descriptive_name
+
+                # Routes with openapi_extra[PUBLIC_ROUTE_KEY]=True don't require authentication
+                is_public = (route.openapi_extra or {}).get(PUBLIC_ROUTE_KEY, False)
+                webmethod = WebMethod(
+                    descriptive_name=None,
+                    require_authentication=not is_public,
+                )
+                route_impls[method][_convert_path_to_regex(route.path)] = (
+                    func,
+                    route.path,
+                    webmethod,
+                )
+
+    # Process routes from legacy webmethod-based APIs
     for api, api_routes in api_to_routes.items():
+        # Skip APIs that have routers (already processed above)
+        if api.value in _ROUTER_FACTORIES:
+            continue
+
         if api not in impls:
             continue
-        for route, webmethod in api_routes:
+        for legacy_route, webmethod in api_routes:
             impl = impls[api]
-            func = getattr(impl, route.name)
+            func = getattr(impl, legacy_route.name)
             # Get the first (and typically only) method from the set, filtering out HEAD
-            available_methods = [m for m in route.methods if m != "HEAD"]
+            available_methods = [m for m in (legacy_route.methods or []) if m != "HEAD"]
             if not available_methods:
                 continue  # Skip if only HEAD method is available
             method = available_methods[0].lower()
             if method not in route_impls:
                 route_impls[method] = {}
-            route_impls[method][_convert_path_to_regex(route.path)] = (
+            route_impls[method][_convert_path_to_regex(legacy_route.path)] = (
                 func,
-                route.path,
+                legacy_route.path,
                 webmethod,
             )
 
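The fastapi_router_registry module referenced above is new in 0.4.0 (llama_stack/core/server/fastapi_router_registry.py, +84 -0 in the file list) but its body is not shown in this diff. A rough, illustrative sketch of the registry pattern the route code relies on; the names and signatures below are assumptions, not the package's actual definitions:

# Hypothetical minimal router registry, for illustration only.
from collections.abc import Callable
from typing import Any

from fastapi import APIRouter

_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {}


def register_router_factory(api_name: str, factory: Callable[[Any], APIRouter]) -> None:
    # Each migrated API package registers a factory that wraps its implementation object.
    _ROUTER_FACTORIES[api_name] = factory


def has_router(api_name: str) -> bool:
    return api_name in _ROUTER_FACTORIES


def build_router(api_name: str, impl: Any) -> APIRouter | None:
    # Returns a ready-to-mount APIRouter, or None for APIs still on @webmethod.
    factory = _ROUTER_FACTORIES.get(api_name)
    return factory(impl) if factory is not None else None

With a registry of this shape, initialize_route_impls can enumerate router-backed APIs first and fall back to the legacy webmethod scan for everything else, which is the split the hunk above implements.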
llama_stack/core/server/server.py

@@ -31,13 +31,10 @@ from fastapi.responses import JSONResponse, StreamingResponse
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 
-from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import (
     AuthenticationRequiredError,
-    LoggingConfig,
-    StackRunConfig,
+    StackConfig,
     process_cors_config,
 )
 from llama_stack.core.distribution import builtin_automatically_routed_apis
@@ -47,6 +44,7 @@ from llama_stack.core.request_headers import (
     request_provider_data_context,
     user_from_scope,
 )
+from llama_stack.core.server.fastapi_router_registry import build_fastapi_router
 from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.core.stack import (
     Stack,
@@ -54,22 +52,13 @@ from llama_stack.core.stack import (
     replace_env_vars,
 )
 from llama_stack.core.utils.config import redact_sensitive_fields
-from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
+from llama_stack.core.utils.config_resolution import resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
-from llama_stack.log import get_logger, setup_logging
-from llama_stack.providers.datatypes import Api
-from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
-from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
-    TelemetryAdapter,
-)
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    setup_logger,
-)
+from llama_stack.log import LoggingConfig, get_logger
+from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 
 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
-from .tracing import TracingMiddleware
 
 REPO_ROOT = Path(__file__).parent.parent.parent.parent
 
@@ -96,7 +85,7 @@ def create_sse_event(data: Any) -> str:
 
 
 async def global_exception_handler(request: Request, exc: Exception):
-    traceback.print_exception(exc)
+    traceback.print_exception(type(exc), exc, exc.__traceback__)
     http_exc = translate_exception(exc)
 
     return JSONResponse(status_code=http_exc.status_code, content={"error": {"detail": http_exc.detail}})
@@ -158,7 +147,7 @@ class StackApp(FastAPI):
    start background tasks (e.g. refresh model registry periodically) from the lifespan context manager.
    """
 
-    def __init__(self, config: StackRunConfig, *args, **kwargs):
+    def __init__(self, config: StackConfig, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.stack: Stack = Stack(config)
 
@@ -174,7 +163,9 @@ class StackApp(FastAPI):
 
 @asynccontextmanager
 async def lifespan(app: StackApp):
-    logger.info("Starting up")
+    server_version = parse_version("llama-stack")
+
+    logger.info(f"Starting up Llama Stack server (version: {server_version})")
     assert app.stack is not None
     app.stack.create_registry_refresh_task()
     yield
@@ -244,56 +235,36 @@ async def log_request_pre_validation(request: Request):
 def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
     @functools.wraps(func)
     async def route_handler(request: Request, **kwargs):
-        # Get auth attributes from the request scope
-        user = user_from_scope(request.scope)
-
         await log_request_pre_validation(request)
 
-        test_context_token = None
-        test_context_var = None
-        reset_test_context_fn = None
-
-        # Use context manager with both provider data and auth attributes
-        with request_provider_data_context(request.headers, user):
-            if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
-                from llama_stack.core.testing_context import (
-                    TEST_CONTEXT,
-                    reset_test_context,
-                    sync_test_context_from_provider_data,
-                )
+        is_streaming = is_streaming_request(func.__name__, request, **kwargs)
 
-                test_context_token = sync_test_context_from_provider_data()
-                test_context_var = TEST_CONTEXT
-                reset_test_context_fn = reset_test_context
-
-            is_streaming = is_streaming_request(func.__name__, request, **kwargs)
-
-            try:
-                if is_streaming:
-                    context_vars = [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
-                    if test_context_var is not None:
-                        context_vars.append(test_context_var)
-                    gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
-                    return StreamingResponse(gen, media_type="text/event-stream")
-                else:
-                    value = func(**kwargs)
-                    result = await maybe_await(value)
-                    if isinstance(result, PaginatedResponse) and result.url is None:
-                        result.url = route
-
-                    if method.upper() == "DELETE" and result is None:
-                        return Response(status_code=httpx.codes.NO_CONTENT)
-
-                    return result
-            except Exception as e:
-                if logger.isEnabledFor(logging.INFO):
-                    logger.exception(f"Error executing endpoint {route=} {method=}")
-                else:
-                    logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
-                raise translate_exception(e) from e
-            finally:
-                if test_context_token is not None and reset_test_context_fn is not None:
-                    reset_test_context_fn(test_context_token)
+        try:
+            if is_streaming:
+                # Preserve context vars across async generator boundaries
+                context_vars = [PROVIDER_DATA_VAR]
+                if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
+                    from llama_stack.core.testing_context import TEST_CONTEXT
+
+                    context_vars.append(TEST_CONTEXT)
+                gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
+                return StreamingResponse(gen, media_type="text/event-stream")
+            else:
+                value = func(**kwargs)
+                result = await maybe_await(value)
+                if isinstance(result, PaginatedResponse) and result.url is None:
+                    result.url = route
+
+                if method.upper() == "DELETE" and result is None:
+                    return Response(status_code=httpx.codes.NO_CONTENT)
+
+                return result
+        except Exception as e:
+            if logger.isEnabledFor(logging.INFO):
+                logger.exception(f"Error executing endpoint {route=} {method=}")
+            else:
+                logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
+            raise translate_exception(e) from e
 
     sig = inspect.signature(func)
 
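In the streaming branch above, StreamingResponse drives the SSE generator outside the original request scope, so preserve_contexts_async_generator has to capture the selected context variables and re-apply them around each item. A self-contained sketch of that idea (illustrative only; the actual helper lives in llama_stack.core.utils.context and may differ):

import contextvars
from collections.abc import AsyncGenerator
from typing import Any


async def preserve_contexts(
    gen: AsyncGenerator[Any, None],
    context_vars: list[contextvars.ContextVar],
) -> AsyncGenerator[Any, None]:
    # Capture the values visible where the wrapper is created (inside the request scope).
    captured = {var: var.get(None) for var in context_vars}
    while True:
        # Re-apply the captured values before pulling each item, because the event loop
        # resumes the generator from the response-sending context, not the request's.
        for var, value in captured.items():
            var.set(value)
        try:
            item = await gen.__anext__()
        except StopAsyncIteration:
            break
        yield item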
@@ -365,6 +336,42 @@ class ClientVersionMiddleware:
         return await self.app(scope, receive, send)
 
 
+class ProviderDataMiddleware:
+    """Middleware to set up request context for all routes.
+
+    Sets up provider data context from X-LlamaStack-Provider-Data header
+    and auth attributes. Also handles test context propagation when
+    running in test mode for deterministic ID generation.
+    """
+
+    def __init__(self, app):
+        self.app = app
+
+    async def __call__(self, scope, receive, send):
+        if scope["type"] == "http":
+            headers = {k.decode(): v.decode() for k, v in scope.get("headers", [])}
+            user = user_from_scope(scope)
+
+            with request_provider_data_context(headers, user):
+                test_context_token = None
+                reset_fn = None
+                if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
+                    from llama_stack.core.testing_context import (
+                        reset_test_context,
+                        sync_test_context_from_provider_data,
+                    )
+
+                    test_context_token = sync_test_context_from_provider_data()
+                    reset_fn = reset_test_context
+                try:
+                    return await self.app(scope, receive, send)
+                finally:
+                    if test_context_token and reset_fn:
+                        reset_fn(test_context_token)
+
+        return await self.app(scope, receive, send)
+
+
 def create_app() -> StackApp:
     """Create and configure the FastAPI application.
 
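ProviderDataMiddleware is a plain ASGI middleware, so the provider-data and auth context is established once per request, before routing, rather than inside each generated route handler. A stripped-down sketch of the same pattern, keyed off the X-LlamaStack-Provider-Data header named in the docstring (the contextvar and JSON handling here are illustrative assumptions):

import contextvars
import json

# Illustrative stand-in for the server's provider-data context variable.
PROVIDER_DATA_VAR: contextvars.ContextVar[dict | None] = contextvars.ContextVar("provider_data", default=None)


class HeaderContextMiddleware:
    """Parse a request header into a contextvar for the duration of one request."""

    def __init__(self, app):
        self.app = app

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            return await self.app(scope, receive, send)
        headers = {k.decode().lower(): v.decode() for k, v in scope.get("headers", [])}
        raw = headers.get("x-llamastack-provider-data")
        # Assumes the header carries a JSON object; real code should validate it.
        token = PROVIDER_DATA_VAR.set(json.loads(raw) if raw else None)
        try:
            return await self.app(scope, receive, send)
        finally:
            PROVIDER_DATA_VAR.reset(token)

Because such a middleware wraps the whole app (see app.add_middleware(ProviderDataMiddleware) below), the context also covers FastAPI-router routes that never pass through create_dynamic_typed_route.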
@@ -374,14 +381,11 @@ def create_app() -> StackApp:
     Returns:
         Configured StackApp instance.
     """
-    # Initialize logging from environment variables first
-    setup_logging()
-
     config_file = os.getenv("LLAMA_STACK_CONFIG")
     if config_file is None:
         raise ValueError("LLAMA_STACK_CONFIG environment variable is required")
 
-    config_file = resolve_config_or_distro(config_file, Mode.RUN)
+    config_file = resolve_config_or_distro(config_file)
 
     # Load and process configuration
     logger_config = None
@@ -392,7 +396,7 @@ def create_app() -> StackApp:
     logger = get_logger(name=__name__, category="core::server", config=logger_config)
 
     config = replace_env_vars(config_contents)
-    config = StackRunConfig(**cast_image_name_to_string(config))
+    config = StackConfig(**cast_image_name_to_string(config))
 
     _log_run_config(run_config=config)
 
@@ -407,6 +411,8 @@
     if not os.environ.get("LLAMA_STACK_DISABLE_VERSION_CHECK"):
         app.add_middleware(ClientVersionMiddleware)
 
+    app.add_middleware(ProviderDataMiddleware)
+
     impls = app.stack.impls
 
     if config.server.auth:
@@ -448,11 +454,6 @@
     if cors_config:
         app.add_middleware(CORSMiddleware, **cors_config.model_dump())
 
-    if config.telemetry.enabled:
-        setup_logger(impls[Api.telemetry])
-    else:
-        setup_logger(TelemetryAdapter(TelemetryConfig(), {}))
-
     # Load external APIs if configured
     external_apis = load_external_apis(config)
     all_routes = get_all_api_routes(external_apis)
@@ -468,19 +469,27 @@
             continue
         apis_to_serve.add(inf.routing_table_api.value)
 
+    apis_to_serve.add("admin")
     apis_to_serve.add("inspect")
     apis_to_serve.add("providers")
     apis_to_serve.add("prompts")
     apis_to_serve.add("conversations")
+
 
     for api_str in apis_to_serve:
         api = Api(api_str)
 
-        routes = all_routes[api]
-        try:
-            impl = impls[api]
-        except KeyError as e:
-            raise ValueError(f"Could not find provider implementation for {api} API") from e
+        # Try to discover and use a router factory from the API package
+        impl = impls[api]
+        router = build_fastapi_router(api, impl)
+        if router:
+            app.include_router(router)
+            logger.debug(f"Registered FastAPIrouter for {api} API")
+            continue
+
+        # Fall back to old webmethod-based route discovery until the migration is complete
+        impl = impls[api]
 
+        routes = all_routes[api]
         for route, _ in routes:
             if not hasattr(impl, route.name):
@@ -506,17 +515,23 @@
 
     logger.debug(f"serving APIs: {apis_to_serve}")
 
+    # Register specific exception handlers before the generic Exception handler
+    # This prevents the re-raising behavior that causes connection resets
     app.exception_handler(RequestValidationError)(global_exception_handler)
+    app.exception_handler(ConflictError)(global_exception_handler)
+    app.exception_handler(ResourceNotFoundError)(global_exception_handler)
+    app.exception_handler(AuthenticationRequiredError)(global_exception_handler)
+    app.exception_handler(AccessDeniedError)(global_exception_handler)
+    app.exception_handler(BadRequestError)(global_exception_handler)
+    # Generic Exception handler should be last
     app.exception_handler(Exception)(global_exception_handler)
 
-    app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis)
-
     return app
 
 
-def _log_run_config(run_config: StackRunConfig):
+def _log_run_config(run_config: StackConfig):
     """Logs the run config with redacted fields and disabled providers removed."""
-    logger.info("Run configuration:")
+    logger.info("Stack Configuration:")
     safe_config = redact_sensitive_fields(run_config.model_dump(mode="json"))
     clean_config = remove_disabled_providers(safe_config)
     logger.info(yaml.dump(clean_config, indent=2))
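Registering the concrete error classes alongside the catch-all means Starlette dispatches them straight to the JSON handler rather than surfacing them as unhandled errors. A minimal illustration of the registration style with a stand-in exception type (not the actual llama_stack_api classes):

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

app = FastAPI()


class ResourceMissingError(Exception):
    """Stand-in for a typed API error such as ResourceNotFoundError."""


async def error_handler(request: Request, exc: Exception) -> JSONResponse:
    status = 404 if isinstance(exc, ResourceMissingError) else 500
    return JSONResponse(status_code=status, content={"error": {"detail": str(exc)}})


# Specific classes first, then the generic Exception hook last.
app.exception_handler(ResourceMissingError)(error_handler)
app.exception_handler(Exception)(error_handler)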
@@ -532,8 +547,8 @@ def extract_path_params(route: str) -> list[str]:
 
 def remove_disabled_providers(obj):
     if isinstance(obj, dict):
-        keys = ["provider_id", "shield_id", "provider_model_id", "model_id"]
-        if any(k in obj and obj[k] in ("__disabled__", "", None) for k in keys):
+        # Filter out items where provider_id is explicitly disabled or empty
+        if "provider_id" in obj and obj["provider_id"] in ("__disabled__", "", None):
             return None
         return {k: v for k, v in ((k, remove_disabled_providers(v)) for k, v in obj.items()) if v is not None}
     elif isinstance(obj, list):