llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -8,21 +8,28 @@ from importlib.metadata import version
8
8
 
9
9
  from pydantic import BaseModel
10
10
 
11
- from llama_stack.apis.inspect import (
11
+ from llama_stack.core.datatypes import StackConfig
12
+ from llama_stack.core.distribution import builtin_automatically_routed_apis
13
+ from llama_stack.core.external import load_external_apis
14
+ from llama_stack.core.server.fastapi_router_registry import (
15
+ _ROUTER_FACTORIES,
16
+ build_fastapi_router,
17
+ get_router_routes,
18
+ )
19
+ from llama_stack.core.server.routes import get_all_api_routes
20
+ from llama_stack_api import (
21
+ Api,
12
22
  HealthInfo,
23
+ HealthStatus,
13
24
  Inspect,
14
25
  ListRoutesResponse,
15
26
  RouteInfo,
16
27
  VersionInfo,
17
28
  )
18
- from llama_stack.core.datatypes import StackRunConfig
19
- from llama_stack.core.external import load_external_apis
20
- from llama_stack.core.server.routes import get_all_api_routes
21
- from llama_stack.providers.datatypes import HealthStatus
22
29
 
23
30
 
24
31
  class DistributionInspectConfig(BaseModel):
25
- run_config: StackRunConfig
32
+ config: StackConfig
26
33
 
27
34
 
28
35
  async def get_provider_impl(config, deps):
@@ -33,19 +40,95 @@ async def get_provider_impl(config, deps):
33
40
 
34
41
  class DistributionInspectImpl(Inspect):
35
42
  def __init__(self, config: DistributionInspectConfig, deps):
36
- self.config = config
43
+ self.stack_config = config.config
37
44
  self.deps = deps
38
45
 
39
46
  async def initialize(self) -> None:
40
47
  pass
41
48
 
42
- async def list_routes(self) -> ListRoutesResponse:
43
- run_config: StackRunConfig = self.config.run_config
49
+ async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse:
50
+ config: StackConfig = self.stack_config
51
+
52
+ # Helper function to determine if a route should be included based on api_filter
53
+ # TODO: remove this once we've migrated all APIs to FastAPI routers
54
+ def should_include_route(webmethod) -> bool:
55
+ if api_filter is None:
56
+ # Default: only non-deprecated APIs
57
+ return not webmethod.deprecated
58
+ elif api_filter == "deprecated":
59
+ # Special filter: show deprecated routes regardless of their actual level
60
+ return bool(webmethod.deprecated)
61
+ else:
62
+ # Filter by API level (non-deprecated routes only)
63
+ return not webmethod.deprecated and webmethod.level == api_filter
64
+
65
+ # Helper function to get provider types for an API
66
+ def _get_provider_types(api: Api) -> list[str]:
67
+ if api.value in ["providers", "inspect"]:
68
+ return [] # These APIs don't have "real" providers they're internal to the stack
69
+
70
+ # For routing table APIs, look up providers from their router API
71
+ # (e.g., benchmarks -> eval, models -> inference, etc.)
72
+ auto_routed_apis = builtin_automatically_routed_apis()
73
+ for auto_routed in auto_routed_apis:
74
+ if auto_routed.routing_table_api == api:
75
+ # This is a routing table API, use its router API for providers
76
+ providers = config.providers.get(auto_routed.router_api.value, [])
77
+ return [p.provider_type for p in providers] if providers else []
78
+
79
+ # Regular API, look up providers directly
80
+ providers = config.providers.get(api.value, [])
81
+ return [p.provider_type for p in providers] if providers else []
82
+
83
+ # Helper function to determine if a router route should be included based on api_filter
84
+ def _should_include_router_route(route, router_prefix: str | None) -> bool:
85
+ """Check if a router-based route should be included based on api_filter."""
86
+ # Check deprecated status
87
+ route_deprecated = getattr(route, "deprecated", False) or False
88
+
89
+ if api_filter is None:
90
+ # Default: only non-deprecated routes
91
+ return not route_deprecated
92
+ elif api_filter == "deprecated":
93
+ # Special filter: show deprecated routes regardless of their actual level
94
+ return route_deprecated
95
+ else:
96
+ # Filter by API level (non-deprecated routes only)
97
+ # Extract level from router prefix (e.g., "/v1" -> "v1")
98
+ if router_prefix:
99
+ prefix_level = router_prefix.lstrip("/")
100
+ return not route_deprecated and prefix_level == api_filter
101
+ return not route_deprecated
44
102
 
45
103
  ret = []
46
- external_apis = load_external_apis(run_config)
104
+ external_apis = load_external_apis(config)
47
105
  all_endpoints = get_all_api_routes(external_apis)
106
+
107
+ # Process routes from APIs with FastAPI routers
108
+ for api_name in _ROUTER_FACTORIES.keys():
109
+ api = Api(api_name)
110
+ router = build_fastapi_router(api, None) # we don't need the impl here, just the routes
111
+ if router:
112
+ router_routes = get_router_routes(router)
113
+ for route in router_routes:
114
+ if _should_include_router_route(route, router.prefix):
115
+ if route.methods is not None:
116
+ available_methods = [m for m in route.methods if m != "HEAD"]
117
+ if available_methods:
118
+ ret.append(
119
+ RouteInfo(
120
+ route=route.path,
121
+ method=available_methods[0],
122
+ provider_types=_get_provider_types(api),
123
+ )
124
+ )
125
+
126
+ # Process routes from legacy webmethod-based APIs
48
127
  for api, endpoints in all_endpoints.items():
128
+ # Skip APIs that have routers (already processed above)
129
+ if api.value in _ROUTER_FACTORIES:
130
+ continue
131
+
49
132
  # Always include provider and inspect APIs, filter others based on run config
50
133
  if api.value in ["providers", "inspect"]:
51
134
  ret.extend(
@@ -55,12 +138,12 @@ class DistributionInspectImpl(Inspect):
55
138
  method=next(iter([m for m in e.methods if m != "HEAD"])),
56
139
  provider_types=[], # These APIs don't have "real" providers - they're internal to the stack
57
140
  )
58
- for e, _ in endpoints
59
- if e.methods is not None
141
+ for e, webmethod in endpoints
142
+ if e.methods is not None and should_include_route(webmethod)
60
143
  ]
61
144
  )
62
145
  else:
63
- providers = run_config.providers.get(api.value, [])
146
+ providers = config.providers.get(api.value, [])
64
147
  if providers: # Only process if there are providers for this API
65
148
  ret.extend(
66
149
  [
@@ -69,8 +152,8 @@ class DistributionInspectImpl(Inspect):
69
152
  method=next(iter([m for m in e.methods if m != "HEAD"])),
70
153
  provider_types=[p.provider_type for p in providers],
71
154
  )
72
- for e, _ in endpoints
73
- if e.methods is not None
155
+ for e, webmethod in endpoints
156
+ if e.methods is not None and should_include_route(webmethod)
74
157
  ]
75
158
  )
76
159
 
@@ -10,6 +10,7 @@ import json
10
10
  import logging # allow-direct-logging
11
11
  import os
12
12
  import sys
13
+ import typing
13
14
  from enum import Enum
14
15
  from io import BytesIO
15
16
  from pathlib import Path
@@ -18,38 +19,37 @@ from typing import Any, TypeVar, Union, get_args, get_origin
18
19
  import httpx
19
20
  import yaml
20
21
  from fastapi import Response as FastAPIResponse
21
- from llama_stack_client import (
22
- NOT_GIVEN,
23
- APIResponse,
24
- AsyncAPIResponse,
25
- AsyncLlamaStackClient,
26
- AsyncStream,
27
- LlamaStackClient,
28
- )
22
+
23
+ from llama_stack.core.utils.type_inspection import is_unwrapped_body_param
24
+
25
+ try:
26
+ from llama_stack_client import (
27
+ NOT_GIVEN,
28
+ APIResponse,
29
+ AsyncAPIResponse,
30
+ AsyncLlamaStackClient,
31
+ AsyncStream,
32
+ LlamaStackClient,
33
+ )
34
+ except ImportError as e:
35
+ raise ImportError(
36
+ "llama-stack-client is not installed. Please install it with `uv pip install llama-stack[client]`."
37
+ ) from e
38
+
29
39
  from pydantic import BaseModel, TypeAdapter
30
40
  from rich.console import Console
31
41
  from termcolor import cprint
32
42
 
33
43
  from llama_stack.core.build import print_pip_install_help
34
44
  from llama_stack.core.configure import parse_and_maybe_upgrade_config
35
- from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
36
- from llama_stack.core.request_headers import (
37
- PROVIDER_DATA_VAR,
38
- request_provider_data_context,
39
- )
45
+ from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider_data_context
40
46
  from llama_stack.core.resolver import ProviderRegistry
41
47
  from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
42
- from llama_stack.core.stack import (
43
- Stack,
44
- get_stack_run_config_from_distro,
45
- replace_env_vars,
46
- )
48
+ from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
47
49
  from llama_stack.core.utils.config import redact_sensitive_fields
48
50
  from llama_stack.core.utils.context import preserve_contexts_async_generator
49
51
  from llama_stack.core.utils.exec import in_notebook
50
52
  from llama_stack.log import get_logger, setup_logging
51
- from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
52
- from llama_stack.strong_typing.inspection import is_unwrapped_body_param
53
53
 
54
54
  logger = get_logger(name=__name__, category="core")
55
55
 
@@ -202,13 +202,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
202
202
  super().__init__()
203
203
  # Initialize logging from environment variables first
204
204
  setup_logging()
205
-
206
- # when using the library client, we should not log to console since many
207
- # of our logs are intended for server-side usage
208
- if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
209
- current_sinks = sinks_from_env.strip().lower().split(",")
210
- os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
211
-
212
205
  if in_notebook():
213
206
  import nest_asyncio
214
207
 
@@ -264,20 +257,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
264
257
  file=sys.stderr,
265
258
  )
266
259
  if self.config_path_or_distro_name.endswith(".yaml"):
267
- providers: dict[str, list[BuildProvider]] = {}
268
- for api, run_providers in self.config.providers.items():
269
- for provider in run_providers:
270
- providers.setdefault(api, []).append(
271
- BuildProvider(provider_type=provider.provider_type, module=provider.module)
272
- )
273
- providers = dict(providers)
274
- build_config = BuildConfig(
275
- distribution_spec=DistributionSpec(
276
- providers=providers,
277
- ),
278
- external_providers_dir=self.config.external_providers_dir,
279
- )
280
- print_pip_install_help(build_config)
260
+ print_pip_install_help(self.config)
281
261
  else:
282
262
  prefix = "!" if in_notebook() else ""
283
263
  cprint(
@@ -293,8 +273,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
293
273
  raise _e
294
274
 
295
275
  assert self.impls is not None
296
- if Api.telemetry in self.impls:
297
- setup_logger(self.impls[Api.telemetry])
298
276
 
299
277
  if not os.environ.get("PYTEST_CURRENT_TEST"):
300
278
  console = Console()
@@ -381,16 +359,16 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
381
359
  matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
382
360
  body |= path_params
383
361
 
362
+ # Pass through params that aren't already handled as path params
363
+ if options.params:
364
+ extra_query_params = {k: v for k, v in options.params.items() if k not in path_params}
365
+ if extra_query_params:
366
+ body["extra_query"] = extra_query_params
367
+
384
368
  body, field_names = self._handle_file_uploads(options, body)
385
369
 
386
370
  body = self._convert_body(matched_func, body, exclude_params=set(field_names))
387
-
388
- trace_path = webmethod.descriptive_name or route_path
389
- await start_trace(trace_path, {"__location__": "library_client"})
390
- try:
391
- result = await matched_func(**body)
392
- finally:
393
- await end_trace()
371
+ result = await matched_func(**body)
394
372
 
395
373
  # Handle FastAPI Response objects (e.g., from file content retrieval)
396
374
  if isinstance(result, FastAPIResponse):
@@ -449,19 +427,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
449
427
  # Prepare body for the function call (handles both Pydantic and traditional params)
450
428
  body = self._convert_body(func, body)
451
429
 
452
- trace_path = webmethod.descriptive_name or route_path
453
- await start_trace(trace_path, {"__location__": "library_client"})
454
-
455
430
  async def gen():
456
- try:
457
- async for chunk in await func(**body):
458
- data = json.dumps(convert_pydantic_to_json_value(chunk))
459
- sse_event = f"data: {data}\n\n"
460
- yield sse_event.encode("utf-8")
461
- finally:
462
- await end_trace()
431
+ async for chunk in await func(**body):
432
+ data = json.dumps(convert_pydantic_to_json_value(chunk))
433
+ sse_event = f"data: {data}\n\n"
434
+ yield sse_event.encode("utf-8")
463
435
 
464
- wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
436
+ wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
465
437
 
466
438
  mock_response = httpx.Response(
467
439
  status_code=httpx.codes.OK,
@@ -519,6 +491,25 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
519
491
  unwrapped_body_param = param
520
492
  break
521
493
 
494
+ # Check for parameters with Depends() annotation (FastAPI router endpoints)
495
+ # These need special handling: construct the request model from body
496
+ depends_param = None
497
+ for param in params_list:
498
+ param_type = param.annotation
499
+ if get_origin(param_type) is typing.Annotated:
500
+ args = get_args(param_type)
501
+ if len(args) > 1:
502
+ # Check if any metadata is Depends
503
+ metadata = args[1:]
504
+ for item in metadata:
505
+ # Check if it's a Depends object (has dependency attribute or is a callable)
506
+ # Depends objects typically have a 'dependency' attribute or are callable functions
507
+ if hasattr(item, "dependency") or callable(item) or "Depends" in str(type(item)):
508
+ depends_param = param
509
+ break
510
+ if depends_param:
511
+ break
512
+
522
513
  # Convert parameters to Pydantic models where needed
523
514
  converted_body = {}
524
515
  for param_name, param in sig.parameters.items():
@@ -529,6 +520,27 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
529
520
  else:
530
521
  converted_body[param_name] = convert_to_pydantic(param.annotation, value)
531
522
 
523
+ # Handle Depends parameter: construct request model from body
524
+ if depends_param and depends_param.name not in converted_body:
525
+ param_type = depends_param.annotation
526
+ if get_origin(param_type) is typing.Annotated:
527
+ base_type = get_args(param_type)[0]
528
+ # Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type
529
+ # In Python 3.10+, Union types created with | syntax are still typing.Union
530
+ origin = get_origin(base_type)
531
+ if origin is Union:
532
+ # Get the first non-None type from the Union
533
+ union_args = get_args(base_type)
534
+ base_type = next(
535
+ (t for t in union_args if t is not type(None) and t is not None),
536
+ union_args[0] if union_args else None,
537
+ )
538
+
539
+ # Only try to instantiate if it's a class (not a Union or other non-callable type)
540
+ if base_type is not None and inspect.isclass(base_type) and callable(base_type):
541
+ # Construct the request model from all body parameters
542
+ converted_body[depends_param.name] = base_type(**body)
543
+
532
544
  # handle unwrapped body parameter after processing all named parameters
533
545
  if unwrapped_body_param:
534
546
  base_type = get_args(unwrapped_body_param.annotation)[0]
@@ -9,10 +9,9 @@ from typing import Any
9
9
 
10
10
  from pydantic import BaseModel
11
11
 
12
- from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
13
- from llama_stack.core.datatypes import StackRunConfig
14
- from llama_stack.core.storage.datatypes import KVStoreReference
15
- from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
12
+ from llama_stack.core.datatypes import StackConfig
13
+ from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
14
+ from llama_stack_api import ListPromptsResponse, Prompt, Prompts
16
15
 
17
16
 
18
17
  class PromptServiceConfig(BaseModel):
@@ -21,7 +20,7 @@ class PromptServiceConfig(BaseModel):
21
20
  :param run_config: Stack run configuration containing distribution info
22
21
  """
23
22
 
24
- run_config: StackRunConfig
23
+ config: StackConfig
25
24
 
26
25
 
27
26
  async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]):
@@ -35,16 +34,15 @@ class PromptServiceImpl(Prompts):
35
34
  """Built-in prompt service implementation using KVStore."""
36
35
 
37
36
  def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
38
- self.config = config
37
+ self.stack_config = config.config
39
38
  self.deps = deps
40
39
  self.kvstore: KVStore
41
40
 
42
41
  async def initialize(self) -> None:
43
- # Use metadata store backend with prompts-specific namespace
44
- metadata_ref = self.config.run_config.storage.stores.metadata
45
- if not metadata_ref:
46
- raise ValueError("storage.stores.metadata must be configured in run config")
47
- prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
42
+ # Use prompts store reference from run config
43
+ prompts_ref = self.stack_config.storage.stores.prompts
44
+ if not prompts_ref:
45
+ raise ValueError("storage.stores.prompts must be configured in run config")
48
46
  self.kvstore = await kvstore_impl(prompts_ref)
49
47
 
50
48
  def _get_default_key(self, prompt_id: str) -> str:
@@ -232,3 +230,6 @@ class PromptServiceImpl(Prompts):
232
230
  await self.kvstore.set(default_key, str(version))
233
231
 
234
232
  return self._deserialize_prompt(data)
233
+
234
+ async def shutdown(self) -> None:
235
+ pass
@@ -9,18 +9,24 @@ from typing import Any
9
9
 
10
10
  from pydantic import BaseModel
11
11
 
12
- from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
13
12
  from llama_stack.log import get_logger
14
- from llama_stack.providers.datatypes import HealthResponse, HealthStatus
15
-
16
- from .datatypes import StackRunConfig
13
+ from llama_stack_api import (
14
+ HealthResponse,
15
+ HealthStatus,
16
+ InspectProviderRequest,
17
+ ListProvidersResponse,
18
+ ProviderInfo,
19
+ Providers,
20
+ )
21
+
22
+ from .datatypes import StackConfig
17
23
  from .utils.config import redact_sensitive_fields
18
24
 
19
25
  logger = get_logger(name=__name__, category="core")
20
26
 
21
27
 
22
28
  class ProviderImplConfig(BaseModel):
23
- run_config: StackRunConfig
29
+ config: StackConfig
24
30
 
25
31
 
26
32
  async def get_provider_impl(config, deps):
@@ -31,7 +37,7 @@ async def get_provider_impl(config, deps):
31
37
 
32
38
  class ProviderImpl(Providers):
33
39
  def __init__(self, config, deps):
34
- self.config = config
40
+ self.stack_config = config.config
35
41
  self.deps = deps
36
42
 
37
43
  async def initialize(self) -> None:
@@ -42,8 +48,8 @@ class ProviderImpl(Providers):
42
48
  pass
43
49
 
44
50
  async def list_providers(self) -> ListProvidersResponse:
45
- run_config = self.config.run_config
46
- safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump()))
51
+ run_config = self.stack_config
52
+ safe_config = StackConfig(**redact_sensitive_fields(run_config.model_dump()))
47
53
  providers_health = await self.get_providers_health()
48
54
  ret = []
49
55
  for api, providers in safe_config.providers.items():
@@ -68,13 +74,13 @@ class ProviderImpl(Providers):
68
74
 
69
75
  return ListProvidersResponse(data=ret)
70
76
 
71
- async def inspect_provider(self, provider_id: str) -> ProviderInfo:
77
+ async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo:
72
78
  all_providers = await self.list_providers()
73
79
  for p in all_providers.data:
74
- if p.provider_id == provider_id:
80
+ if p.provider_id == request.provider_id:
75
81
  return p
76
82
 
77
- raise ValueError(f"Provider {provider_id} not found")
83
+ raise ValueError(f"Provider {request.provider_id} not found")
78
84
 
79
85
  async def get_providers_health(self) -> dict[str, dict[str, HealthResponse]]:
80
86
  """Get health status for all providers.