llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
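
The dominant structural change in this release is that the API definitions move out of llama_stack.apis.* into a new top-level llama_stack_api package (note the new llama_stack_api/ entries and the second top_level.txt entry above). Below is a minimal sketch of what that migration looks like for downstream imports; the names used are taken from the llama_stack/core/stack.py diff that follows, and other symbols may behave differently.

# Import migration sketch (names taken from the stack.py hunk below).
# 0.3.5 and earlier: protocols were spread across llama_stack.apis.* submodules
#   from llama_stack.apis.inference import Inference
#   from llama_stack.apis.safety import Safety
#   from llama_stack.providers.datatypes import Api
# 0.4.0: the same names are re-exported from the new top-level package
from llama_stack_api import Api, Inference, Safety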
llama_stack/core/stack.py CHANGED
@@ -6,36 +6,18 @@
 
 import asyncio
 import importlib.resources
+import inspect
 import os
 import re
 import tempfile
-from typing import Any
+from typing import Any, get_type_hints
 
 import yaml
+from pydantic import BaseModel
 
-from llama_stack.apis.agents import Agents
-from llama_stack.apis.benchmarks import Benchmarks
-from llama_stack.apis.conversations import Conversations
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.eval import Eval
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.inspect import Inspect
-from llama_stack.apis.models import Models
-from llama_stack.apis.post_training import PostTraining
-from llama_stack.apis.prompts import Prompts
-from llama_stack.apis.providers import Providers
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.scoring import Scoring
-from llama_stack.apis.scoring_functions import ScoringFunctions
-from llama_stack.apis.shields import Shields
-from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration
-from llama_stack.apis.telemetry import Telemetry
-from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
+from llama_stack.core.admin import AdminImpl, AdminImplConfig
 from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
-from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig
+from llama_stack.core.datatypes import Provider, QualifiedModel, SafetyConfig, StackConfig, VectorStoresConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
 from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
@@ -55,7 +37,30 @@ from llama_stack.core.storage.datatypes import (
 from llama_stack.core.store.registry import create_dist_registry
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import (
+    Agents,
+    Api,
+    Batches,
+    Benchmarks,
+    Conversations,
+    DatasetIO,
+    Datasets,
+    Eval,
+    Files,
+    Inference,
+    Inspect,
+    Models,
+    PostTraining,
+    Prompts,
+    Providers,
+    Safety,
+    Scoring,
+    ScoringFunctions,
+    Shields,
+    ToolGroups,
+    ToolRuntime,
+    VectorIO,
+)
 
 logger = get_logger(name=__name__, category="core")
 
@@ -64,10 +69,9 @@ class LlamaStack(
     Providers,
     Inference,
     Agents,
+    Batches,
     Safety,
-    SyntheticDataGeneration,
     Datasets,
-    Telemetry,
     PostTraining,
     VectorIO,
     Eval,
@@ -80,7 +84,6 @@ class LlamaStack(
     Inspect,
     ToolGroups,
     ToolRuntime,
-    RAGToolRuntime,
     Files,
     Prompts,
     Conversations,
@@ -108,7 +111,82 @@ REGISTRY_REFRESH_TASK = None
 TEST_RECORDING_CONTEXT = None
 
 
-async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
+def is_request_model(t: Any) -> bool:
+    """Check if a type is a request model (Pydantic BaseModel).
+
+    Args:
+        t: The type to check
+
+    Returns:
+        True if the type is a Pydantic BaseModel subclass, False otherwise
+    """
+
+    return inspect.isclass(t) and issubclass(t, BaseModel)
+
+
+async def invoke_with_optional_request(method: Any) -> Any:
+    """Invoke a method, automatically creating a request instance if needed.
+
+    For APIs that use request models, this will create an empty request object.
+    For backward compatibility, falls back to calling without arguments.
+
+    Uses get_type_hints() to resolve forward references (e.g., "ListBenchmarksRequest" -> actual class).
+
+    Handles methods with:
+    - No parameters: calls without arguments
+    - One or more request model parameters: creates empty instances for each
+    - Mixed parameters: creates request models, uses defaults for others
+    - Required non-request-model parameters without defaults: falls back to calling without arguments
+
+    Args:
+        method: The method to invoke
+
+    Returns:
+        The result of calling the method
+    """
+    try:
+        hints = get_type_hints(method)
+    except Exception:
+        # Forward references can't be resolved, fall back to calling without request
+        return await method()
+
+    params = list(inspect.signature(method).parameters.values())
+    params = [p for p in params if p.name != "self"]
+
+    if not params:
+        return await method()
+
+    # Build arguments for the method call
+    args: dict[str, Any] = {}
+    can_call = True
+
+    for param in params:
+        param_type = hints.get(param.name)
+
+        # If it's a request model, try to create an empty instance
+        if param_type and is_request_model(param_type):
+            try:
+                args[param.name] = param_type()
+            except Exception:
+                # Request model requires arguments, can't create empty instance
+                can_call = False
+                break
+        # If it has a default value, we can skip it (will use default)
+        elif param.default != inspect.Parameter.empty:
+            continue
+        # Required parameter that's not a request model - can't provide it
+        else:
+            can_call = False
+            break
+
+    if can_call and args:
+        return await method(**args)
+
+    # Fall back to calling without arguments for backward compatibility
+    return await method()
+
+
+async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
     for rsrc, api, register_method, list_method in RESOURCES:
         objects = getattr(run_config.registered_resources, rsrc)
         if api not in impls:
@@ -129,7 +207,7 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
             await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
 
         method = getattr(impls[api], list_method)
-        response = await method()
+        response = await invoke_with_optional_request(method)
 
         objects_to_process = response.data if hasattr(response, "data") else response
 
@@ -144,35 +222,93 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig
     if vector_stores_config is None:
         return
 
-    default_embedding_model = vector_stores_config.default_embedding_model
-    if default_embedding_model is None:
-        return
+    # Validate default embedding model
+    if vector_stores_config.default_embedding_model is not None:
+        await _validate_embedding_model(vector_stores_config.default_embedding_model, impls)
+
+    # Validate rewrite query params
+    if vector_stores_config.rewrite_query_params:
+        if vector_stores_config.rewrite_query_params.model:
+            await _validate_rewrite_query_model(vector_stores_config.rewrite_query_params.model, impls)
+
 
-    provider_id = default_embedding_model.provider_id
-    model_id = default_embedding_model.model_id
-    default_model_id = f"{provider_id}/{model_id}"
+async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict[Api, Any]) -> None:
+    """Validate that an embedding model exists and has required metadata."""
+    provider_id = embedding_model.provider_id
+    model_id = embedding_model.model_id
+    model_identifier = f"{provider_id}/{model_id}"
 
     if Api.models not in impls:
-        raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
+        raise ValueError(f"Models API is not available but vector_stores config requires model '{model_identifier}'")
 
     models_impl = impls[Api.models]
     response = await models_impl.list_models()
     models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"}
 
-    default_model = models_list.get(default_model_id)
-    if default_model is None:
-        raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}")
+    model = models_list.get(model_identifier)
+    if model is None:
+        raise ValueError(
+            f"Embedding model '{model_identifier}' not found. Available embedding models: {list(models_list.keys())}"
+        )
 
-    embedding_dimension = default_model.metadata.get("embedding_dimension")
+    embedding_dimension = model.metadata.get("embedding_dimension")
     if embedding_dimension is None:
-        raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
+        raise ValueError(f"Embedding model '{model_identifier}' is missing 'embedding_dimension' in metadata")
 
     try:
         int(embedding_dimension)
     except ValueError as err:
         raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
 
-    logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
+    logger.debug(f"Validated embedding model: {model_identifier} (dimension: {embedding_dimension})")
+
+
+async def _validate_rewrite_query_model(rewrite_query_model: QualifiedModel, impls: dict[Api, Any]) -> None:
+    """Validate that a rewrite query model exists and is accessible."""
+    provider_id = rewrite_query_model.provider_id
+    model_id = rewrite_query_model.model_id
+    model_identifier = f"{provider_id}/{model_id}"
+
+    if Api.models not in impls:
+        raise ValueError(
+            f"Models API is not available but vector_stores config requires rewrite query model '{model_identifier}'"
+        )
+
+    models_impl = impls[Api.models]
+    response = await models_impl.list_models()
+    llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"}
+
+    model = llm_models_list.get(model_identifier)
+    if model is None:
+        raise ValueError(
+            f"Rewrite query model '{model_identifier}' not found. Available LLM models: {list(llm_models_list.keys())}"
+        )
+
+    logger.debug(f"Validated rewrite query model: {model_identifier}")
+
+
+async def validate_safety_config(safety_config: SafetyConfig | None, impls: dict[Api, Any]):
+    if safety_config is None or safety_config.default_shield_id is None:
+        return
+
+    if Api.shields not in impls:
+        raise ValueError("Safety configuration requires the shields API to be enabled")
+
+    if Api.safety not in impls:
+        raise ValueError("Safety configuration requires the safety API to be enabled")
+
+    shields_impl = impls[Api.shields]
+    response = await shields_impl.list_shields()
+    shields_by_id = {shield.identifier: shield for shield in response.data}
+
+    default_shield_id = safety_config.default_shield_id
+    # don't validate if there are no shields registered
+    if shields_by_id and default_shield_id not in shields_by_id:
+        available = sorted(shields_by_id)
+        raise ValueError(
+            f"Configured default_shield_id '{default_shield_id}' not found among registered shields."
+            f" Available shields: {available}"
+        )
 
 
 class EnvVarError(Exception):
@@ -317,39 +453,44 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
     return config_dict
 
 
-def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None:
-    """Add internal implementations (inspect and providers) to the implementations dictionary.
-
+def add_internal_implementations(impls: dict[Api, Any], config: StackConfig) -> None:
+    """Add internal implementations (inspect, providers, and admin) to the implementations dictionary.
    Args:
        impls: Dictionary of API implementations
        run_config: Stack run configuration
    """
    inspect_impl = DistributionInspectImpl(
-        DistributionInspectConfig(run_config=run_config),
+        DistributionInspectConfig(config=config),
        deps=impls,
    )
    impls[Api.inspect] = inspect_impl
 
    providers_impl = ProviderImpl(
-        ProviderImplConfig(run_config=run_config),
+        ProviderImplConfig(config=config),
        deps=impls,
    )
    impls[Api.providers] = providers_impl
 
+    admin_impl = AdminImpl(
+        AdminImplConfig(config=config),
+        deps=impls,
+    )
+    impls[Api.admin] = admin_impl
+
    prompts_impl = PromptServiceImpl(
-        PromptServiceConfig(run_config=run_config),
+        PromptServiceConfig(config=config),
        deps=impls,
    )
    impls[Api.prompts] = prompts_impl
 
    conversations_impl = ConversationServiceImpl(
-        ConversationServiceConfig(run_config=run_config),
+        ConversationServiceConfig(config=config),
        deps=impls,
    )
    impls[Api.conversations] = conversations_impl
 
 
-def _initialize_storage(run_config: StackRunConfig):
+def _initialize_storage(run_config: StackConfig):
     kv_backends: dict[str, StorageBackendConfig] = {}
     sql_backends: dict[str, StorageBackendConfig] = {}
     for backend_name, backend_config in run_config.storage.backends.items():
@@ -361,15 +502,15 @@ def _initialize_storage(run_config: StackRunConfig):
         else:
             raise ValueError(f"Unknown storage backend type: {type}")
 
-    from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
-    from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+    from llama_stack.core.storage.kvstore.kvstore import register_kvstore_backends
+    from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends
 
     register_kvstore_backends(kv_backends)
     register_sqlstore_backends(sql_backends)
 
 
 class Stack:
-    def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
+    def __init__(self, run_config: StackConfig, provider_registry: ProviderRegistry | None = None):
         self.run_config = run_config
         self.provider_registry = provider_registry
         self.impls = None
@@ -412,6 +553,7 @@ class Stack:
         await register_resources(self.run_config, impls)
         await refresh_registry_once(impls)
         await validate_vector_stores_config(self.run_config.vector_stores, impls)
+        await validate_safety_config(self.run_config.safety, impls)
         self.impls = impls
 
     def create_registry_refresh_task(self):
@@ -474,20 +616,20 @@ async def refresh_registry_task(impls: dict[Api, Any]):
         await asyncio.sleep(REGISTRY_REFRESH_INTERVAL_SECONDS)
 
 
-def get_stack_run_config_from_distro(distro: str) -> StackRunConfig:
-    distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/run.yaml"
+def get_stack_run_config_from_distro(distro: str) -> StackConfig:
+    distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/config.yaml"
 
     with importlib.resources.as_file(distro_path) as path:
         if not path.exists():
             raise ValueError(f"Distribution '{distro}' not found at {distro_path}")
         run_config = yaml.safe_load(path.open())
 
-    return StackRunConfig(**replace_env_vars(run_config))
+    return StackConfig(**replace_env_vars(run_config))
 
 
 def run_config_from_adhoc_config_spec(
     adhoc_config_spec: str, provider_registry: ProviderRegistry | None = None
-) -> StackRunConfig:
+) -> StackConfig:
     """
     Create an adhoc distribution from a list of API providers.
 
@@ -527,7 +669,7 @@ def run_config_from_adhoc_config_spec(
             config=provider_config,
         )
    ]
-    config = StackRunConfig(
+    config = StackConfig(
        image_name="distro-test",
        apis=list(provider_configs_by_api.keys()),
        providers=provider_configs_by_api,
@@ -540,6 +682,7 @@
                metadata=KVStoreReference(backend="kv_default", namespace="registry"),
                inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
                conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
+                prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
            ),
        ),
    )
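
The invoke_with_optional_request helper added above drives the register_resources loop: a list method whose parameter is a Pydantic request model gets an empty instance built for it, while older parameterless methods are awaited unchanged. Below is a minimal sketch of that behavior; WidgetAPI and ListWidgetsRequest are hypothetical names used only for illustration, and the import path is assumed to stay as in the hunk above.

# Hypothetical API used only to illustrate invoke_with_optional_request.
import asyncio

from pydantic import BaseModel

from llama_stack.core.stack import invoke_with_optional_request  # assumed import path per the diff above


class ListWidgetsRequest(BaseModel):
    limit: int = 20  # every field has a default, so an empty instance can be constructed


class WidgetAPI:
    async def list_widgets(self, request: ListWidgetsRequest) -> list[str]:
        return [f"widget page of {request.limit}"]

    async def ping(self) -> str:
        return "pong"


async def demo() -> None:
    api = WidgetAPI()
    # The request-model parameter is detected via type hints and an empty instance is passed in.
    print(await invoke_with_optional_request(api.list_widgets))
    # Methods without parameters are awaited directly, preserving the old behavior.
    print(await invoke_with_optional_request(api.ping))


asyncio.run(demo())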

llama_stack/core/storage/datatypes.py CHANGED
@@ -12,6 +12,8 @@ from typing import Annotated, Literal
 
 from pydantic import BaseModel, Field, field_validator
 
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+
 
 class StorageBackendType(StrEnum):
     KV_REDIS = "kv_redis"
@@ -256,25 +258,46 @@ class ResponsesStoreReference(InferenceStoreReference):
 
 class ServerStoresConfig(BaseModel):
     metadata: KVStoreReference | None = Field(
-        default=None,
+        default=KVStoreReference(
+            backend="kv_default",
+            namespace="registry",
+        ),
         description="Metadata store configuration (uses KV backend)",
     )
     inference: InferenceStoreReference | None = Field(
-        default=None,
+        default=InferenceStoreReference(
+            backend="sql_default",
+            table_name="inference_store",
+        ),
         description="Inference store configuration (uses SQL backend)",
     )
     conversations: SqlStoreReference | None = Field(
-        default=None,
+        default=SqlStoreReference(
+            backend="sql_default",
+            table_name="openai_conversations",
+        ),
         description="Conversations store configuration (uses SQL backend)",
     )
     responses: ResponsesStoreReference | None = Field(
         default=None,
         description="Responses store configuration (uses SQL backend)",
     )
+    prompts: KVStoreReference | None = Field(
+        default=KVStoreReference(backend="kv_default", namespace="prompts"),
+        description="Prompts store configuration (uses KV backend)",
+    )
 
 
 class StorageConfig(BaseModel):
     backends: dict[str, StorageBackendConfig] = Field(
+        default={
+            "kv_default": SqliteKVStoreConfig(
+                db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/kvstore.db",
+            ),
+            "sql_default": SqliteSqlStoreConfig(
+                db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/sql_store.db",
+            ),
+        },
         description="Named backend configurations (e.g., 'default', 'cache')",
     )
     stores: ServerStoresConfig = Field(
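
Compared with 0.3.5, where every store reference defaulted to None, the hunk above gives ServerStoresConfig working SQLite-backed defaults. A minimal sketch of what those defaults resolve to, assuming the module path and field defaults shown in the diff:

# Sketch only: field names and default values are taken from the ServerStoresConfig hunk above.
from llama_stack.core.storage.datatypes import ServerStoresConfig

stores = ServerStoresConfig()  # no explicit configuration
assert stores.metadata is not None and stores.metadata.backend == "kv_default"
assert stores.inference is not None and stores.inference.table_name == "inference_store"
assert stores.conversations is not None and stores.conversations.table_name == "openai_conversations"
assert stores.prompts is not None and stores.prompts.namespace == "prompts"  # new in 0.4.0
assert stores.responses is None  # still opt-in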

llama_stack/{providers/utils → core/storage}/kvstore/__init__.py CHANGED
@@ -4,4 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack_api.internal.kvstore import KVStore as KVStore
+
 from .kvstore import *  # noqa: F401, F403

llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py CHANGED
@@ -11,10 +11,21 @@
 
 from __future__ import annotations
 
-from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType
+import asyncio
+from collections import defaultdict
+from datetime import datetime
+from typing import cast
 
-from .api import KVStore
-from .config import KVStoreConfig
+from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig
+from llama_stack_api.internal.kvstore import KVStore
+
+from .config import (
+    KVStoreConfig,
+    MongoDBKVStoreConfig,
+    PostgresKVStoreConfig,
+    RedisKVStoreConfig,
+    SqliteKVStoreConfig,
+)
 
 
 def kvstore_dependencies():
@@ -30,7 +41,7 @@ def kvstore_dependencies():
 
 class InmemoryKVStoreImpl(KVStore):
     def __init__(self):
-        self._store = {}
+        self._store: dict[str, str] = {}
 
     async def initialize(self) -> None:
         pass
@@ -38,7 +49,7 @@ class InmemoryKVStoreImpl(KVStore):
     async def get(self, key: str) -> str | None:
         return self._store.get(key)
 
-    async def set(self, key: str, value: str) -> None:
+    async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
         self._store[key] = value
 
     async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
@@ -53,45 +64,65 @@ class InmemoryKVStoreImpl(KVStore):
 
 
 _KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
+_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}
+_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)
 
 
 def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
     """Register the set of available KV store backends for reference resolution."""
     global _KVSTORE_BACKENDS
+    global _KVSTORE_INSTANCES
+    global _KVSTORE_LOCKS
 
     _KVSTORE_BACKENDS.clear()
+    _KVSTORE_INSTANCES.clear()
+    _KVSTORE_LOCKS.clear()
     for name, cfg in backends.items():
-        _KVSTORE_BACKENDS[name] = cfg
+        typed_cfg = cast(KVStoreConfig, cfg)
+        _KVSTORE_BACKENDS[name] = typed_cfg
 
 
 async def kvstore_impl(reference: KVStoreReference) -> KVStore:
     backend_name = reference.backend
+    cache_key = (backend_name, reference.namespace)
+
+    existing = _KVSTORE_INSTANCES.get(cache_key)
+    if existing:
+        return existing
 
     backend_config = _KVSTORE_BACKENDS.get(backend_name)
     if backend_config is None:
         raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}")
 
-    config = backend_config.model_copy()
-    config.namespace = reference.namespace
+    lock = _KVSTORE_LOCKS[cache_key]
+    async with lock:
+        existing = _KVSTORE_INSTANCES.get(cache_key)
+        if existing:
+            return existing
+
+        config = backend_config.model_copy()
+        config.namespace = reference.namespace
 
-    if config.type == StorageBackendType.KV_REDIS.value:
-        from .redis import RedisKVStoreImpl
+        impl: KVStore
+        if isinstance(config, RedisKVStoreConfig):
+            from .redis import RedisKVStoreImpl
 
-        impl = RedisKVStoreImpl(config)
-    elif config.type == StorageBackendType.KV_SQLITE.value:
-        from .sqlite import SqliteKVStoreImpl
+            impl = RedisKVStoreImpl(config)
+        elif isinstance(config, SqliteKVStoreConfig):
+            from .sqlite import SqliteKVStoreImpl
 
-        impl = SqliteKVStoreImpl(config)
-    elif config.type == StorageBackendType.KV_POSTGRES.value:
-        from .postgres import PostgresKVStoreImpl
+            impl = SqliteKVStoreImpl(config)
+        elif isinstance(config, PostgresKVStoreConfig):
+            from .postgres import PostgresKVStoreImpl
 
-        impl = PostgresKVStoreImpl(config)
-    elif config.type == StorageBackendType.KV_MONGODB.value:
-        from .mongodb import MongoDBKVStoreImpl
+            impl = PostgresKVStoreImpl(config)
+        elif isinstance(config, MongoDBKVStoreConfig):
+            from .mongodb import MongoDBKVStoreImpl
 
-        impl = MongoDBKVStoreImpl(config)
-    else:
-        raise ValueError(f"Unknown kvstore type {config.type}")
+            impl = MongoDBKVStoreImpl(config)
+        else:
+            raise ValueError(f"Unknown kvstore type {config.type}")
 
-    await impl.initialize()
-    return impl
+        await impl.initialize()
+        _KVSTORE_INSTANCES[cache_key] = impl
+        return impl

llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py CHANGED
@@ -9,8 +9,8 @@ from datetime import datetime
 from pymongo import AsyncMongoClient
 from pymongo.asynchronous.collection import AsyncCollection
 
+from llama_stack.core.storage.kvstore import KVStore
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import KVStore
 
 
 from ..config import MongoDBKVStoreConfig
@@ -30,14 +30,13 @@ class MongoDBKVStoreImpl(KVStore):
 
     async def initialize(self) -> None:
         try:
-            conn_creds = {
-                "host": self.config.host,
-                "port": self.config.port,
-                "username": self.config.user,
-                "password": self.config.password,
-            }
-            conn_creds = {k: v for k, v in conn_creds.items() if v is not None}
-            self.conn = AsyncMongoClient(**conn_creds)
+            # Pass parameters explicitly to satisfy mypy - AsyncMongoClient doesn't accept **dict
+            self.conn = AsyncMongoClient(
+                host=self.config.host if self.config.host is not None else None,
+                port=self.config.port if self.config.port is not None else None,
+                username=self.config.user if self.config.user is not None else None,
+                password=self.config.password if self.config.password is not None else None,
+            )
         except Exception as e:
             log.exception("Could not connect to MongoDB database server")
             raise RuntimeError("Could not connect to MongoDB database server") from e
@@ -79,4 +78,8 @@
         end_key = self._namespaced_key(end_key)
         query = {"key": {"$gte": start_key, "$lt": end_key}}
         cursor = self.collection.find(query, {"key": 1, "_id": 0}).sort("key", 1)
-        return [doc["key"] for doc in cursor]
+        # AsyncCursor requires async iteration
+        result = []
+        async for doc in cursor:
+            result.append(doc["key"])
+        return result
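
The kvstore changes above do two things: backends are now dispatched on their config class rather than a type string, and resolved stores are cached per (backend, namespace) pair behind an asyncio lock. A minimal usage sketch follows, assuming the module paths shown in the diff; the db_path value is illustrative only.

# Sketch of the new registration + cached resolution shown in kvstore.py above.
import asyncio

from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig
from llama_stack.core.storage.kvstore.kvstore import kvstore_impl, register_kvstore_backends


async def demo() -> None:
    # Register named backends once (normally done by _initialize_storage from the stack config).
    register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path="/tmp/kvstore.db")})

    ref = KVStoreReference(backend="kv_default", namespace="registry")
    first = await kvstore_impl(ref)
    second = await kvstore_impl(ref)
    assert first is second  # same (backend, namespace) now resolves to one initialized instance

    await first.set("greeting", "hello")
    print(await first.get("greeting"))


asyncio.run(demo())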