llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/core/stack.py CHANGED
@@ -6,36 +6,18 @@

  import asyncio
  import importlib.resources
+ import inspect
  import os
  import re
  import tempfile
- from typing import Any
+ from typing import Any, get_type_hints

  import yaml
+ from pydantic import BaseModel

- from llama_stack.apis.agents import Agents
- from llama_stack.apis.benchmarks import Benchmarks
- from llama_stack.apis.conversations import Conversations
- from llama_stack.apis.datasetio import DatasetIO
- from llama_stack.apis.datasets import Datasets
- from llama_stack.apis.eval import Eval
- from llama_stack.apis.files import Files
- from llama_stack.apis.inference import Inference
- from llama_stack.apis.inspect import Inspect
- from llama_stack.apis.models import Models
- from llama_stack.apis.post_training import PostTraining
- from llama_stack.apis.prompts import Prompts
- from llama_stack.apis.providers import Providers
- from llama_stack.apis.safety import Safety
- from llama_stack.apis.scoring import Scoring
- from llama_stack.apis.scoring_functions import ScoringFunctions
- from llama_stack.apis.shields import Shields
- from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration
- from llama_stack.apis.telemetry import Telemetry
- from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
- from llama_stack.apis.vector_io import VectorIO
+ from llama_stack.core.admin import AdminImpl, AdminImplConfig
  from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
- from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig
+ from llama_stack.core.datatypes import Provider, QualifiedModel, SafetyConfig, StackConfig, VectorStoresConfig
  from llama_stack.core.distribution import get_provider_registry
  from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
  from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
@@ -55,7 +37,30 @@ from llama_stack.core.storage.datatypes import (
  from llama_stack.core.store.registry import create_dist_registry
  from llama_stack.core.utils.dynamic import instantiate_class_type
  from llama_stack.log import get_logger
- from llama_stack.providers.datatypes import Api
+ from llama_stack_api import (
+     Agents,
+     Api,
+     Batches,
+     Benchmarks,
+     Conversations,
+     DatasetIO,
+     Datasets,
+     Eval,
+     Files,
+     Inference,
+     Inspect,
+     Models,
+     PostTraining,
+     Prompts,
+     Providers,
+     Safety,
+     Scoring,
+     ScoringFunctions,
+     Shields,
+     ToolGroups,
+     ToolRuntime,
+     VectorIO,
+ )

  logger = get_logger(name=__name__, category="core")

@@ -64,10 +69,9 @@ class LlamaStack(
      Providers,
      Inference,
      Agents,
+     Batches,
      Safety,
-     SyntheticDataGeneration,
      Datasets,
-     Telemetry,
      PostTraining,
      VectorIO,
      Eval,
@@ -80,7 +84,6 @@ class LlamaStack(
      Inspect,
      ToolGroups,
      ToolRuntime,
-     RAGToolRuntime,
      Files,
      Prompts,
      Conversations,
@@ -107,8 +110,95 @@ REGISTRY_REFRESH_INTERVAL_SECONDS = 300
  REGISTRY_REFRESH_TASK = None
  TEST_RECORDING_CONTEXT = None

+ # ID fields for registered resources that should trigger skipping
+ # when they resolve to empty/None (from conditional env vars like :+)
+ RESOURCE_ID_FIELDS = [
+     "vector_store_id",
+     "model_id",
+     "shield_id",
+     "dataset_id",
+     "scoring_fn_id",
+     "benchmark_id",
+     "toolgroup_id",
+ ]
+
+
+ def is_request_model(t: Any) -> bool:
+     """Check if a type is a request model (Pydantic BaseModel).
+
+     Args:
+         t: The type to check
+
+     Returns:
+         True if the type is a Pydantic BaseModel subclass, False otherwise
+     """
+
+     return inspect.isclass(t) and issubclass(t, BaseModel)
+
+
+ async def invoke_with_optional_request(method: Any) -> Any:
+     """Invoke a method, automatically creating a request instance if needed.
+
+     For APIs that use request models, this will create an empty request object.
+     For backward compatibility, falls back to calling without arguments.
+
+     Uses get_type_hints() to resolve forward references (e.g., "ListBenchmarksRequest" -> actual class).
+
+     Handles methods with:
+     - No parameters: calls without arguments
+     - One or more request model parameters: creates empty instances for each
+     - Mixed parameters: creates request models, uses defaults for others
+     - Required non-request-model parameters without defaults: falls back to calling without arguments
+
+     Args:
+         method: The method to invoke
+
+     Returns:
+         The result of calling the method
+     """
+     try:
+         hints = get_type_hints(method)
+     except Exception:
+         # Forward references can't be resolved, fall back to calling without request
+         return await method()

- async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
+     params = list(inspect.signature(method).parameters.values())
+     params = [p for p in params if p.name != "self"]
+
+     if not params:
+         return await method()
+
+     # Build arguments for the method call
+     args: dict[str, Any] = {}
+     can_call = True
+
+     for param in params:
+         param_type = hints.get(param.name)
+
+         # If it's a request model, try to create an empty instance
+         if param_type and is_request_model(param_type):
+             try:
+                 args[param.name] = param_type()
+             except Exception:
+                 # Request model requires arguments, can't create empty instance
+                 can_call = False
+                 break
+         # If it has a default value, we can skip it (will use default)
+         elif param.default != inspect.Parameter.empty:
+             continue
+         # Required parameter that's not a request model - can't provide it
+         else:
+             can_call = False
+             break
+
+     if can_call and args:
+         return await method(**args)
+
+     # Fall back to calling without arguments for backward compatibility
+     return await method()
+
+
+ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
      for rsrc, api, register_method, list_method in RESOURCES:
          objects = getattr(run_config.registered_resources, rsrc)
          if api not in impls:
@@ -129,7 +219,7 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
              await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})

          method = getattr(impls[api], list_method)
-         response = await method()
+         response = await invoke_with_optional_request(method)

          objects_to_process = response.data if hasattr(response, "data") else response

@@ -144,35 +234,93 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig
      if vector_stores_config is None:
          return

-     default_embedding_model = vector_stores_config.default_embedding_model
-     if default_embedding_model is None:
-         return
+     # Validate default embedding model
+     if vector_stores_config.default_embedding_model is not None:
+         await _validate_embedding_model(vector_stores_config.default_embedding_model, impls)
+
+     # Validate rewrite query params
+     if vector_stores_config.rewrite_query_params:
+         if vector_stores_config.rewrite_query_params.model:
+             await _validate_rewrite_query_model(vector_stores_config.rewrite_query_params.model, impls)

-     provider_id = default_embedding_model.provider_id
-     model_id = default_embedding_model.model_id
-     default_model_id = f"{provider_id}/{model_id}"
+
+ async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict[Api, Any]) -> None:
+     """Validate that an embedding model exists and has required metadata."""
+     provider_id = embedding_model.provider_id
+     model_id = embedding_model.model_id
+     model_identifier = f"{provider_id}/{model_id}"

      if Api.models not in impls:
-         raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
+         raise ValueError(f"Models API is not available but vector_stores config requires model '{model_identifier}'")

      models_impl = impls[Api.models]
      response = await models_impl.list_models()
      models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"}

-     default_model = models_list.get(default_model_id)
-     if default_model is None:
-         raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}")
+     model = models_list.get(model_identifier)
+     if model is None:
+         raise ValueError(
+             f"Embedding model '{model_identifier}' not found. Available embedding models: {list(models_list.keys())}"
+         )

-     embedding_dimension = default_model.metadata.get("embedding_dimension")
+     embedding_dimension = model.metadata.get("embedding_dimension")
      if embedding_dimension is None:
-         raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
+         raise ValueError(f"Embedding model '{model_identifier}' is missing 'embedding_dimension' in metadata")

      try:
          int(embedding_dimension)
      except ValueError as err:
          raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err

-     logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
+     logger.debug(f"Validated embedding model: {model_identifier} (dimension: {embedding_dimension})")
+
+
+ async def _validate_rewrite_query_model(rewrite_query_model: QualifiedModel, impls: dict[Api, Any]) -> None:
+     """Validate that a rewrite query model exists and is accessible."""
+     provider_id = rewrite_query_model.provider_id
+     model_id = rewrite_query_model.model_id
+     model_identifier = f"{provider_id}/{model_id}"
+
+     if Api.models not in impls:
+         raise ValueError(
+             f"Models API is not available but vector_stores config requires rewrite query model '{model_identifier}'"
+         )
+
+     models_impl = impls[Api.models]
+     response = await models_impl.list_models()
+     llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"}
+
+     model = llm_models_list.get(model_identifier)
+     if model is None:
+         raise ValueError(
+             f"Rewrite query model '{model_identifier}' not found. Available LLM models: {list(llm_models_list.keys())}"
+         )
+
+     logger.debug(f"Validated rewrite query model: {model_identifier}")
+
+
+ async def validate_safety_config(safety_config: SafetyConfig | None, impls: dict[Api, Any]):
+     if safety_config is None or safety_config.default_shield_id is None:
+         return
+
+     if Api.shields not in impls:
+         raise ValueError("Safety configuration requires the shields API to be enabled")
+
+     if Api.safety not in impls:
+         raise ValueError("Safety configuration requires the safety API to be enabled")
+
+     shields_impl = impls[Api.shields]
+     response = await shields_impl.list_shields()
+     shields_by_id = {shield.identifier: shield for shield in response.data}
+
+     default_shield_id = safety_config.default_shield_id
+     # don't validate if there are no shields registered
+     if shields_by_id and default_shield_id not in shields_by_id:
+         available = sorted(shields_by_id)
+         raise ValueError(
+             f"Configured default_shield_id '{default_shield_id}' not found among registered shields."
+             f" Available shields: {available}"
+         )


  class EnvVarError(Exception):
@@ -210,15 +358,33 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
                              logger.debug(
                                  f"Skipping config env variable expansion for disabled provider: {v.get('provider_id', '')}"
                              )
-                             # Create a copy with resolved provider_id but original config
-                             disabled_provider = v.copy()
-                             disabled_provider["provider_id"] = resolved_provider_id
                              continue
                      except EnvVarError:
                          # If we can't resolve the provider_id, continue with normal processing
                          pass

-                 # Normal processing for non-disabled providers
+                 # Special handling for registered resources: check if ID field resolves to empty/None
+                 # from conditional env vars (e.g., ${env.VAR:+value}) and skip the entry if so
+                 if isinstance(v, dict):
+                     should_skip = False
+                     for id_field in RESOURCE_ID_FIELDS:
+                         if id_field in v:
+                             try:
+                                 resolved_id = replace_env_vars(v[id_field], f"{path}[{i}].{id_field}")
+                                 if resolved_id is None or resolved_id == "":
+                                     logger.debug(
+                                         f"Skipping {path}[{i}] with empty {id_field} (conditional env var not set)"
+                                     )
+                                     should_skip = True
+                                     break
+                             except EnvVarError as e:
+                                 logger.warning(
+                                     f"Could not resolve {id_field} in {path}[{i}], env var '{e.var_name}': {e}"
+                                 )
+                     if should_skip:
+                         continue
+
+                 # Normal processing
                  result.append(replace_env_vars(v, f"{path}[{i}]"))
              except EnvVarError as e:
                  raise EnvVarError(e.var_name, e.path) from None
@@ -317,39 +483,44 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
      return config_dict


- def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None:
-     """Add internal implementations (inspect and providers) to the implementations dictionary.
-
+ def add_internal_implementations(impls: dict[Api, Any], config: StackConfig) -> None:
+     """Add internal implementations (inspect, providers, and admin) to the implementations dictionary.
      Args:
          impls: Dictionary of API implementations
          run_config: Stack run configuration
      """
      inspect_impl = DistributionInspectImpl(
-         DistributionInspectConfig(run_config=run_config),
+         DistributionInspectConfig(config=config),
          deps=impls,
      )
      impls[Api.inspect] = inspect_impl

      providers_impl = ProviderImpl(
-         ProviderImplConfig(run_config=run_config),
+         ProviderImplConfig(config=config),
          deps=impls,
      )
      impls[Api.providers] = providers_impl

+     admin_impl = AdminImpl(
+         AdminImplConfig(config=config),
+         deps=impls,
+     )
+     impls[Api.admin] = admin_impl
+
      prompts_impl = PromptServiceImpl(
-         PromptServiceConfig(run_config=run_config),
+         PromptServiceConfig(config=config),
          deps=impls,
      )
      impls[Api.prompts] = prompts_impl

      conversations_impl = ConversationServiceImpl(
-         ConversationServiceConfig(run_config=run_config),
+         ConversationServiceConfig(config=config),
          deps=impls,
      )
      impls[Api.conversations] = conversations_impl


- def _initialize_storage(run_config: StackRunConfig):
+ def _initialize_storage(run_config: StackConfig):
      kv_backends: dict[str, StorageBackendConfig] = {}
      sql_backends: dict[str, StorageBackendConfig] = {}
      for backend_name, backend_config in run_config.storage.backends.items():
@@ -361,15 +532,15 @@ def _initialize_storage(run_config: StackRunConfig):
          else:
              raise ValueError(f"Unknown storage backend type: {type}")

-     from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
-     from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+     from llama_stack.core.storage.kvstore.kvstore import register_kvstore_backends
+     from llama_stack.core.storage.sqlstore.sqlstore import register_sqlstore_backends

      register_kvstore_backends(kv_backends)
      register_sqlstore_backends(sql_backends)


  class Stack:
-     def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
+     def __init__(self, run_config: StackConfig, provider_registry: ProviderRegistry | None = None):
          self.run_config = run_config
          self.provider_registry = provider_registry
          self.impls = None
@@ -412,6 +583,7 @@ class Stack:
          await register_resources(self.run_config, impls)
          await refresh_registry_once(impls)
          await validate_vector_stores_config(self.run_config.vector_stores, impls)
+         await validate_safety_config(self.run_config.safety, impls)
          self.impls = impls

      def create_registry_refresh_task(self):
@@ -474,20 +646,20 @@ async def refresh_registry_task(impls: dict[Api, Any]):
          await asyncio.sleep(REGISTRY_REFRESH_INTERVAL_SECONDS)


- def get_stack_run_config_from_distro(distro: str) -> StackRunConfig:
-     distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/run.yaml"
+ def get_stack_run_config_from_distro(distro: str) -> StackConfig:
+     distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/config.yaml"

      with importlib.resources.as_file(distro_path) as path:
          if not path.exists():
              raise ValueError(f"Distribution '{distro}' not found at {distro_path}")
          run_config = yaml.safe_load(path.open())

-     return StackRunConfig(**replace_env_vars(run_config))
+     return StackConfig(**replace_env_vars(run_config))


  def run_config_from_adhoc_config_spec(
      adhoc_config_spec: str, provider_registry: ProviderRegistry | None = None
- ) -> StackRunConfig:
+ ) -> StackConfig:
      """
      Create an adhoc distribution from a list of API providers.

@@ -527,7 +699,7 @@ def run_config_from_adhoc_config_spec(
                  config=provider_config,
              )
          ]
-     config = StackRunConfig(
+     config = StackConfig(
          image_name="distro-test",
          apis=list(provider_configs_by_api.keys()),
          providers=provider_configs_by_api,
@@ -540,6 +712,7 @@ def run_config_from_adhoc_config_spec(
              metadata=KVStoreReference(backend="kv_default", namespace="registry"),
              inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
              conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
+             prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
          ),
      ),
  )
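The stack.py changes above route every registered-resource `list_*` call through the new `invoke_with_optional_request` helper, so that APIs whose list methods take a Pydantic request model and older zero-argument list methods are both handled. Below is a minimal sketch of that dispatch; the `ListRequest` model and the two fake API classes are illustrative stand-ins rather than part of the package, and the only assumption taken from the diff is that the helper is importable from `llama_stack.core.stack`.

```python
import asyncio

from pydantic import BaseModel

from llama_stack.core.stack import invoke_with_optional_request


class ListRequest(BaseModel):
    # hypothetical request model: every field has a default, so an empty instance is valid
    limit: int | None = None


class FakeBenchmarks:
    # stands in for an API whose list method takes a request model
    async def list_benchmarks(self, request: ListRequest):
        return {"limit": request.limit}


class LegacyShields:
    # stands in for an older API whose list method takes no arguments
    async def list_shields(self):
        return []


async def main() -> None:
    # the helper builds an empty ListRequest() for the first call ...
    print(await invoke_with_optional_request(FakeBenchmarks().list_benchmarks))
    # ... and falls back to a plain zero-argument call for the second
    print(await invoke_with_optional_request(LegacyShields().list_shields))


asyncio.run(main())
```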
llama_stack/core/storage/datatypes.py CHANGED
@@ -12,6 +12,8 @@ from typing import Annotated, Literal

  from pydantic import BaseModel, Field, field_validator

+ from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+

  class StorageBackendType(StrEnum):
      KV_REDIS = "kv_redis"
@@ -256,25 +258,46 @@ class ResponsesStoreReference(InferenceStoreReference):

  class ServerStoresConfig(BaseModel):
      metadata: KVStoreReference | None = Field(
-         default=None,
+         default=KVStoreReference(
+             backend="kv_default",
+             namespace="registry",
+         ),
          description="Metadata store configuration (uses KV backend)",
      )
      inference: InferenceStoreReference | None = Field(
-         default=None,
+         default=InferenceStoreReference(
+             backend="sql_default",
+             table_name="inference_store",
+         ),
          description="Inference store configuration (uses SQL backend)",
      )
      conversations: SqlStoreReference | None = Field(
-         default=None,
+         default=SqlStoreReference(
+             backend="sql_default",
+             table_name="openai_conversations",
+         ),
          description="Conversations store configuration (uses SQL backend)",
      )
      responses: ResponsesStoreReference | None = Field(
          default=None,
          description="Responses store configuration (uses SQL backend)",
      )
+     prompts: KVStoreReference | None = Field(
+         default=KVStoreReference(backend="kv_default", namespace="prompts"),
+         description="Prompts store configuration (uses KV backend)",
+     )


  class StorageConfig(BaseModel):
      backends: dict[str, StorageBackendConfig] = Field(
+         default={
+             "kv_default": SqliteKVStoreConfig(
+                 db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/kvstore.db",
+             ),
+             "sql_default": SqliteSqlStoreConfig(
+                 db_path=f"${{env.SQLITE_STORE_DIR:={DISTRIBS_BASE_DIR}}}/sql_store.db",
+             ),
+         },
          description="Named backend configurations (e.g., 'default', 'cache')",
      )
      stores: ServerStoresConfig = Field(
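The datatypes.py change above replaces the `None` defaults on `ServerStoresConfig` with concrete references to the `kv_default` and `sql_default` backends and adds a `prompts` store. A small sketch of what that implies is below, assuming the class is importable from `llama_stack.core.storage.datatypes` as the diff shows and that no other required fields exist outside the hunk.

```python
from llama_stack.core.storage.datatypes import ServerStoresConfig

stores = ServerStoresConfig()

# metadata and prompts now point at the kv_default backend by default ...
print(stores.metadata)       # KVStoreReference -> backend "kv_default", namespace "registry"
print(stores.prompts)        # KVStoreReference -> backend "kv_default", namespace "prompts"

# ... inference and conversations at the sql_default backend ...
print(stores.inference)      # InferenceStoreReference -> backend "sql_default", table "inference_store"
print(stores.conversations)  # SqlStoreReference -> backend "sql_default", table "openai_conversations"

# ... while the responses store stays opt-in
print(stores.responses)      # None
```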
llama_stack/{providers/utils → core/storage}/kvstore/__init__.py CHANGED
@@ -4,4 +4,6 @@
  # This source code is licensed under the terms described in the LICENSE file in
  # the root directory of this source tree.

+ from llama_stack_api.internal.kvstore import KVStore as KVStore
+
  from .kvstore import * # noqa: F401, F403
llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py CHANGED
@@ -11,10 +11,21 @@

  from __future__ import annotations

- from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType
+ import asyncio
+ from collections import defaultdict
+ from datetime import datetime
+ from typing import cast

- from .api import KVStore
- from .config import KVStoreConfig
+ from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig
+ from llama_stack_api.internal.kvstore import KVStore
+
+ from .config import (
+     KVStoreConfig,
+     MongoDBKVStoreConfig,
+     PostgresKVStoreConfig,
+     RedisKVStoreConfig,
+     SqliteKVStoreConfig,
+ )


  def kvstore_dependencies():
@@ -30,7 +41,7 @@ def kvstore_dependencies():

  class InmemoryKVStoreImpl(KVStore):
      def __init__(self):
-         self._store = {}
+         self._store: dict[str, str] = {}

      async def initialize(self) -> None:
          pass
@@ -38,7 +49,7 @@ class InmemoryKVStoreImpl(KVStore):
      async def get(self, key: str) -> str | None:
          return self._store.get(key)

-     async def set(self, key: str, value: str) -> None:
+     async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
          self._store[key] = value

      async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
@@ -53,45 +64,65 @@ class InmemoryKVStoreImpl(KVStore):


  _KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
+ _KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}
+ _KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)


  def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
      """Register the set of available KV store backends for reference resolution."""
      global _KVSTORE_BACKENDS
+     global _KVSTORE_INSTANCES
+     global _KVSTORE_LOCKS

      _KVSTORE_BACKENDS.clear()
+     _KVSTORE_INSTANCES.clear()
+     _KVSTORE_LOCKS.clear()
      for name, cfg in backends.items():
-         _KVSTORE_BACKENDS[name] = cfg
+         typed_cfg = cast(KVStoreConfig, cfg)
+         _KVSTORE_BACKENDS[name] = typed_cfg


  async def kvstore_impl(reference: KVStoreReference) -> KVStore:
      backend_name = reference.backend
+     cache_key = (backend_name, reference.namespace)
+
+     existing = _KVSTORE_INSTANCES.get(cache_key)
+     if existing:
+         return existing

      backend_config = _KVSTORE_BACKENDS.get(backend_name)
      if backend_config is None:
          raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}")

-     config = backend_config.model_copy()
-     config.namespace = reference.namespace
+     lock = _KVSTORE_LOCKS[cache_key]
+     async with lock:
+         existing = _KVSTORE_INSTANCES.get(cache_key)
+         if existing:
+             return existing
+
+         config = backend_config.model_copy()
+         config.namespace = reference.namespace

-     if config.type == StorageBackendType.KV_REDIS.value:
-         from .redis import RedisKVStoreImpl
+         impl: KVStore
+         if isinstance(config, RedisKVStoreConfig):
+             from .redis import RedisKVStoreImpl

-         impl = RedisKVStoreImpl(config)
-     elif config.type == StorageBackendType.KV_SQLITE.value:
-         from .sqlite import SqliteKVStoreImpl
+             impl = RedisKVStoreImpl(config)
+         elif isinstance(config, SqliteKVStoreConfig):
+             from .sqlite import SqliteKVStoreImpl

-         impl = SqliteKVStoreImpl(config)
-     elif config.type == StorageBackendType.KV_POSTGRES.value:
-         from .postgres import PostgresKVStoreImpl
+             impl = SqliteKVStoreImpl(config)
+         elif isinstance(config, PostgresKVStoreConfig):
+             from .postgres import PostgresKVStoreImpl

-         impl = PostgresKVStoreImpl(config)
-     elif config.type == StorageBackendType.KV_MONGODB.value:
-         from .mongodb import MongoDBKVStoreImpl
+             impl = PostgresKVStoreImpl(config)
+         elif isinstance(config, MongoDBKVStoreConfig):
+             from .mongodb import MongoDBKVStoreImpl

-         impl = MongoDBKVStoreImpl(config)
-     else:
-         raise ValueError(f"Unknown kvstore type {config.type}")
+             impl = MongoDBKVStoreImpl(config)
+         else:
+             raise ValueError(f"Unknown kvstore type {config.type}")

-     await impl.initialize()
-     return impl
+         await impl.initialize()
+         _KVSTORE_INSTANCES[cache_key] = impl
+         return impl
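The kvstore.py changes above make `kvstore_impl` cache one `KVStore` instance per `(backend, namespace)` pair behind an asyncio lock instead of re-creating the store on every resolution. Below is a minimal sketch of the resulting behaviour, using only names that appear in the diff (`register_kvstore_backends`, `kvstore_impl`, `KVStoreReference`, `SqliteKVStoreConfig`, `set`/`get`); the import paths follow the moved module locations in the file list, and the temporary-directory setup is illustrative.

```python
import asyncio
import tempfile

from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig
from llama_stack.core.storage.kvstore.kvstore import kvstore_impl, register_kvstore_backends


async def main() -> None:
    with tempfile.TemporaryDirectory() as tmp:
        # register a single sqlite-backed KV backend under the name used by the new defaults
        register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path=f"{tmp}/kvstore.db")})

        ref = KVStoreReference(backend="kv_default", namespace="registry")
        store_a = await kvstore_impl(ref)
        store_b = await kvstore_impl(ref)

        # the same (backend, namespace) pair now resolves to the same cached instance
        assert store_a is store_b

        await store_a.set("key", "value")
        print(await store_b.get("key"))  # value


asyncio.run(main())
```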