llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460):
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -11,27 +11,40 @@ from urllib.parse import urlparse
11
11
 
12
12
  from pydantic import BaseModel, Field, field_validator, model_validator
13
13
 
14
- from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
15
- from llama_stack.apis.datasetio import DatasetIO
16
- from llama_stack.apis.datasets import Dataset, DatasetInput
17
- from llama_stack.apis.eval import Eval
18
- from llama_stack.apis.inference import Inference
19
- from llama_stack.apis.models import Model, ModelInput
20
- from llama_stack.apis.resource import Resource
21
- from llama_stack.apis.safety import Safety
22
- from llama_stack.apis.scoring import Scoring
23
- from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
24
- from llama_stack.apis.shields import Shield, ShieldInput
25
- from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
26
- from llama_stack.apis.vector_io import VectorIO
27
- from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
28
14
  from llama_stack.core.access_control.datatypes import AccessRule
29
15
  from llama_stack.core.storage.datatypes import (
30
16
  KVStoreReference,
31
17
  StorageBackendType,
32
18
  StorageConfig,
33
19
  )
34
- from llama_stack.providers.datatypes import Api, ProviderSpec
20
+ from llama_stack.log import LoggingConfig
21
+ from llama_stack_api import (
22
+ Api,
23
+ Benchmark,
24
+ BenchmarkInput,
25
+ ConnectorInput,
26
+ Dataset,
27
+ DatasetInput,
28
+ DatasetIO,
29
+ Eval,
30
+ Inference,
31
+ Model,
32
+ ModelInput,
33
+ ProviderSpec,
34
+ Resource,
35
+ Safety,
36
+ Scoring,
37
+ ScoringFn,
38
+ ScoringFnInput,
39
+ Shield,
40
+ ShieldInput,
41
+ ToolGroup,
42
+ ToolGroupInput,
43
+ ToolRuntime,
44
+ VectorIO,
45
+ VectorStore,
46
+ VectorStoreInput,
47
+ )
35
48
 
36
49
  LLAMA_STACK_BUILD_CONFIG_VERSION = 2
37
50
  LLAMA_STACK_RUN_CONFIG_VERSION = 2
@@ -179,30 +192,6 @@ class DistributionSpec(BaseModel):
179
192
  )
180
193
 
181
194
 
182
- class TelemetryConfig(BaseModel):
183
- """
184
- Configuration for telemetry.
185
-
186
- Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
187
- for env variables to configure the OpenTelemetry SDK.
188
-
189
- Example:
190
- ```bash
191
- OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
192
- ```
193
- """
194
-
195
- enabled: bool = Field(default=False, description="enable or disable telemetry")
196
-
197
-
198
- class LoggingConfig(BaseModel):
199
- category_levels: dict[str, str] = Field(
200
- default_factory=dict,
201
- description="""
202
- Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
203
- )
204
-
205
-
206
195
  class OAuth2JWKSConfig(BaseModel):
207
196
  # The JWKS URI for collecting public keys
208
197
  uri: str
@@ -361,6 +350,201 @@ class QualifiedModel(BaseModel):
361
350
  model_id: str
362
351
 
363
352
 
353
+ class RewriteQueryParams(BaseModel):
354
+ """Parameters for query rewriting/expansion."""
355
+
356
+ model: QualifiedModel | None = Field(
357
+ default=None,
358
+ description="LLM model for query rewriting/expansion in vector search.",
359
+ )
360
+ prompt: str = Field(
361
+ default="Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:",
362
+ description="Prompt template for query rewriting. Use {query} as placeholder for the original query.",
363
+ )
364
+ max_tokens: int = Field(
365
+ default=100,
366
+ description="Maximum number of tokens for query expansion responses.",
367
+ )
368
+ temperature: float = Field(
369
+ default=0.3,
370
+ description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).",
371
+ )
372
+
373
+ @field_validator("prompt")
374
+ @classmethod
375
+ def validate_prompt(cls, v: str) -> str:
376
+ if "{query}" not in v:
377
+ raise ValueError("prompt must contain {query} placeholder")
378
+ return v
379
+
380
+ @field_validator("max_tokens")
381
+ @classmethod
382
+ def validate_max_tokens(cls, v: int) -> int:
383
+ if v <= 0:
384
+ raise ValueError("max_tokens must be positive")
385
+ if v > 4096:
386
+ raise ValueError("max_tokens should not exceed 4096")
387
+ return v
388
+
389
+ @field_validator("temperature")
390
+ @classmethod
391
+ def validate_temperature(cls, v: float) -> float:
392
+ if v < 0.0 or v > 2.0:
393
+ raise ValueError("temperature must be between 0.0 and 2.0")
394
+ return v
395
+
396
+
397
+ class FileSearchParams(BaseModel):
398
+ """Configuration for file search tool output formatting."""
399
+
400
+ header_template: str = Field(
401
+ default="knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n",
402
+ description="Template for the header text shown before search results. Available placeholders: {num_chunks} number of chunks found.",
403
+ )
404
+ footer_template: str = Field(
405
+ default="END of knowledge_search tool results.\n",
406
+ description="Template for the footer text shown after search results.",
407
+ )
408
+
409
+ @field_validator("header_template")
410
+ @classmethod
411
+ def validate_header_template(cls, v: str) -> str:
412
+ if len(v) == 0:
413
+ raise ValueError("header_template must not be empty")
414
+ if "{num_chunks}" not in v:
415
+ raise ValueError("header_template must contain {num_chunks} placeholder")
416
+ if "knowledge_search" not in v.lower():
417
+ raise ValueError(
418
+ "header_template must contain 'knowledge_search' keyword to ensure proper tool identification"
419
+ )
420
+ return v
421
+
422
+
423
+ class ContextPromptParams(BaseModel):
424
+ """Configuration for LLM prompt content and chunk formatting."""
425
+
426
+ chunk_annotation_template: str = Field(
427
+ default="Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
428
+ description="Template for formatting individual chunks in search results. Available placeholders: {index} 1-based chunk index, {chunk.content} chunk content, {metadata} chunk metadata dict.",
429
+ )
430
+ context_template: str = Field(
431
+ default='The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query. {annotation_instruction}\n',
432
+ description="Template for explaining the search results to the model. Available placeholders: {query} user's query, {num_chunks} number of chunks.",
433
+ )
434
+
435
+ @field_validator("chunk_annotation_template")
436
+ @classmethod
437
+ def validate_chunk_annotation_template(cls, v: str) -> str:
438
+ if len(v) == 0:
439
+ raise ValueError("chunk_annotation_template must not be empty")
440
+ if "{chunk.content}" not in v:
441
+ raise ValueError("chunk_annotation_template must contain {chunk.content} placeholder")
442
+ if "{index}" not in v:
443
+ raise ValueError("chunk_annotation_template must contain {index} placeholder")
444
+ return v
445
+
446
+ @field_validator("context_template")
447
+ @classmethod
448
+ def validate_context_template(cls, v: str) -> str:
449
+ if len(v) == 0:
450
+ raise ValueError("context_template must not be empty")
451
+ if "{query}" not in v:
452
+ raise ValueError("context_template must contain {query} placeholder")
453
+ return v
454
+
455
+
456
+ class AnnotationPromptParams(BaseModel):
457
+ """Configuration for source annotation and attribution features."""
458
+
459
+ enable_annotations: bool = Field(
460
+ default=True,
461
+ description="Whether to include annotation information in results.",
462
+ )
463
+ annotation_instruction_template: str = Field(
464
+ default="Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.",
465
+ description="Instructions for how the model should cite sources. Used when enable_annotations is True.",
466
+ )
467
+ chunk_annotation_template: str = Field(
468
+ default="[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n",
469
+ description="Template for chunks with annotation information. Available placeholders: {index} 1-based chunk index, {metadata_text} formatted metadata, {file_id} document identifier, {chunk_text} chunk content.",
470
+ )
471
+
472
+ @field_validator("chunk_annotation_template")
473
+ @classmethod
474
+ def validate_chunk_annotation_template(cls, v: str) -> str:
475
+ if len(v) == 0:
476
+ raise ValueError("chunk_annotation_template must not be empty")
477
+ if "{index}" not in v:
478
+ raise ValueError("chunk_annotation_template must contain {index} placeholder")
479
+ if "{chunk_text}" not in v:
480
+ raise ValueError("chunk_annotation_template must contain {chunk_text} placeholder")
481
+ if "{file_id}" not in v:
482
+ raise ValueError("chunk_annotation_template must contain {file_id} placeholder")
483
+ return v
484
+
485
+ @field_validator("annotation_instruction_template")
486
+ @classmethod
487
+ def validate_annotation_instruction_template(cls, v: str) -> str:
488
+ if len(v) == 0:
489
+ raise ValueError("annotation_instruction_template must not be empty")
490
+ return v
491
+
492
+
493
+ class FileIngestionParams(BaseModel):
494
+ """Configuration for file processing during ingestion."""
495
+
496
+ default_chunk_size_tokens: int = Field(
497
+ default=512,
498
+ description="Default chunk size for RAG tool operations when not specified",
499
+ )
500
+ default_chunk_overlap_tokens: int = Field(
501
+ default=128,
502
+ description="Default overlap in tokens between chunks (original default: 512 // 4 = 128)",
503
+ )
504
+
505
+
506
+ class ChunkRetrievalParams(BaseModel):
507
+ """Configuration for chunk retrieval and ranking during search."""
508
+
509
+ chunk_multiplier: int = Field(
510
+ default=5,
511
+ description="Multiplier for OpenAI API over-retrieval (affects all providers)",
512
+ )
513
+ max_tokens_in_context: int = Field(
514
+ default=4000,
515
+ description="Maximum tokens allowed in RAG context before truncation",
516
+ )
517
+ default_reranker_strategy: str = Field(
518
+ default="rrf",
519
+ description="Default reranker when not specified: 'rrf', 'weighted', or 'normalized'",
520
+ )
521
+ rrf_impact_factor: float = Field(
522
+ default=60.0,
523
+ description="Impact factor for RRF (Reciprocal Rank Fusion) reranking",
524
+ )
525
+ weighted_search_alpha: float = Field(
526
+ default=0.5,
527
+ description="Alpha weight for weighted search reranking (0.0-1.0)",
528
+ )
529
+
530
+
531
+ class FileBatchParams(BaseModel):
532
+ """Configuration for file batch processing."""
533
+
534
+ max_concurrent_files_per_batch: int = Field(
535
+ default=3,
536
+ description="Maximum files processed concurrently in file batches",
537
+ )
538
+ file_batch_chunk_size: int = Field(
539
+ default=10,
540
+ description="Number of files to process in each batch chunk",
541
+ )
542
+ cleanup_interval_seconds: int = Field(
543
+ default=86400, # 24 hours
544
+ description="Interval for cleaning up expired file batches (seconds)",
545
+ )
546
+
547
+
364
548
  class VectorStoresConfig(BaseModel):
365
549
  """Configuration for vector stores in the stack."""
366
550
 
@@ -372,6 +556,44 @@ class VectorStoresConfig(BaseModel):
372
556
  default=None,
373
557
  description="Default embedding model configuration for vector stores.",
374
558
  )
559
+ rewrite_query_params: RewriteQueryParams | None = Field(
560
+ default=None,
561
+ description="Parameters for query rewriting/expansion. None disables query rewriting.",
562
+ )
563
+ file_search_params: FileSearchParams = Field(
564
+ default_factory=FileSearchParams,
565
+ description="Configuration for file search tool output formatting.",
566
+ )
567
+ context_prompt_params: ContextPromptParams = Field(
568
+ default_factory=ContextPromptParams,
569
+ description="Configuration for LLM prompt content and chunk formatting.",
570
+ )
571
+ annotation_prompt_params: AnnotationPromptParams = Field(
572
+ default_factory=AnnotationPromptParams,
573
+ description="Configuration for source annotation and attribution features.",
574
+ )
575
+
576
+ file_ingestion_params: FileIngestionParams = Field(
577
+ default_factory=FileIngestionParams,
578
+ description="Configuration for file processing during ingestion.",
579
+ )
580
+ chunk_retrieval_params: ChunkRetrievalParams = Field(
581
+ default_factory=ChunkRetrievalParams,
582
+ description="Configuration for chunk retrieval and ranking during search.",
583
+ )
584
+ file_batch_params: FileBatchParams = Field(
585
+ default_factory=FileBatchParams,
586
+ description="Configuration for file batch processing.",
587
+ )
588
+
589
+
590
+ class SafetyConfig(BaseModel):
591
+ """Configuration for default moderations model."""
592
+
593
+ default_shield_id: str | None = Field(
594
+ default=None,
595
+ description="ID of the shield to use for when `model` is not specified in the `moderations` API request.",
596
+ )
375
597
 
376
598
 
377
599
  class QuotaPeriod(StrEnum):
@@ -432,6 +654,7 @@ class RegisteredResources(BaseModel):
432
654
  scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
433
655
  benchmarks: list[BenchmarkInput] = Field(default_factory=list)
434
656
  tool_groups: list[ToolGroupInput] = Field(default_factory=list)
657
+ connectors: list[ConnectorInput] = Field(default_factory=list)
435
658
 
436
659
 
437
660
  class ServerConfig(BaseModel):
@@ -477,7 +700,7 @@ class ServerConfig(BaseModel):
477
700
  )
478
701
 
479
702
 
480
- class StackRunConfig(BaseModel):
703
+ class StackConfig(BaseModel):
481
704
  version: int = LLAMA_STACK_RUN_CONFIG_VERSION
482
705
 
483
706
  image_name: str = Field(
@@ -504,6 +727,7 @@ can be instantiated multiple times (with different configs) if necessary.
504
727
  """,
505
728
  )
506
729
  storage: StorageConfig = Field(
730
+ default_factory=StorageConfig,
507
731
  description="Catalog of named storage backends and references available to the stack",
508
732
  )
509
733
 
@@ -514,8 +738,6 @@ can be instantiated multiple times (with different configs) if necessary.
514
738
 
515
739
  logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
516
740
 
517
- telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
518
-
519
741
  server: ServerConfig = Field(
520
742
  default_factory=ServerConfig,
521
743
  description="Configuration for the HTTP(S) server",
@@ -536,6 +758,11 @@ can be instantiated multiple times (with different configs) if necessary.
536
758
  description="Configuration for vector stores, including default embedding model",
537
759
  )
538
760
 
761
+ safety: SafetyConfig | None = Field(
762
+ default=None,
763
+ description="Configuration for default moderations model",
764
+ )
765
+
539
766
  @field_validator("external_providers_dir")
540
767
  @classmethod
541
768
  def validate_external_providers_dir(cls, v):
@@ -546,7 +773,7 @@ can be instantiated multiple times (with different configs) if necessary.
546
773
  return v
547
774
 
548
775
  @model_validator(mode="after")
549
- def validate_server_stores(self) -> "StackRunConfig":
776
+ def validate_server_stores(self) -> "StackConfig":
550
777
  backend_map = self.storage.backends
551
778
  stores = self.storage.stores
552
779
  kv_backends = {
@@ -586,40 +813,5 @@ can be instantiated multiple times (with different configs) if necessary.
586
813
  _ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
587
814
  _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
588
815
  _ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
816
+ _ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts")
589
817
  return self
590
-
591
-
592
- class BuildConfig(BaseModel):
593
- version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
594
-
595
- distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
596
- image_type: str = Field(
597
- default="venv",
598
- description="Type of package to build (container | venv)",
599
- )
600
- image_name: str | None = Field(
601
- default=None,
602
- description="Name of the distribution to build",
603
- )
604
- external_providers_dir: Path | None = Field(
605
- default=None,
606
- description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. "
607
- "pip_packages MUST contain the provider package name.",
608
- )
609
- additional_pip_packages: list[str] = Field(
610
- default_factory=list,
611
- description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.",
612
- )
613
- external_apis_dir: Path | None = Field(
614
- default=None,
615
- description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
616
- )
617
-
618
- @field_validator("external_providers_dir")
619
- @classmethod
620
- def validate_external_providers_dir(cls, v):
621
- if v is None:
622
- return None
623
- if isinstance(v, str):
624
- return Path(v)
625
- return v
@@ -12,10 +12,10 @@ from typing import Any
12
12
  import yaml
13
13
  from pydantic import BaseModel
14
14
 
15
- from llama_stack.core.datatypes import BuildConfig, DistributionSpec
15
+ from llama_stack.core.datatypes import StackConfig
16
16
  from llama_stack.core.external import load_external_apis
17
17
  from llama_stack.log import get_logger
18
- from llama_stack.providers.datatypes import (
18
+ from llama_stack_api import (
19
19
  Api,
20
20
  InlineProviderSpec,
21
21
  ProviderSpec,
@@ -25,7 +25,7 @@ from llama_stack.providers.datatypes import (
25
25
  logger = get_logger(name=__name__, category="core")
26
26
 
27
27
 
28
- INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.telemetry}
28
+ INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.connectors, Api.admin}
29
29
 
30
30
 
31
31
  def stack_apis() -> list[Api]:
@@ -85,7 +85,9 @@ def _load_inline_provider_spec(spec_data: dict[str, Any], api: Api, provider_nam
85
85
  return spec
86
86
 
87
87
 
88
- def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
88
+ def get_provider_registry(
89
+ config: StackConfig | None = None, listing: bool = False
90
+ ) -> dict[Api, dict[str, ProviderSpec]]:
89
91
  """Get the provider registry, optionally including external providers.
90
92
 
91
93
  This function loads both built-in providers and external providers from YAML files or from their provided modules.
@@ -109,13 +111,13 @@ def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
109
111
  safety/
110
112
  llama-guard.yaml
111
113
 
112
- This method is overloaded in that it can be called from a variety of places: during build, during run, during stack construction.
113
- So when building external providers from a module, there are scenarios where the pip package required to import the module might not be available yet.
114
+ This method is overloaded in that it can be called from a variety of places: during list-deps, during run, during stack construction.
115
+ So when listing external providers from a module, there are scenarios where the pip package required to import the module might not be available yet.
114
116
  There is special handling for all of the potential cases this method can be called from.
115
117
 
116
118
  Args:
117
119
  config: Optional object containing the external providers directory path
118
- building: Optional bool delineating whether or not this is being called from a build process
120
+ listing: Optional bool delineating whether or not this is being called from a list-deps process
119
121
 
120
122
  Returns:
121
123
  A dictionary mapping APIs to their available providers
@@ -161,7 +163,7 @@ def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
161
163
  registry = get_external_providers_from_module(
162
164
  registry=registry,
163
165
  config=config,
164
- building=(isinstance(config, BuildConfig) or isinstance(config, DistributionSpec)),
166
+ listing=listing,
165
167
  )
166
168
 
167
169
  return registry
@@ -220,13 +222,10 @@ def get_external_providers_from_dir(
220
222
 
221
223
 
222
224
  def get_external_providers_from_module(
223
- registry: dict[Api, dict[str, ProviderSpec]], config, building: bool
225
+ registry: dict[Api, dict[str, ProviderSpec]], config, listing: bool
224
226
  ) -> dict[Api, dict[str, ProviderSpec]]:
225
227
  provider_list = None
226
- if isinstance(config, BuildConfig):
227
- provider_list = config.distribution_spec.providers.items()
228
- else:
229
- provider_list = config.providers.items()
228
+ provider_list = config.providers.items()
230
229
  if provider_list is None:
231
230
  logger.warning("Could not get list of providers from config")
232
231
  return registry
@@ -236,14 +235,14 @@ def get_external_providers_from_module(
236
235
  continue
237
236
  # get provider using module
238
237
  try:
239
- if not building:
238
+ if not listing:
240
239
  package_name = provider.module.split("==")[0]
241
240
  module = importlib.import_module(f"{package_name}.provider")
242
241
  # if config class is wrong you will get an error saying module could not be imported
243
242
  spec = module.get_provider_spec()
244
243
  else:
245
- # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
246
- # in the case we are building we CANNOT import this module of course because it has not been installed.
244
+ # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon list-deps and run
245
+ # in the case we are listing we CANNOT import this module of course because it has not been installed.
247
246
  spec = ProviderSpec(
248
247
  api=Api(provider_api),
249
248
  provider_type=provider.provider_type,
@@ -7,14 +7,14 @@
7
7
 
8
8
  import yaml
9
9
 
10
- from llama_stack.apis.datatypes import Api, ExternalApiSpec
11
- from llama_stack.core.datatypes import BuildConfig, StackRunConfig
10
+ from llama_stack.core.datatypes import StackConfig
12
11
  from llama_stack.log import get_logger
12
+ from llama_stack_api import Api, ExternalApiSpec
13
13
 
14
14
  logger = get_logger(name=__name__, category="core")
15
15
 
16
16
 
17
- def load_external_apis(config: StackRunConfig | BuildConfig | None) -> dict[Api, ExternalApiSpec]:
17
+ def load_external_apis(config: StackConfig | None) -> dict[Api, ExternalApiSpec]:
18
18
  """Load external API specifications from the configured directory.
19
19
 
20
20
  Args: