llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. The information is provided for informational purposes only.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -27,12 +27,12 @@ providers:
27
27
  - provider_id: groq
28
28
  provider_type: remote::groq
29
29
  config:
30
- url: https://api.groq.com
30
+ base_url: https://api.groq.com/openai/v1
31
31
  api_key: ${env.GROQ_API_KEY:=}
32
32
  - provider_id: together
33
33
  provider_type: remote::together
34
34
  config:
35
- url: https://api.together.xyz/v1
35
+ base_url: https://api.together.xyz/v1
36
36
  api_key: ${env.TOGETHER_API_KEY:=}
37
37
  vector_io:
38
38
  - provider_id: sqlite-vec
@@ -142,6 +142,9 @@ storage:
142
142
  conversations:
143
143
  table_name: openai_conversations
144
144
  backend: sql_default
145
+ prompts:
146
+ namespace: prompts
147
+ backend: kv_default
145
148
  registered_resources:
146
149
  models:
147
150
  - metadata: {}
@@ -248,5 +251,3 @@ registered_resources:
248
251
  provider_id: rag-runtime
249
252
  server:
250
253
  port: 8321
251
- telemetry:
252
- enabled: true
@@ -5,8 +5,6 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
 
8
- from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
9
- from llama_stack.apis.models import ModelType
10
8
  from llama_stack.core.datatypes import (
11
9
  BenchmarkInput,
12
10
  BuildProvider,
@@ -34,6 +32,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
34
32
  PGVectorVectorIOConfig,
35
33
  )
36
34
  from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
35
+ from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
37
36
 
38
37
 
39
38
  def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
@@ -262,7 +261,7 @@ def get_distribution_template() -> DistributionTemplate:
262
261
  providers=providers,
263
262
  available_models_by_provider=available_models,
264
263
  run_configs={
265
- "run.yaml": RunConfigSettings(
264
+ "config.yaml": RunConfigSettings(
266
265
  provider_overrides={
267
266
  "inference": inference_providers,
268
267
  "vector_io": vector_io_providers,
@@ -11,7 +11,7 @@ providers:
11
11
  - provider_id: vllm-inference
12
12
  provider_type: remote::vllm
13
13
  config:
14
- url: ${env.VLLM_URL:=http://localhost:8000/v1}
14
+ base_url: ${env.VLLM_URL:=}
15
15
  max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
16
16
  api_token: ${env.VLLM_API_TOKEN:=fake}
17
17
  tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -87,6 +87,9 @@ storage:
87
87
  conversations:
88
88
  table_name: openai_conversations
89
89
  backend: sql_default
90
+ prompts:
91
+ namespace: prompts
92
+ backend: kv_default
90
93
  registered_resources:
91
94
  models:
92
95
  - metadata: {}
@@ -111,5 +114,3 @@ registered_resources:
111
114
  provider_id: rag-runtime
112
115
  server:
113
116
  port: 8321
114
- telemetry:
115
- enabled: true
@@ -17,41 +17,43 @@ providers:
17
17
  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
18
18
  provider_type: remote::cerebras
19
19
  config:
20
- base_url: https://api.cerebras.ai
20
+ base_url: https://api.cerebras.ai/v1
21
21
  api_key: ${env.CEREBRAS_API_KEY:=}
22
22
  - provider_id: ${env.OLLAMA_URL:+ollama}
23
23
  provider_type: remote::ollama
24
24
  config:
25
- url: ${env.OLLAMA_URL:=http://localhost:11434}
25
+ base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
26
26
  - provider_id: ${env.VLLM_URL:+vllm}
27
27
  provider_type: remote::vllm
28
28
  config:
29
- url: ${env.VLLM_URL:=}
29
+ base_url: ${env.VLLM_URL:=}
30
30
  max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
31
31
  api_token: ${env.VLLM_API_TOKEN:=fake}
32
32
  tls_verify: ${env.VLLM_TLS_VERIFY:=true}
33
33
  - provider_id: ${env.TGI_URL:+tgi}
34
34
  provider_type: remote::tgi
35
35
  config:
36
- url: ${env.TGI_URL:=}
36
+ base_url: ${env.TGI_URL:=}
37
37
  - provider_id: fireworks
38
38
  provider_type: remote::fireworks
39
39
  config:
40
- url: https://api.fireworks.ai/inference/v1
40
+ base_url: https://api.fireworks.ai/inference/v1
41
41
  api_key: ${env.FIREWORKS_API_KEY:=}
42
42
  - provider_id: together
43
43
  provider_type: remote::together
44
44
  config:
45
- url: https://api.together.xyz/v1
45
+ base_url: https://api.together.xyz/v1
46
46
  api_key: ${env.TOGETHER_API_KEY:=}
47
47
  - provider_id: bedrock
48
48
  provider_type: remote::bedrock
49
+ config:
50
+ api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
51
+ region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
49
52
  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
50
53
  provider_type: remote::nvidia
51
54
  config:
52
- url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
55
+ base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
53
56
  api_key: ${env.NVIDIA_API_KEY:=}
54
- append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
55
57
  - provider_id: openai
56
58
  provider_type: remote::openai
57
59
  config:
@@ -73,18 +75,18 @@ providers:
73
75
  - provider_id: groq
74
76
  provider_type: remote::groq
75
77
  config:
76
- url: https://api.groq.com
78
+ base_url: https://api.groq.com/openai/v1
77
79
  api_key: ${env.GROQ_API_KEY:=}
78
80
  - provider_id: sambanova
79
81
  provider_type: remote::sambanova
80
82
  config:
81
- url: https://api.sambanova.ai/v1
83
+ base_url: https://api.sambanova.ai/v1
82
84
  api_key: ${env.SAMBANOVA_API_KEY:=}
83
85
  - provider_id: ${env.AZURE_API_KEY:+azure}
84
86
  provider_type: remote::azure
85
87
  config:
86
88
  api_key: ${env.AZURE_API_KEY:=}
87
- api_base: ${env.AZURE_API_BASE:=}
89
+ base_url: ${env.AZURE_API_BASE:=}
88
90
  api_version: ${env.AZURE_API_VERSION:=}
89
91
  api_type: ${env.AZURE_API_TYPE:=}
90
92
  - provider_id: sentence-transformers
@@ -247,6 +249,9 @@ storage:
247
249
  conversations:
248
250
  table_name: openai_conversations
249
251
  backend: sql_default
252
+ prompts:
253
+ namespace: prompts
254
+ backend: kv_default
250
255
  registered_resources:
251
256
  models: []
252
257
  shields:
@@ -267,10 +272,56 @@ registered_resources:
267
272
  provider_id: rag-runtime
268
273
  server:
269
274
  port: 8321
270
- telemetry:
271
- enabled: true
272
275
  vector_stores:
273
276
  default_provider_id: faiss
274
277
  default_embedding_model:
275
278
  provider_id: sentence-transformers
276
279
  model_id: nomic-ai/nomic-embed-text-v1.5
280
+ file_search_params:
281
+ header_template: 'knowledge_search tool found {num_chunks} chunks:
282
+
283
+ BEGIN of knowledge_search tool results.
284
+
285
+ '
286
+ footer_template: 'END of knowledge_search tool results.
287
+
288
+ '
289
+ context_prompt_params:
290
+ chunk_annotation_template: 'Result {index}
291
+
292
+ Content: {chunk.content}
293
+
294
+ Metadata: {metadata}
295
+
296
+ '
297
+ context_template: 'The above results were retrieved to help answer the user''s
298
+ query: "{query}". Use them as supporting information only in answering this
299
+ query. {annotation_instruction}
300
+
301
+ '
302
+ annotation_prompt_params:
303
+ enable_annotations: true
304
+ annotation_instruction_template: Cite sources immediately at the end of sentences
305
+ before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
306
+ Do not add extra punctuation. Use only the file IDs provided, do not invent
307
+ new ones.
308
+ chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
309
+
310
+ {chunk_text}
311
+
312
+ '
313
+ file_ingestion_params:
314
+ default_chunk_size_tokens: 512
315
+ default_chunk_overlap_tokens: 128
316
+ chunk_retrieval_params:
317
+ chunk_multiplier: 5
318
+ max_tokens_in_context: 4000
319
+ default_reranker_strategy: rrf
320
+ rrf_impact_factor: 60.0
321
+ weighted_search_alpha: 0.5
322
+ file_batch_params:
323
+ max_concurrent_files_per_batch: 3
324
+ file_batch_chunk_size: 10
325
+ cleanup_interval_seconds: 86400
326
+ safety:
327
+ default_shield_id: llama-guard
@@ -17,41 +17,43 @@ providers:
17
17
  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
18
18
  provider_type: remote::cerebras
19
19
  config:
20
- base_url: https://api.cerebras.ai
20
+ base_url: https://api.cerebras.ai/v1
21
21
  api_key: ${env.CEREBRAS_API_KEY:=}
22
22
  - provider_id: ${env.OLLAMA_URL:+ollama}
23
23
  provider_type: remote::ollama
24
24
  config:
25
- url: ${env.OLLAMA_URL:=http://localhost:11434}
25
+ base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
26
26
  - provider_id: ${env.VLLM_URL:+vllm}
27
27
  provider_type: remote::vllm
28
28
  config:
29
- url: ${env.VLLM_URL:=}
29
+ base_url: ${env.VLLM_URL:=}
30
30
  max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
31
31
  api_token: ${env.VLLM_API_TOKEN:=fake}
32
32
  tls_verify: ${env.VLLM_TLS_VERIFY:=true}
33
33
  - provider_id: ${env.TGI_URL:+tgi}
34
34
  provider_type: remote::tgi
35
35
  config:
36
- url: ${env.TGI_URL:=}
36
+ base_url: ${env.TGI_URL:=}
37
37
  - provider_id: fireworks
38
38
  provider_type: remote::fireworks
39
39
  config:
40
- url: https://api.fireworks.ai/inference/v1
40
+ base_url: https://api.fireworks.ai/inference/v1
41
41
  api_key: ${env.FIREWORKS_API_KEY:=}
42
42
  - provider_id: together
43
43
  provider_type: remote::together
44
44
  config:
45
- url: https://api.together.xyz/v1
45
+ base_url: https://api.together.xyz/v1
46
46
  api_key: ${env.TOGETHER_API_KEY:=}
47
47
  - provider_id: bedrock
48
48
  provider_type: remote::bedrock
49
+ config:
50
+ api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
51
+ region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
49
52
  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
50
53
  provider_type: remote::nvidia
51
54
  config:
52
- url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
55
+ base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
53
56
  api_key: ${env.NVIDIA_API_KEY:=}
54
- append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
55
57
  - provider_id: openai
56
58
  provider_type: remote::openai
57
59
  config:
@@ -73,18 +75,18 @@ providers:
73
75
  - provider_id: groq
74
76
  provider_type: remote::groq
75
77
  config:
76
- url: https://api.groq.com
78
+ base_url: https://api.groq.com/openai/v1
77
79
  api_key: ${env.GROQ_API_KEY:=}
78
80
  - provider_id: sambanova
79
81
  provider_type: remote::sambanova
80
82
  config:
81
- url: https://api.sambanova.ai/v1
83
+ base_url: https://api.sambanova.ai/v1
82
84
  api_key: ${env.SAMBANOVA_API_KEY:=}
83
85
  - provider_id: ${env.AZURE_API_KEY:+azure}
84
86
  provider_type: remote::azure
85
87
  config:
86
88
  api_key: ${env.AZURE_API_KEY:=}
87
- api_base: ${env.AZURE_API_BASE:=}
89
+ base_url: ${env.AZURE_API_BASE:=}
88
90
  api_version: ${env.AZURE_API_VERSION:=}
89
91
  api_type: ${env.AZURE_API_TYPE:=}
90
92
  - provider_id: sentence-transformers
@@ -256,6 +258,9 @@ storage:
256
258
  conversations:
257
259
  table_name: openai_conversations
258
260
  backend: sql_default
261
+ prompts:
262
+ namespace: prompts
263
+ backend: kv_default
259
264
  registered_resources:
260
265
  models: []
261
266
  shields:
@@ -276,10 +281,56 @@ registered_resources:
276
281
  provider_id: rag-runtime
277
282
  server:
278
283
  port: 8321
279
- telemetry:
280
- enabled: true
281
284
  vector_stores:
282
285
  default_provider_id: faiss
283
286
  default_embedding_model:
284
287
  provider_id: sentence-transformers
285
288
  model_id: nomic-ai/nomic-embed-text-v1.5
289
+ file_search_params:
290
+ header_template: 'knowledge_search tool found {num_chunks} chunks:
291
+
292
+ BEGIN of knowledge_search tool results.
293
+
294
+ '
295
+ footer_template: 'END of knowledge_search tool results.
296
+
297
+ '
298
+ context_prompt_params:
299
+ chunk_annotation_template: 'Result {index}
300
+
301
+ Content: {chunk.content}
302
+
303
+ Metadata: {metadata}
304
+
305
+ '
306
+ context_template: 'The above results were retrieved to help answer the user''s
307
+ query: "{query}". Use them as supporting information only in answering this
308
+ query. {annotation_instruction}
309
+
310
+ '
311
+ annotation_prompt_params:
312
+ enable_annotations: true
313
+ annotation_instruction_template: Cite sources immediately at the end of sentences
314
+ before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
315
+ Do not add extra punctuation. Use only the file IDs provided, do not invent
316
+ new ones.
317
+ chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
318
+
319
+ {chunk_text}
320
+
321
+ '
322
+ file_ingestion_params:
323
+ default_chunk_size_tokens: 512
324
+ default_chunk_overlap_tokens: 128
325
+ chunk_retrieval_params:
326
+ chunk_multiplier: 5
327
+ max_tokens_in_context: 4000
328
+ default_reranker_strategy: rrf
329
+ rrf_impact_factor: 60.0
330
+ weighted_search_alpha: 0.5
331
+ file_batch_params:
332
+ max_concurrent_files_per_batch: 3
333
+ file_batch_chunk_size: 10
334
+ cleanup_interval_seconds: 86400
335
+ safety:
336
+ default_shield_id: llama-guard
@@ -12,13 +12,15 @@ from llama_stack.core.datatypes import (
12
12
  Provider,
13
13
  ProviderSpec,
14
14
  QualifiedModel,
15
+ SafetyConfig,
15
16
  ShieldInput,
16
17
  ToolGroupInput,
17
18
  VectorStoresConfig,
18
19
  )
20
+ from llama_stack.core.storage.kvstore.config import PostgresKVStoreConfig
21
+ from llama_stack.core.storage.sqlstore.sqlstore import PostgresSqlStoreConfig
19
22
  from llama_stack.core.utils.dynamic import instantiate_class_type
20
23
  from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
21
- from llama_stack.providers.datatypes import RemoteProviderSpec
22
24
  from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
23
25
  from llama_stack.providers.inline.inference.sentence_transformers import (
24
26
  SentenceTransformersInferenceConfig,
@@ -35,8 +37,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
35
37
  )
36
38
  from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
37
39
  from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
38
- from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
39
- from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
40
+ from llama_stack_api import RemoteProviderSpec
40
41
 
41
42
 
42
43
  def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]:
@@ -252,6 +253,9 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
252
253
  model_id="nomic-ai/nomic-embed-text-v1.5",
253
254
  ),
254
255
  ),
256
+ safety_config=SafetyConfig(
257
+ default_shield_id="llama-guard",
258
+ ),
255
259
  )
256
260
 
257
261
  postgres_run_settings = base_run_settings.model_copy(
@@ -271,9 +275,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
271
275
  container_image=None,
272
276
  template_path=None,
273
277
  providers=providers,
274
- additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
275
278
  run_configs={
276
- "run.yaml": base_run_settings,
279
+ "config.yaml": base_run_settings,
277
280
  "run-with-postgres-store.yaml": postgres_run_settings,
278
281
  },
279
282
  run_config_env_vars={
@@ -17,41 +17,43 @@ providers:
17
17
  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
18
18
  provider_type: remote::cerebras
19
19
  config:
20
- base_url: https://api.cerebras.ai
20
+ base_url: https://api.cerebras.ai/v1
21
21
  api_key: ${env.CEREBRAS_API_KEY:=}
22
22
  - provider_id: ${env.OLLAMA_URL:+ollama}
23
23
  provider_type: remote::ollama
24
24
  config:
25
- url: ${env.OLLAMA_URL:=http://localhost:11434}
25
+ base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
26
26
  - provider_id: ${env.VLLM_URL:+vllm}
27
27
  provider_type: remote::vllm
28
28
  config:
29
- url: ${env.VLLM_URL:=}
29
+ base_url: ${env.VLLM_URL:=}
30
30
  max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
31
31
  api_token: ${env.VLLM_API_TOKEN:=fake}
32
32
  tls_verify: ${env.VLLM_TLS_VERIFY:=true}
33
33
  - provider_id: ${env.TGI_URL:+tgi}
34
34
  provider_type: remote::tgi
35
35
  config:
36
- url: ${env.TGI_URL:=}
36
+ base_url: ${env.TGI_URL:=}
37
37
  - provider_id: fireworks
38
38
  provider_type: remote::fireworks
39
39
  config:
40
- url: https://api.fireworks.ai/inference/v1
40
+ base_url: https://api.fireworks.ai/inference/v1
41
41
  api_key: ${env.FIREWORKS_API_KEY:=}
42
42
  - provider_id: together
43
43
  provider_type: remote::together
44
44
  config:
45
- url: https://api.together.xyz/v1
45
+ base_url: https://api.together.xyz/v1
46
46
  api_key: ${env.TOGETHER_API_KEY:=}
47
47
  - provider_id: bedrock
48
48
  provider_type: remote::bedrock
49
+ config:
50
+ api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
51
+ region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
49
52
  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
50
53
  provider_type: remote::nvidia
51
54
  config:
52
- url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
55
+ base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
53
56
  api_key: ${env.NVIDIA_API_KEY:=}
54
- append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
55
57
  - provider_id: openai
56
58
  provider_type: remote::openai
57
59
  config:
@@ -73,18 +75,18 @@ providers:
73
75
  - provider_id: groq
74
76
  provider_type: remote::groq
75
77
  config:
76
- url: https://api.groq.com
78
+ base_url: https://api.groq.com/openai/v1
77
79
  api_key: ${env.GROQ_API_KEY:=}
78
80
  - provider_id: sambanova
79
81
  provider_type: remote::sambanova
80
82
  config:
81
- url: https://api.sambanova.ai/v1
83
+ base_url: https://api.sambanova.ai/v1
82
84
  api_key: ${env.SAMBANOVA_API_KEY:=}
83
85
  - provider_id: ${env.AZURE_API_KEY:+azure}
84
86
  provider_type: remote::azure
85
87
  config:
86
88
  api_key: ${env.AZURE_API_KEY:=}
87
- api_base: ${env.AZURE_API_BASE:=}
89
+ base_url: ${env.AZURE_API_BASE:=}
88
90
  api_version: ${env.AZURE_API_VERSION:=}
89
91
  api_type: ${env.AZURE_API_TYPE:=}
90
92
  - provider_id: sentence-transformers
@@ -250,6 +252,9 @@ storage:
250
252
  conversations:
251
253
  table_name: openai_conversations
252
254
  backend: sql_default
255
+ prompts:
256
+ namespace: prompts
257
+ backend: kv_default
253
258
  registered_resources:
254
259
  models: []
255
260
  shields:
@@ -270,10 +275,56 @@ registered_resources:
270
275
  provider_id: rag-runtime
271
276
  server:
272
277
  port: 8321
273
- telemetry:
274
- enabled: true
275
278
  vector_stores:
276
279
  default_provider_id: faiss
277
280
  default_embedding_model:
278
281
  provider_id: sentence-transformers
279
282
  model_id: nomic-ai/nomic-embed-text-v1.5
283
+ file_search_params:
284
+ header_template: 'knowledge_search tool found {num_chunks} chunks:
285
+
286
+ BEGIN of knowledge_search tool results.
287
+
288
+ '
289
+ footer_template: 'END of knowledge_search tool results.
290
+
291
+ '
292
+ context_prompt_params:
293
+ chunk_annotation_template: 'Result {index}
294
+
295
+ Content: {chunk.content}
296
+
297
+ Metadata: {metadata}
298
+
299
+ '
300
+ context_template: 'The above results were retrieved to help answer the user''s
301
+ query: "{query}". Use them as supporting information only in answering this
302
+ query. {annotation_instruction}
303
+
304
+ '
305
+ annotation_prompt_params:
306
+ enable_annotations: true
307
+ annotation_instruction_template: Cite sources immediately at the end of sentences
308
+ before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
309
+ Do not add extra punctuation. Use only the file IDs provided, do not invent
310
+ new ones.
311
+ chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
312
+
313
+ {chunk_text}
314
+
315
+ '
316
+ file_ingestion_params:
317
+ default_chunk_size_tokens: 512
318
+ default_chunk_overlap_tokens: 128
319
+ chunk_retrieval_params:
320
+ chunk_multiplier: 5
321
+ max_tokens_in_context: 4000
322
+ default_reranker_strategy: rrf
323
+ rrf_impact_factor: 60.0
324
+ weighted_search_alpha: 0.5
325
+ file_batch_params:
326
+ max_concurrent_files_per_batch: 3
327
+ file_batch_chunk_size: 10
328
+ cleanup_interval_seconds: 86400
329
+ safety:
330
+ default_shield_id: llama-guard