llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -17,41 +17,43 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
+    config:
+      api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
+      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -73,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
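
A note on the substitution syntax used throughout these configs: ${env.VAR:=default} expands to the value of VAR, or to the default when VAR is unset, while ${env.VAR:+value} expands to value only when VAR is set, which is how providers such as cerebras, ollama, vllm, and tgi are enabled conditionally. A minimal Python sketch of these two bash-style rules, for illustration only (the stack's real resolver may handle additional forms):

    import os
    import re

    # Illustrative re-implementation of the two expansion forms above:
    #   ${env.VAR:=default} -> value of VAR, or "default" when VAR is unset
    #   ${env.VAR:+value}   -> "value" when VAR is set, else the empty string
    _PATTERN = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")

    def expand(text: str) -> str:
        def repl(match: re.Match) -> str:
            var, op, operand = match.groups()
            value = os.environ.get(var)
            if op == "=":
                return value if value is not None else operand
            return operand if value else ""

        return _PATTERN.sub(repl, text)

    # With OLLAMA_URL unset:
    #   expand("${env.OLLAMA_URL:+ollama}") == ""  (provider left disabled)
    #   expand("${env.OLLAMA_URL:=http://localhost:11434/v1}") == "http://localhost:11434/v1"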
@@ -259,6 +261,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -279,10 +284,56 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  file_search_params:
+    header_template: 'knowledge_search tool found {num_chunks} chunks:
+
+      BEGIN of knowledge_search tool results.
+
+      '
+    footer_template: 'END of knowledge_search tool results.
+
+      '
+  context_prompt_params:
+    chunk_annotation_template: 'Result {index}
+
+      Content: {chunk.content}
+
+      Metadata: {metadata}
+
+      '
+    context_template: 'The above results were retrieved to help answer the user''s
+      query: "{query}". Use them as supporting information only in answering this
+      query. {annotation_instruction}
+
+      '
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
+      Do not add extra punctuation. Use only the file IDs provided, do not invent
+      new ones.
+    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
+
+      {chunk_text}
+
+      '
+  file_ingestion_params:
+    default_chunk_size_tokens: 512
+    default_chunk_overlap_tokens: 128
+  chunk_retrieval_params:
+    chunk_multiplier: 5
+    max_tokens_in_context: 4000
+    default_reranker_strategy: rrf
+    rrf_impact_factor: 60.0
+    weighted_search_alpha: 0.5
+  file_batch_params:
+    max_concurrent_files_per_batch: 3
+    file_batch_chunk_size: 10
+    cleanup_interval_seconds: 86400
+safety:
+  default_shield_id: llama-guard
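
The prompt templates added above use {placeholder} fields ({num_chunks}, {index}, {file_id}, {chunk_text}, and so on). A hypothetical rendering sketch using plain str.format, assuming the RAG provider substitutes one set of values per retrieved chunk (the actual rendering lives in the rag-runtime and responses providers and may differ):

    # The annotation-mode chunk template from the config above, with YAML's
    # folded newlines written out explicitly.
    chunk_annotation_template = "[{index}] {metadata_text} cite as <|{file_id}|>\n\n{chunk_text}\n"

    rendered = chunk_annotation_template.format(
        index=1,
        metadata_text="source: handbook.pdf",  # hypothetical metadata summary
        file_id="file-Cn3MSNn72ENTiiq11Qda4A",  # ID format shown in the template above
        chunk_text="Employees accrue 1.5 vacation days per month.",
    )
    # rendered == "[1] source: handbook.pdf cite as <|file-Cn3MSNn72ENTiiq11Qda4A|>\n\n"
    #             "Employees accrue 1.5 vacation days per month.\n"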
@@ -12,20 +12,16 @@ import rich
 import yaml
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.datasets import DatasetPurpose
-from llama_stack.apis.models import ModelType
 from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
     Api,
     BenchmarkInput,
-    BuildConfig,
     BuildProvider,
     DatasetInput,
-    DistributionSpec,
     ModelInput,
     Provider,
+    SafetyConfig,
     ShieldInput,
-    TelemetryConfig,
     ToolGroupInput,
     VectorStoresConfig,
 )
@@ -36,13 +32,11 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageBackendType,
 )
+from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig
+from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
-from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
+from llama_stack_api import DatasetPurpose, ModelType
 
 
 def filter_empty_values(obj: Any) -> Any:
@@ -188,7 +182,7 @@ class RunConfigSettings(BaseModel):
     default_datasets: list[DatasetInput] | None = None
     default_benchmarks: list[BenchmarkInput] | None = None
     vector_stores_config: VectorStoresConfig | None = None
-    telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
+    safety_config: SafetyConfig | None = None
     storage_backends: dict[str, Any] | None = None
     storage_stores: dict[str, Any] | None = None
 
@@ -257,6 +251,10 @@
                 backend="sql_default",
                 table_name="openai_conversations",
             ).model_dump(exclude_none=True),
+            "prompts": KVStoreReference(
+                backend="kv_default",
+                namespace="prompts",
+            ).model_dump(exclude_none=True),
         }
 
         storage_config = dict(
@@ -284,12 +282,14 @@
             "server": {
                 "port": 8321,
             },
-            "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
         }
 
        if self.vector_stores_config:
            config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
 
+        if self.safety_config:
+            config["safety"] = self.safety_config.model_dump(exclude_none=True)
+
        return config
 
 
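
SafetyConfig is new in this release. Judging from the YAML it serializes to (the starter config above ends with safety: default_shield_id: llama-guard), a minimal pydantic sketch of its shape could look like the following. This is an inferred approximation; the real class in llama_stack.core.datatypes may carry more fields:

    from pydantic import BaseModel

    class SafetyConfig(BaseModel):
        # Hypothetical minimal shape, inferred from the generated YAML.
        default_shield_id: str | None = None

    cfg = SafetyConfig(default_shield_id="llama-guard")
    assert cfg.model_dump(exclude_none=True) == {"default_shield_id": "llama-guard"}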
@@ -314,55 +314,6 @@ class DistributionTemplate(BaseModel):
 
     available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None
 
-    # we may want to specify additional pip packages without necessarily indicating a
-    # specific "default" inference store (which is what typically used to dictate additional
-    # pip packages)
-    additional_pip_packages: list[str] | None = None
-
-    def build_config(self) -> BuildConfig:
-        additional_pip_packages: list[str] = []
-        for run_config in self.run_configs.values():
-            run_config_ = run_config.run_config(self.name, self.providers, self.container_image)
-
-            # TODO: This is a hack to get the dependencies for internal APIs into build
-            # We should have a better way to do this by formalizing the concept of "internal" APIs
-            # and providers, with a way to specify dependencies for them.
-
-            storage_cfg = run_config_.get("storage", {})
-            for backend_cfg in storage_cfg.get("backends", {}).values():
-                store_type = backend_cfg.get("type")
-                if not store_type:
-                    continue
-                if str(store_type).startswith("kv_"):
-                    additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
-                elif str(store_type).startswith("sql_"):
-                    additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))
-
-        if self.additional_pip_packages:
-            additional_pip_packages.extend(self.additional_pip_packages)
-
-        # Create minimal providers for build config (without runtime configs)
-        build_providers = {}
-        for api, providers in self.providers.items():
-            build_providers[api] = []
-            for provider in providers:
-                # Create a minimal build provider object with only essential build information
-                build_provider = BuildProvider(
-                    provider_type=provider.provider_type,
-                    module=provider.module,
-                )
-                build_providers[api].append(build_provider)
-
-        return BuildConfig(
-            distribution_spec=DistributionSpec(
-                description=self.description,
-                container_image=self.container_image,
-                providers=build_providers,
-            ),
-            image_type=LlamaStackImageType.VENV.value,  # default to venv
-            additional_pip_packages=sorted(set(additional_pip_packages)),
-        )
-
     def generate_markdown_docs(self) -> str:
         providers_table = "| API | Provider(s) |\n"
         providers_table += "|-----|-------------|\n"
@@ -415,6 +366,7 @@ class DistributionTemplate(BaseModel):
                 providers_table=providers_table,
                 run_config_env_vars=self.run_config_env_vars,
                 default_models=default_models,
+                run_configs=list(self.run_configs.keys()),
             )
         return ""
 
@@ -433,14 +385,6 @@
         for output_dir in [yaml_output_dir, doc_output_dir]:
             output_dir.mkdir(parents=True, exist_ok=True)
 
-        build_config = self.build_config()
-        with open(yaml_output_dir / "build.yaml", "w") as f:
-            yaml.safe_dump(
-                filter_empty_values(build_config.model_dump(exclude_none=True)),
-                f,
-                sort_keys=False,
-            )
-
         for yaml_pth, settings in self.run_configs.items():
             run_config = settings.run_config(self.name, self.providers, self.container_image)
             with open(yaml_output_dir / yaml_pth, "w") as f:
@@ -15,7 +15,7 @@ providers:
   - provider_id: watsonx
     provider_type: remote::watsonx
     config:
-      url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+      base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:=}
       project_id: ${env.WATSONX_PROJECT_ID:=}
   vector_io:
@@ -115,6 +115,9 @@ storage:
     conversations:
       table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
@@ -129,5 +132,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -69,7 +69,7 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
69
69
  template_path=None,
70
70
  providers=providers,
71
71
  run_configs={
72
- "run.yaml": RunConfigSettings(
72
+ "config.yaml": RunConfigSettings(
73
73
  provider_overrides={
74
74
  "inference": [inference_provider],
75
75
  "files": [files_provider],
llama_stack/log.py CHANGED
@@ -9,15 +9,23 @@ import os
 import re
 from logging.config import dictConfig  # allow-direct-logging
 
+from pydantic import BaseModel, Field
 from rich.console import Console
 from rich.errors import MarkupError
 from rich.logging import RichHandler
 
-from llama_stack.core.datatypes import LoggingConfig
-
 # Default log level
 DEFAULT_LOG_LEVEL = logging.INFO
 
+
+class LoggingConfig(BaseModel):
+    category_levels: dict[str, str] = Field(
+        default_factory=dict,
+        description="""
+Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
+    )
+
+
 # Predefined categories
 CATEGORIES = [
     "core",
@@ -29,7 +37,6 @@ CATEGORIES = [
     "eval",
     "tools",
     "client",
-    "telemetry",
     "openai",
     "openai_responses",
     "openai_conversations",
@@ -37,6 +44,7 @@ CATEGORIES = [
     "providers",
     "models",
     "files",
+    "file_processors",
     "vector_io",
     "tool_runtime",
     "cli",
@@ -84,10 +92,10 @@ def config_to_category_levels(category: str, level: str):
 
 def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]:
     """
-    Helper function to parse a yaml logging configuration found in the run.yaml
+    Helper function to parse a yaml logging configuration found in the config.yaml
 
     Parameters:
-        yaml_config (Logging): the logger config object found in the run.yaml
+        yaml_config (Logging): the logger config object found in the config.yaml
 
     Returns:
         Dict[str, int]: A dictionary mapping categories to their log levels.
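
Since LoggingConfig now lives in llama_stack/log.py itself, it can be constructed and handed to parse_yaml_config without importing the core datatypes. A usage sketch, assuming the function maps level names to the stdlib logging constants (config_to_category_levels, shown above, does the actual mapping):

    from llama_stack.log import LoggingConfig, parse_yaml_config

    config = LoggingConfig(category_levels={"core": "DEBUG", "server": "INFO"})
    levels = parse_yaml_config(config)
    # Expected to yield stdlib level ints, e.g. {"core": 10, "server": 20}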
@@ -137,7 +145,8 @@ class CustomRichHandler(RichHandler):
         # Set a reasonable default width for console output, especially when redirected to files
         console_width = int(os.environ.get("LLAMA_STACK_LOG_WIDTH", "120"))
         # Don't force terminal codes to avoid ANSI escape codes in log files
-        kwargs["console"] = Console(width=console_width)
+        # Ensure logs go to stderr, not stdout
+        kwargs["console"] = Console(width=console_width, stderr=True)
         super().__init__(*args, **kwargs)
 
     def emit(self, record):
@@ -177,6 +186,7 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
         log_file (str | None): Path to a log file to additionally pipe the logs into.
             If None, reads from LLAMA_STACK_LOG_FILE environment variable.
     """
+    global _category_levels
     # Read from environment variables if not explicitly provided
     if category_levels is None:
         category_levels = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
@@ -184,6 +194,9 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
         if env_config:
             category_levels.update(parse_environment_config(env_config))
 
+    # Update the module-level _category_levels so that already-created loggers pick up the new levels
+    _category_levels.update(category_levels)
+
     if log_file is None:
         log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
     log_format = "%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s"
@@ -268,14 +281,18 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
     }
     dictConfig(logging_config)
 
-    # Ensure third-party libraries follow the root log level, but preserve
-    # already-configured loggers (e.g., uvicorn) and our own llama_stack loggers
+    # Update log levels for all loggers that were created before setup_logging was called
     for name, logger in logging.root.manager.loggerDict.items():
         if isinstance(logger, logging.Logger):
-            # Skip infrastructure loggers (uvicorn, fastapi) and our own loggers
-            if name.startswith(("uvicorn", "fastapi", "llama_stack")):
+            # Skip infrastructure loggers (uvicorn, fastapi) to preserve their configured levels
+            if name.startswith(("uvicorn", "fastapi")):
                 continue
-            logger.setLevel(root_level)
+            # Update llama_stack loggers if root level was explicitly set (e.g., via all=CRITICAL)
+            if name.startswith("llama_stack") and "root" in category_levels:
+                logger.setLevel(root_level)
+            # Update third-party library loggers
+            elif not name.startswith("llama_stack"):
+                logger.setLevel(root_level)
 
 
 def get_logger(
@@ -38,18 +38,18 @@ def maybe_reshard_state_dict(
     mmap: bool = True,
 ) -> dict[str, torch.Tensor]:
     if str(map_location) == "cpu":
-        torch.set_default_tensor_type(torch.BFloat16Tensor)
+        torch.set_default_dtype(torch.bfloat16)
     else:
-        torch.set_default_tensor_type(torch.cuda.BFloat16Tensor)
+        torch.set_default_dtype(torch.bfloat16)
 
-    ckpt_paths = np.array(sorted(ckpt_paths))
+    ckpt_paths_array = np.array(sorted(ckpt_paths))
 
     new_mp_size, new_mp_rank = get_model_parallel_world_size(), get_model_parallel_rank()
-    old_mp_size = len(ckpt_paths)
+    old_mp_size = len(ckpt_paths_array)
     old_mp_ranks = map_mp_rank(old_mp_size, new_mp_size, new_mp_rank)
 
-    print(f"Loading checkpoint shards:\n{str(ckpt_paths[old_mp_ranks])}")  # type: ignore
-    paths = ckpt_paths[old_mp_ranks]  # type: ignore
+    print(f"Loading checkpoint shards:\n{str(ckpt_paths_array[old_mp_ranks])}")  # type: ignore
+    paths = ckpt_paths_array[old_mp_ranks]  # type: ignore
 
     state_dicts = [torch.load(str(p), map_location=map_location, mmap=mmap) for p in paths]
 
     if new_mp_size == old_mp_size:
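
For context on the first change in this hunk: torch.set_default_tensor_type has been deprecated since PyTorch 2.1 in favor of torch.set_default_dtype, which is why both branches now collapse to the same call. The new API fixes only the dtype; the device is chosen per-tensor rather than globally. A small sketch of the replacement (assumes PyTorch 2.1+):

    import torch

    # Before (deprecated): dtype and device were set in one global call, e.g.
    #   torch.set_default_tensor_type(torch.cuda.BFloat16Tensor)
    # After: set the default dtype only, and pick devices explicitly.
    torch.set_default_dtype(torch.bfloat16)

    x = torch.empty(2, 2)  # bfloat16, on CPU
    if torch.cuda.is_available():
        y = torch.empty(2, 2, device="cuda")  # bfloat16, on the GPU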
@@ -79,6 +79,8 @@ def add_hadamard_transform_for_spinquant(model: torch.nn.Module, prefix: str = "
79
79
  for module_name, module in model.named_children():
80
80
  child_full_name = prefix + "." + module_name
81
81
  if re.search(pattern_last_linear_ffn, child_full_name):
82
+ # Module matching this pattern should be nn.Linear with in_features
83
+ assert isinstance(module, nn.Linear), f"Expected nn.Linear, got {type(module)}"
82
84
  new_module = nn.Sequential(HadamardModule(group_size=module.in_features), module)
83
85
  del module
84
86
  setattr(model, module_name, new_module)
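
The assert added here does double duty: besides the runtime check, isinstance asserts let static type checkers narrow `module` from nn.Module (which has no in_features attribute) to nn.Linear, so the module.in_features access on the following line type-checks. A standalone sketch of the pattern:

    import torch.nn as nn

    def group_size_of(module: nn.Module) -> int:
        # Before the assert, checkers only know `module: nn.Module`;
        # the isinstance assert narrows its type to nn.Linear.
        assert isinstance(module, nn.Linear), f"Expected nn.Linear, got {type(module)}"
        return module.in_features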
@@ -26,8 +26,10 @@ from fairscale.nn.model_parallel.initialize import (
 )
 from termcolor import cprint
 
+from llama_stack.models.llama.datatypes import ToolPromptFormat
+
 from ..checkpoint import maybe_reshard_state_dict
-from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage, ToolPromptFormat
+from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage
 from .args import ModelArgs
 from .chat_format import ChatFormat, LLMInput
 from .model import Transformer
@@ -15,13 +15,10 @@ from pathlib import Path
 
 from termcolor import colored
 
+from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall, ToolDefinition, ToolPromptFormat
+
 from ..datatypes import (
-    BuiltinTool,
     RawMessage,
-    StopReason,
-    ToolCall,
-    ToolDefinition,
-    ToolPromptFormat,
 )
 from . import template_data
 from .chat_format import ChatFormat
@@ -141,15 +141,15 @@
     """
     Build vision encoder attention mask that omits padding tokens.
     """
-    masks = []
+    masks_list: list[torch.Tensor] = []
     for arx in ar:
         mask_i = torch.ones((num_chunks, x.shape[2], 1), dtype=x.dtype)
         mask_i[: arx[0] * arx[1], :ntok] = 0
         mask_i = mask_i.view(num_chunks * x.shape[2], -1)
         mask_i = mask_i @ mask_i.T * get_negative_inf_value(x.dtype)
         mask_i = mask_i.unsqueeze(0)
-        masks.append(mask_i)
-    masks = torch.stack(masks).to(x.device).expand(-1, n_heads, -1, -1)
+        masks_list.append(mask_i)
+    masks = torch.stack(masks_list).to(x.device).expand(-1, n_heads, -1, -1)
     return masks
 
 
@@ -95,7 +95,7 @@ class VariableSizeImageTransform:
                 factors_set.add(n // i)
         return factors_set
 
-    def find_supported_resolutions(self, max_num_chunks: int, patch_size: int) -> torch.Tensor:
+    def find_supported_resolutions(self, max_num_chunks: int, patch_size: int) -> list[tuple[int, int]]:
         """
         Computes all of the allowed resoltuions for a fixed number of chunks
         and patch_size. Useful for when dividing an image into chunks.
@@ -198,10 +198,10 @@
 
     def resize_without_distortion(
         self,
-        image: torch.Tensor,
+        image: Image.Image,
         target_size: tuple[int, int],
         max_upscaling_size: int | None,
-    ) -> torch.Tensor:
+    ) -> Image.Image:
         """
         Used to resize an image to target_resolution, without distortion.
 
@@ -380,12 +380,12 @@
         assert isinstance(image, Image.Image), type(image)
         w, h = image.size
 
-        possible_resolutions = self.find_supported_resolutions(max_num_chunks=max_num_chunks, patch_size=self.size)
-        possible_resolutions = torch.tensor(possible_resolutions)
+        possible_resolutions_list = self.find_supported_resolutions(max_num_chunks=max_num_chunks, patch_size=self.size)
+        possible_resolutions_tensor = torch.tensor(possible_resolutions_list)
 
         best_resolution = self.get_best_fit(
             image_size=(w, h),
-            possible_resolutions=possible_resolutions,
+            possible_resolutions=possible_resolutions_tensor,
             resize_to_max_canvas=resize_to_max_canvas,
         )
 
@@ -15,7 +15,7 @@ import textwrap
 from datetime import datetime
 from typing import Any
 
-from llama_stack.apis.inference import (
+from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     ToolDefinition,
 )
@@ -8,8 +8,9 @@ import json
 import re
 
 from llama_stack.log import get_logger
+from llama_stack.models.llama.datatypes import BuiltinTool, ToolCall, ToolPromptFormat
 
-from ..datatypes import BuiltinTool, RecursiveType, ToolCall, ToolPromptFormat
+from ..datatypes import RecursiveType
 
 logger = get_logger(name=__name__, category="models::llama")
 
@@ -13,7 +13,7 @@
 
 import textwrap
 
-from llama_stack.apis.inference import ToolDefinition
+from llama_stack.models.llama.datatypes import ToolDefinition
 from llama_stack.models.llama.llama3.prompt_templates.base import (
     PromptTemplate,
     PromptTemplateGeneratorBase,
@@ -15,7 +15,6 @@ async def get_provider_impl(
     config: MetaReferenceAgentsImplConfig,
     deps: dict[Api, Any],
     policy: list[AccessRule],
-    telemetry_enabled: bool = False,
 ):
     from .agents import MetaReferenceAgentsImpl
 
@@ -23,12 +22,13 @@ async def get_provider_impl(
         config,
         deps[Api.inference],
         deps[Api.vector_io],
-        deps[Api.safety],
+        deps.get(Api.safety),
         deps[Api.tool_runtime],
         deps[Api.tool_groups],
         deps[Api.conversations],
+        deps[Api.prompts],
+        deps[Api.files],
         policy,
-        telemetry_enabled,
     )
     await impl.initialize()
     return impl
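
The switch from deps[Api.safety] to deps.get(Api.safety) makes safety an optional dependency: a stack configured without a safety provider now hands the agents implementation None instead of raising KeyError at startup. An illustrative dict-only sketch of the distinction:

    deps = {"inference": "<impl>", "tool_runtime": "<impl>"}  # no safety provider configured

    safety = deps.get("safety")    # -> None; the agents impl must tolerate shield-less runs
    inference = deps["inference"]  # still required; a KeyError here signals a config error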