llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -4,6 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
+ from threading import Lock
7
8
  from typing import Annotated, cast
8
9
 
9
10
  from pydantic import Field
@@ -15,12 +16,13 @@ from llama_stack.core.storage.datatypes import (
15
16
  StorageBackendConfig,
16
17
  StorageBackendType,
17
18
  )
18
-
19
- from .api import SqlStore
19
+ from llama_stack_api.internal.sqlstore import SqlStore
20
20
 
21
21
  sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]
22
22
 
23
23
  _SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {}
24
+ _SQLSTORE_INSTANCES: dict[str, SqlStore] = {}
25
+ _SQLSTORE_LOCKS: dict[str, Lock] = {}
24
26
 
25
27
 
26
28
  SqlStoreConfig = Annotated[
@@ -52,19 +54,34 @@ def sqlstore_impl(reference: SqlStoreReference) -> SqlStore:
52
54
  f"Unknown SQL store backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}"
53
55
  )
54
56
 
55
- if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig):
56
- from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
57
+ existing = _SQLSTORE_INSTANCES.get(backend_name)
58
+ if existing:
59
+ return existing
57
60
 
58
- config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy()
59
- return SqlAlchemySqlStoreImpl(config)
60
- else:
61
- raise ValueError(f"Unknown sqlstore type {backend_config.type}")
61
+ lock = _SQLSTORE_LOCKS.setdefault(backend_name, Lock())
62
+ with lock:
63
+ existing = _SQLSTORE_INSTANCES.get(backend_name)
64
+ if existing:
65
+ return existing
66
+
67
+ if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig):
68
+ from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
69
+
70
+ config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy()
71
+ instance = SqlAlchemySqlStoreImpl(config)
72
+ _SQLSTORE_INSTANCES[backend_name] = instance
73
+ return instance
74
+ else:
75
+ raise ValueError(f"Unknown sqlstore type {backend_config.type}")
62
76
 
63
77
 
64
78
  def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
65
79
  """Register the set of available SQL store backends for reference resolution."""
66
80
  global _SQLSTORE_BACKENDS
81
+ global _SQLSTORE_INSTANCES
67
82
 
68
83
  _SQLSTORE_BACKENDS.clear()
84
+ _SQLSTORE_INSTANCES.clear()
85
+ _SQLSTORE_LOCKS.clear()
69
86
  for name, cfg in backends.items():
70
87
  _SQLSTORE_BACKENDS[name] = cfg
@@ -12,8 +12,8 @@ import pydantic
12
12
 
13
13
  from llama_stack.core.datatypes import RoutableObjectWithProvider
14
14
  from llama_stack.core.storage.datatypes import KVStoreReference
15
+ from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
15
16
  from llama_stack.log import get_logger
16
- from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
17
17
 
18
18
  logger = get_logger(__name__, category="core::registry")
19
19
 
@@ -9,7 +9,10 @@ from typing import Any
9
9
 
10
10
  def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]:
11
11
  """Redact sensitive information from config before printing."""
12
- sensitive_patterns = ["api_key", "api_token", "password", "secret"]
12
+ sensitive_patterns = ["api_key", "api_token", "password", "secret", "token"]
13
+
14
+ # Specific configuration field names that should NOT be redacted despite containing "token"
15
+ safe_token_fields = ["chunk_size_tokens", "max_tokens", "default_chunk_overlap_tokens"]
13
16
 
14
17
  def _redact_value(v: Any) -> Any:
15
18
  if isinstance(v, dict):
@@ -21,7 +24,10 @@ def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]:
21
24
  def _redact_dict(d: dict[str, Any]) -> dict[str, Any]:
22
25
  result = {}
23
26
  for k, v in d.items():
24
- if any(pattern in k.lower() for pattern in sensitive_patterns):
27
+ # Don't redact if it's a safe field
28
+ if any(safe_field in k.lower() for safe_field in safe_token_fields):
29
+ result[k] = _redact_value(v)
30
+ elif any(pattern in k.lower() for pattern in sensitive_patterns):
25
31
  result[k] = "********"
26
32
  else:
27
33
  result[k] = _redact_value(v)
@@ -4,7 +4,6 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from enum import StrEnum
8
7
  from pathlib import Path
9
8
 
10
9
  from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
@@ -16,21 +15,14 @@ logger = get_logger(name=__name__, category="core")
16
15
  DISTRO_DIR = Path(__file__).parent.parent.parent.parent / "llama_stack" / "distributions"
17
16
 
18
17
 
19
- class Mode(StrEnum):
20
- RUN = "run"
21
- BUILD = "build"
22
-
23
-
24
18
  def resolve_config_or_distro(
25
19
  config_or_distro: str,
26
- mode: Mode = Mode.RUN,
27
20
  ) -> Path:
28
21
  """
29
22
  Resolve a config/distro argument to a concrete config file path.
30
23
 
31
24
  Args:
32
25
  config_or_distro: User input (file path, distribution name, or built distribution)
33
- mode: Mode resolving for ("run", "build", "server")
34
26
 
35
27
  Returns:
36
28
  Path to the resolved config file
@@ -47,38 +39,50 @@ def resolve_config_or_distro(
47
39
 
48
40
  # Strategy 2: Try as distribution name (if no .yaml extension)
49
41
  if not config_or_distro.endswith(".yaml"):
50
- distro_config = _get_distro_config_path(config_or_distro, mode)
42
+ distro_config = _get_distro_config_path(config_or_distro)
51
43
  if distro_config.exists():
52
44
  logger.debug(f"Using distribution: {distro_config}")
53
45
  return distro_config
54
46
 
55
- # Strategy 3: Try as built distribution name
56
- distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
47
+ # Strategy 3: Try as distro config path (if no .yaml extension and contains a slash)
48
+ # eg: starter::run-with-postgres-store.yaml
49
+ # Use :: to avoid slash and confusion with a filesystem path
50
+ if "::" in config_or_distro:
51
+ distro_name, config_name = config_or_distro.split("::")
52
+ distro_config = _get_distro_config_path(distro_name, config_name)
53
+ if distro_config.exists():
54
+ logger.info(f"Using distribution: {distro_config}")
55
+ return distro_config
56
+
57
+ # Strategy 4: Try as built distribution name
58
+ distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml"
57
59
  if distrib_config.exists():
58
60
  logger.debug(f"Using built distribution: {distrib_config}")
59
61
  return distrib_config
60
62
 
61
- distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
63
+ distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / "config.yaml"
62
64
  if distrib_config.exists():
63
65
  logger.debug(f"Using built distribution: {distrib_config}")
64
66
  return distrib_config
65
67
 
66
- # Strategy 4: Failed - provide helpful error
67
- raise ValueError(_format_resolution_error(config_or_distro, mode))
68
+ # Strategy 5: Failed - provide helpful error
69
+ raise ValueError(_format_resolution_error(config_or_distro))
68
70
 
69
71
 
70
- def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
72
+ def _get_distro_config_path(distro_name: str, path: str | None = None) -> Path:
71
73
  """Get the config file path for a distro."""
72
- return DISTRO_DIR / distro_name / f"{mode}.yaml"
74
+ if not path or not path.endswith(".yaml"):
75
+ path = "config.yaml"
76
+ return DISTRO_DIR / distro_name / path
73
77
 
74
78
 
75
- def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:
79
+ def _format_resolution_error(config_or_distro: str) -> str:
76
80
  """Format a helpful error message for resolution failures."""
77
81
  from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
78
82
 
79
- distro_path = _get_distro_config_path(config_or_distro, mode)
80
- distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
81
- distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
83
+ distro_path = _get_distro_config_path(config_or_distro)
84
+ distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml"
85
+ distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-config.yaml"
82
86
 
83
87
  available_distros = _get_available_distros()
84
88
  distros_str = ", ".join(available_distros) if available_distros else "none found"
@@ -99,15 +103,14 @@ Did you mean one of these distributions?
99
103
 
100
104
  def _get_available_distros() -> list[str]:
101
105
  """Get list of available distro names."""
102
- if not DISTRO_DIR.exists() and not DISTRIBS_BASE_DIR.exists():
103
- return []
104
-
105
- return list(
106
- set(
107
- [d.name for d in DISTRO_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")]
108
- + [d.name for d in DISTRIBS_BASE_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")]
109
- )
110
- )
106
+
107
+ distros = []
108
+ if DISTRO_DIR.exists():
109
+ distros.extend([d.name for d in DISTRO_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")])
110
+ if DISTRIBS_BASE_DIR.exists():
111
+ distros.extend([d.name for d in DISTRIBS_BASE_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")])
112
+
113
+ return list(set(distros))
111
114
 
112
115
 
113
116
  def _format_distro_suggestions(distros: list[str], user_input: str) -> str:
@@ -7,8 +7,6 @@
7
7
  from collections.abc import AsyncGenerator
8
8
  from contextvars import ContextVar
9
9
 
10
- from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT
11
-
12
10
  _MISSING = object()
13
11
 
14
12
 
@@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
69
67
  try:
70
68
  yield item
71
69
  # Update our tracked values with any changes made during this iteration
72
- # Only for non-trace context vars - trace context must persist across yields
73
- # to allow nested span tracking for telemetry
70
+ # This allows context changes to persist across generator iterations
74
71
  for context_var in context_vars:
75
- if context_var is not CURRENT_TRACE_CONTEXT:
76
- initial_context_values[context_var.name] = context_var.get()
72
+ initial_context_values[context_var.name] = context_var.get()
77
73
  finally:
78
- # Restore non-trace context vars after each yield to prevent leaks between requests
79
- # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
74
+ # Restore context vars after each yield to prevent leaks between requests
80
75
  for context_var in context_vars:
81
- if context_var is not CURRENT_TRACE_CONTEXT:
82
- _restore_context_var(context_var)
76
+ _restore_context_var(context_var)
83
77
 
84
78
  return wrapper()
@@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int:
84
84
  text=True,
85
85
  check=False,
86
86
  )
87
+
88
+ # Print stdout and stderr if command failed
89
+ if result.returncode != 0:
90
+ log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}")
91
+ if result.stdout:
92
+ log.error(f"STDOUT: {result.stdout}")
93
+ if result.stderr:
94
+ log.error(f"STDERR: {result.stderr}")
95
+
87
96
  return result.returncode
88
97
  except subprocess.SubprocessError as e:
89
98
  log.error(f"Subprocess error: {e}")
@@ -0,0 +1,45 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """
8
+ Utility functions for type inspection and parameter handling.
9
+ """
10
+
11
+ import inspect
12
+ import typing
13
+ from typing import Any, get_args, get_origin
14
+
15
+ from pydantic import BaseModel
16
+ from pydantic.fields import FieldInfo
17
+
18
+
19
+ def is_unwrapped_body_param(param_type: Any) -> bool:
20
+ """
21
+ Check if a parameter type represents an unwrapped body parameter.
22
+ An unwrapped body parameter is an Annotated type with Body(embed=False)
23
+
24
+ This is used to determine whether request parameters should be flattened
25
+ in OpenAPI specs and client libraries (matching FastAPI's embed=False behavior).
26
+
27
+ Args:
28
+ param_type: The parameter type annotation to check
29
+
30
+ Returns:
31
+ True if the parameter should be treated as an unwrapped body parameter
32
+ """
33
+ # Check if it's Annotated with Body(embed=False)
34
+ if get_origin(param_type) is typing.Annotated:
35
+ args = get_args(param_type)
36
+ base_type = args[0]
37
+ metadata = args[1:]
38
+
39
+ # Look for Body annotation with embed=False
40
+ # Body() returns a FieldInfo object, so we check for that type and the embed attribute
41
+ for item in metadata:
42
+ if isinstance(item, FieldInfo) and hasattr(item, "embed") and not item.embed:
43
+ return inspect.isclass(base_type) and issubclass(base_type, BaseModel)
44
+
45
+ return False
@@ -105,6 +105,9 @@ storage:
105
105
  conversations:
106
106
  table_name: openai_conversations
107
107
  backend: sql_default
108
+ prompts:
109
+ namespace: prompts
110
+ backend: kv_default
108
111
  registered_resources:
109
112
  models:
110
113
  - metadata: {}
@@ -128,5 +131,3 @@ registered_resources:
128
131
  provider_id: rag-runtime
129
132
  server:
130
133
  port: 8321
131
- telemetry:
132
- enabled: true
@@ -4,7 +4,6 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.apis.models import ModelType
8
7
  from llama_stack.core.datatypes import (
9
8
  BuildProvider,
10
9
  ModelInput,
@@ -17,6 +16,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
17
16
  SentenceTransformersInferenceConfig,
18
17
  )
19
18
  from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
19
+ from llama_stack_api import ModelType
20
20
 
21
21
 
22
22
  def get_distribution_template() -> DistributionTemplate:
@@ -111,7 +111,7 @@ def get_distribution_template() -> DistributionTemplate:
111
111
  container_image=None,
112
112
  providers=providers,
113
113
  run_configs={
114
- "run.yaml": RunConfigSettings(
114
+ "config.yaml": RunConfigSettings(
115
115
  provider_overrides={
116
116
  "inference": [inference_provider, embedding_provider],
117
117
  "vector_io": [chromadb_provider],
@@ -109,6 +109,9 @@ storage:
109
109
  conversations:
110
110
  table_name: openai_conversations
111
111
  backend: sql_default
112
+ prompts:
113
+ namespace: prompts
114
+ backend: kv_default
112
115
  registered_resources:
113
116
  models:
114
117
  - metadata: {}
@@ -137,5 +140,3 @@ registered_resources:
137
140
  provider_id: rag-runtime
138
141
  server:
139
142
  port: 8321
140
- telemetry:
141
- enabled: true
@@ -112,6 +112,9 @@ storage:
112
112
  conversations:
113
113
  table_name: openai_conversations
114
114
  backend: sql_default
115
+ prompts:
116
+ namespace: prompts
117
+ backend: kv_default
115
118
  registered_resources:
116
119
  models:
117
120
  - metadata: {}
@@ -135,5 +138,3 @@ registered_resources:
135
138
  provider_id: rag-runtime
136
139
  server:
137
140
  port: 8321
138
- telemetry:
139
- enabled: true
@@ -6,7 +6,6 @@
6
6
 
7
7
  from pathlib import Path
8
8
 
9
- from llama_stack.apis.models import ModelType
10
9
  from llama_stack.core.datatypes import (
11
10
  BuildProvider,
12
11
  ModelInput,
@@ -22,6 +21,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
22
21
  SentenceTransformersInferenceConfig,
23
22
  )
24
23
  from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
24
+ from llama_stack_api import ModelType
25
25
 
26
26
 
27
27
  def get_distribution_template() -> DistributionTemplate:
@@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
105
105
  template_path=Path(__file__).parent / "doc_template.md",
106
106
  providers=providers,
107
107
  run_configs={
108
- "run.yaml": RunConfigSettings(
108
+ "config.yaml": RunConfigSettings(
109
109
  provider_overrides={
110
110
  "inference": [inference_provider, embedding_provider],
111
111
  "vector_io": [vector_io_provider],
@@ -122,6 +122,9 @@ storage:
122
122
  conversations:
123
123
  table_name: openai_conversations
124
124
  backend: sql_default
125
+ prompts:
126
+ namespace: prompts
127
+ backend: kv_default
125
128
  registered_resources:
126
129
  models:
127
130
  - metadata: {}
@@ -150,5 +153,3 @@ registered_resources:
150
153
  provider_id: rag-runtime
151
154
  server:
152
155
  port: 8321
153
- telemetry:
154
- enabled: true
@@ -16,9 +16,8 @@ providers:
16
16
  - provider_id: nvidia
17
17
  provider_type: remote::nvidia
18
18
  config:
19
- url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
19
+ base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
20
20
  api_key: ${env.NVIDIA_API_KEY:=}
21
- append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
22
21
  vector_io:
23
22
  - provider_id: faiss
24
23
  provider_type: inline::faiss
@@ -100,6 +99,9 @@ storage:
100
99
  conversations:
101
100
  table_name: openai_conversations
102
101
  backend: sql_default
102
+ prompts:
103
+ namespace: prompts
104
+ backend: kv_default
103
105
  registered_resources:
104
106
  models: []
105
107
  shields: []
@@ -112,5 +114,3 @@ registered_resources:
112
114
  provider_id: rag-runtime
113
115
  server:
114
116
  port: 8321
115
- telemetry:
116
- enabled: true
@@ -81,7 +81,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
81
81
  template_path=Path(__file__).parent / "doc_template.md",
82
82
  providers=providers,
83
83
  run_configs={
84
- "run.yaml": RunConfigSettings(
84
+ "config.yaml": RunConfigSettings(
85
85
  provider_overrides={
86
86
  "inference": [inference_provider],
87
87
  "datasetio": [datasetio_provider],
@@ -16,9 +16,8 @@ providers:
16
16
  - provider_id: nvidia
17
17
  provider_type: remote::nvidia
18
18
  config:
19
- url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
19
+ base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
20
20
  api_key: ${env.NVIDIA_API_KEY:=}
21
- append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
22
21
  - provider_id: nvidia
23
22
  provider_type: remote::nvidia
24
23
  config:
@@ -111,6 +110,9 @@ storage:
111
110
  conversations:
112
111
  table_name: openai_conversations
113
112
  backend: sql_default
113
+ prompts:
114
+ namespace: prompts
115
+ backend: kv_default
114
116
  registered_resources:
115
117
  models:
116
118
  - metadata: {}
@@ -133,5 +135,3 @@ registered_resources:
133
135
  provider_id: rag-runtime
134
136
  server:
135
137
  port: 8321
136
- telemetry:
137
- enabled: true
@@ -4,4 +4,4 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from .datasetio import *
7
+ from .oci import get_distribution_template # noqa: F401
@@ -0,0 +1,134 @@
1
+ version: 2
2
+ image_name: oci
3
+ apis:
4
+ - agents
5
+ - datasetio
6
+ - eval
7
+ - files
8
+ - inference
9
+ - safety
10
+ - scoring
11
+ - tool_runtime
12
+ - vector_io
13
+ providers:
14
+ inference:
15
+ - provider_id: oci
16
+ provider_type: remote::oci
17
+ config:
18
+ oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal}
19
+ oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}
20
+ oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT}
21
+ oci_region: ${env.OCI_REGION:=us-ashburn-1}
22
+ oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=}
23
+ vector_io:
24
+ - provider_id: faiss
25
+ provider_type: inline::faiss
26
+ config:
27
+ persistence:
28
+ namespace: vector_io::faiss
29
+ backend: kv_default
30
+ safety:
31
+ - provider_id: llama-guard
32
+ provider_type: inline::llama-guard
33
+ config:
34
+ excluded_categories: []
35
+ agents:
36
+ - provider_id: meta-reference
37
+ provider_type: inline::meta-reference
38
+ config:
39
+ persistence:
40
+ agent_state:
41
+ namespace: agents
42
+ backend: kv_default
43
+ responses:
44
+ table_name: responses
45
+ backend: sql_default
46
+ max_write_queue_size: 10000
47
+ num_writers: 4
48
+ eval:
49
+ - provider_id: meta-reference
50
+ provider_type: inline::meta-reference
51
+ config:
52
+ kvstore:
53
+ namespace: eval
54
+ backend: kv_default
55
+ datasetio:
56
+ - provider_id: huggingface
57
+ provider_type: remote::huggingface
58
+ config:
59
+ kvstore:
60
+ namespace: datasetio::huggingface
61
+ backend: kv_default
62
+ - provider_id: localfs
63
+ provider_type: inline::localfs
64
+ config:
65
+ kvstore:
66
+ namespace: datasetio::localfs
67
+ backend: kv_default
68
+ scoring:
69
+ - provider_id: basic
70
+ provider_type: inline::basic
71
+ - provider_id: llm-as-judge
72
+ provider_type: inline::llm-as-judge
73
+ - provider_id: braintrust
74
+ provider_type: inline::braintrust
75
+ config:
76
+ openai_api_key: ${env.OPENAI_API_KEY:=}
77
+ tool_runtime:
78
+ - provider_id: brave-search
79
+ provider_type: remote::brave-search
80
+ config:
81
+ api_key: ${env.BRAVE_SEARCH_API_KEY:=}
82
+ max_results: 3
83
+ - provider_id: tavily-search
84
+ provider_type: remote::tavily-search
85
+ config:
86
+ api_key: ${env.TAVILY_SEARCH_API_KEY:=}
87
+ max_results: 3
88
+ - provider_id: rag-runtime
89
+ provider_type: inline::rag-runtime
90
+ - provider_id: model-context-protocol
91
+ provider_type: remote::model-context-protocol
92
+ files:
93
+ - provider_id: meta-reference-files
94
+ provider_type: inline::localfs
95
+ config:
96
+ storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/oci/files}
97
+ metadata_store:
98
+ table_name: files_metadata
99
+ backend: sql_default
100
+ storage:
101
+ backends:
102
+ kv_default:
103
+ type: kv_sqlite
104
+ db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/kvstore.db
105
+ sql_default:
106
+ type: sql_sqlite
107
+ db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/sql_store.db
108
+ stores:
109
+ metadata:
110
+ namespace: registry
111
+ backend: kv_default
112
+ inference:
113
+ table_name: inference_store
114
+ backend: sql_default
115
+ max_write_queue_size: 10000
116
+ num_writers: 4
117
+ conversations:
118
+ table_name: openai_conversations
119
+ backend: sql_default
120
+ prompts:
121
+ namespace: prompts
122
+ backend: kv_default
123
+ registered_resources:
124
+ models: []
125
+ shields: []
126
+ vector_dbs: []
127
+ datasets: []
128
+ scoring_fns: []
129
+ benchmarks: []
130
+ tool_groups:
131
+ - toolgroup_id: builtin::websearch
132
+ provider_id: tavily-search
133
+ server:
134
+ port: 8321