llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1029 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from dataclasses import dataclass
8
+ from functools import lru_cache
9
+
10
+ from .sku_types import (
11
+ CheckpointQuantizationFormat,
12
+ CoreModelId,
13
+ Model,
14
+ ModelFamily,
15
+ )
16
+
17
+ LLAMA2_VOCAB_SIZE = 32000
18
+ LLAMA3_VOCAB_SIZE = 128256
19
+
20
+
21
+ def resolve_model(descriptor: str) -> Model | None:
22
+ for m in all_registered_models():
23
+ if descriptor in (m.descriptor(), m.huggingface_repo):
24
+ return m
25
+ return None
26
+
27
+
28
+ def all_registered_models() -> list[Model]:
29
+ return (
30
+ llama2_family()
31
+ + llama3_family()
32
+ + llama3_1_family()
33
+ + llama3_2_family()
34
+ + llama3_3_family()
35
+ + llama4_family()
36
+ + safety_models()
37
+ )
38
+
39
+
40
+ def llama2_family() -> list[Model]:
41
+ return [
42
+ *llama2_base_models(),
43
+ *llama2_instruct_models(),
44
+ ]
45
+
46
+
47
+ def llama3_family() -> list[Model]:
48
+ return [
49
+ *llama3_base_models(),
50
+ *llama3_instruct_models(),
51
+ ]
52
+
53
+
54
+ def llama3_1_family() -> list[Model]:
55
+ return [
56
+ *llama3_1_base_models(),
57
+ *llama3_1_instruct_models(),
58
+ ]
59
+
60
+
61
+ def llama3_2_family() -> list[Model]:
62
+ return [
63
+ *llama3_2_base_models(),
64
+ *llama3_2_instruct_models(),
65
+ ]
66
+
67
+
68
+ def llama3_3_family() -> list[Model]:
69
+ return [
70
+ *llama3_3_instruct_models(),
71
+ ]
72
+
73
+
74
+ def llama4_family() -> list[Model]:
75
+ return [
76
+ *llama4_base_models(),
77
+ *llama4_instruct_models(),
78
+ ]
79
+
80
+
81
+ def llama4_base_models() -> list[Model]:
82
+ return [
83
+ Model(
84
+ core_model_id=CoreModelId.llama4_scout_17b_16e,
85
+ description="Llama 4 Scout (17b 16 experts model)",
86
+ huggingface_repo="meta-llama/Llama-4-Scout-17B-16E",
87
+ pth_file_count=8,
88
+ arch_args={},
89
+ ),
90
+ Model(
91
+ core_model_id=CoreModelId.llama4_maverick_17b_128e,
92
+ description="Llama 4 Maverick (17b 128 experts model)",
93
+ huggingface_repo="meta-llama/Llama-4-Maverick-17B-128E",
94
+ pth_file_count=8,
95
+ arch_args={},
96
+ ),
97
+ ]
98
+
99
+
100
+ def llama4_instruct_models() -> list[Model]:
101
+ return [
102
+ Model(
103
+ core_model_id=CoreModelId.llama4_scout_17b_16e_instruct,
104
+ description="Llama 4 Scout (17b 16 experts instruct model)",
105
+ huggingface_repo="meta-llama/Llama-4-Scout-17B-16E-Instruct",
106
+ pth_file_count=8,
107
+ arch_args={},
108
+ ),
109
+ Model(
110
+ core_model_id=CoreModelId.llama4_maverick_17b_128e_instruct,
111
+ description="Llama 4 Maverick (17b 128 experts instruct model)",
112
+ huggingface_repo="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
113
+ pth_file_count=8,
114
+ arch_args={},
115
+ ),
116
+ Model(
117
+ core_model_id=CoreModelId.llama4_maverick_17b_128e_instruct,
118
+ description="Llama 4 Maverick (FP8 quantized)",
119
+ huggingface_repo="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
120
+ quantization_format=CheckpointQuantizationFormat.fp8_mixed,
121
+ pth_file_count=8,
122
+ variant="fp8",
123
+ arch_args={},
124
+ ),
125
+ ]
126
+
127
+
128
+ def llama2_base_models() -> list[Model]:
129
+ return [
130
+ Model(
131
+ core_model_id=CoreModelId.llama2_7b,
132
+ description="Llama 2 7b model",
133
+ huggingface_repo="meta-llama/Llama-2-7b",
134
+ arch_args={
135
+ "dim": 4096,
136
+ "n_layers": 32,
137
+ "n_heads": 32,
138
+ "n_kv_heads": 8,
139
+ "vocab_size": LLAMA2_VOCAB_SIZE,
140
+ "ffn_dim_multiplier": 1.3,
141
+ "multiple_of": 256,
142
+ "norm_eps": 1e-05,
143
+ "rope_theta": 500000.0,
144
+ "use_scaled_rope": False,
145
+ },
146
+ pth_file_count=1,
147
+ ),
148
+ Model(
149
+ core_model_id=CoreModelId.llama2_13b,
150
+ description="Llama 2 13b model",
151
+ huggingface_repo="meta-llama/Llama-2-13b",
152
+ arch_args={
153
+ "dim": 5120,
154
+ "n_layers": 40,
155
+ "n_heads": 40,
156
+ "n_kv_heads": 8,
157
+ "vocab_size": LLAMA2_VOCAB_SIZE,
158
+ "ffn_dim_multiplier": 1.3,
159
+ "multiple_of": 256,
160
+ "norm_eps": 1e-05,
161
+ "rope_theta": 500000.0,
162
+ "use_scaled_rope": False,
163
+ },
164
+ pth_file_count=1,
165
+ ),
166
+ Model(
167
+ core_model_id=CoreModelId.llama2_70b,
168
+ description="Llama 2 70b model",
169
+ huggingface_repo="meta-llama/Llama-2-70b",
170
+ arch_args={
171
+ "dim": 8192,
172
+ "n_layers": 80,
173
+ "n_heads": 64,
174
+ "n_kv_heads": 8,
175
+ "vocab_size": LLAMA2_VOCAB_SIZE,
176
+ "ffn_dim_multiplier": 1.3,
177
+ "multiple_of": 4096,
178
+ "norm_eps": 1e-05,
179
+ "rope_theta": 500000.0,
180
+ "use_scaled_rope": False,
181
+ },
182
+ pth_file_count=8,
183
+ ),
184
+ ]
185
+
186
+
187
+ def llama3_base_models() -> list[Model]:
188
+ return [
189
+ Model(
190
+ core_model_id=CoreModelId.llama3_8b,
191
+ description="Llama 3 8b model",
192
+ huggingface_repo="meta-llama/Llama-3-8B",
193
+ arch_args={
194
+ "dim": 4096,
195
+ "n_layers": 32,
196
+ "n_heads": 32,
197
+ "n_kv_heads": 8,
198
+ "vocab_size": LLAMA3_VOCAB_SIZE,
199
+ "ffn_dim_multiplier": 1.3,
200
+ "multiple_of": 1024,
201
+ "norm_eps": 1e-05,
202
+ "rope_theta": 500000.0,
203
+ "use_scaled_rope": False,
204
+ },
205
+ pth_file_count=1,
206
+ ),
207
+ Model(
208
+ core_model_id=CoreModelId.llama3_70b,
209
+ description="Llama 3 70b model",
210
+ huggingface_repo="meta-llama/Llama-3-70B",
211
+ arch_args={
212
+ "dim": 8192,
213
+ "n_layers": 80,
214
+ "n_heads": 64,
215
+ "n_kv_heads": 8,
216
+ "vocab_size": LLAMA3_VOCAB_SIZE,
217
+ "ffn_dim_multiplier": 1.3,
218
+ "multiple_of": 4096,
219
+ "norm_eps": 1e-05,
220
+ "rope_theta": 500000.0,
221
+ "use_scaled_rope": False,
222
+ },
223
+ pth_file_count=8,
224
+ ),
225
+ ]
226
+
227
+
228
+ def llama3_1_base_models() -> list[Model]:
229
+ return [
230
+ Model(
231
+ core_model_id=CoreModelId.llama3_1_8b,
232
+ description="Llama 3.1 8b model",
233
+ huggingface_repo="meta-llama/Llama-3.1-8B",
234
+ arch_args={
235
+ "dim": 4096,
236
+ "n_layers": 32,
237
+ "n_heads": 32,
238
+ "n_kv_heads": 8,
239
+ "vocab_size": LLAMA3_VOCAB_SIZE,
240
+ "ffn_dim_multiplier": 1.3,
241
+ "multiple_of": 1024,
242
+ "norm_eps": 1e-05,
243
+ "rope_theta": 500000.0,
244
+ "use_scaled_rope": True,
245
+ },
246
+ pth_file_count=1,
247
+ ),
248
+ Model(
249
+ core_model_id=CoreModelId.llama3_1_70b,
250
+ description="Llama 3.1 70b model",
251
+ huggingface_repo="meta-llama/Llama-3.1-70B",
252
+ arch_args={
253
+ "dim": 8192,
254
+ "n_layers": 80,
255
+ "n_heads": 64,
256
+ "n_kv_heads": 8,
257
+ "vocab_size": LLAMA3_VOCAB_SIZE,
258
+ "ffn_dim_multiplier": 1.3,
259
+ "multiple_of": 4096,
260
+ "norm_eps": 1e-05,
261
+ "rope_theta": 500000.0,
262
+ "use_scaled_rope": True,
263
+ },
264
+ pth_file_count=8,
265
+ ),
266
+ Model(
267
+ core_model_id=CoreModelId.llama3_1_405b,
268
+ variant="bf16-mp8",
269
+ description="Llama 3.1 405b model (BF16 weights)",
270
+ huggingface_repo="meta-llama/Llama-3.1-405B",
271
+ arch_args={
272
+ "dim": 16384,
273
+ "n_layers": 126,
274
+ "n_heads": 128,
275
+ "n_kv_heads": 8,
276
+ "vocab_size": LLAMA3_VOCAB_SIZE,
277
+ "ffn_dim_multiplier": 1.2,
278
+ "multiple_of": 4096,
279
+ "norm_eps": 1e-05,
280
+ "rope_theta": 500000.0,
281
+ "use_scaled_rope": True,
282
+ },
283
+ pth_file_count=8,
284
+ ),
285
+ Model(
286
+ core_model_id=CoreModelId.llama3_1_405b,
287
+ description="Llama 3.1 405b model (FP8 quantized)",
288
+ huggingface_repo="meta-llama/Llama-3.1-405B-FP8",
289
+ quantization_format=CheckpointQuantizationFormat.fp8_mixed,
290
+ arch_args={
291
+ "dim": 16384,
292
+ "n_layers": 126,
293
+ "n_heads": 128,
294
+ "n_kv_heads": 8,
295
+ "vocab_size": LLAMA3_VOCAB_SIZE,
296
+ "ffn_dim_multiplier": 1.2,
297
+ "multiple_of": 4096,
298
+ "norm_eps": 1e-05,
299
+ "rope_theta": 500000.0,
300
+ "use_scaled_rope": True,
301
+ },
302
+ pth_file_count=8,
303
+ ),
304
+ Model(
305
+ core_model_id=CoreModelId.llama3_1_405b,
306
+ variant="bf16-mp16",
307
+ description="Llama 3.1 405b model (BF16 weights for mp16)",
308
+ huggingface_repo="meta-llama/Llama-3.1-405B",
309
+ arch_args={
310
+ "dim": 16384,
311
+ "n_layers": 126,
312
+ "n_heads": 128,
313
+ "n_kv_heads": 16,
314
+ "vocab_size": LLAMA3_VOCAB_SIZE,
315
+ "ffn_dim_multiplier": 1.2,
316
+ "multiple_of": 4096,
317
+ "norm_eps": 1e-05,
318
+ "rope_theta": 500000.0,
319
+ "use_scaled_rope": True,
320
+ },
321
+ pth_file_count=16,
322
+ ),
323
+ ]
324
+
325
+
326
+ def llama3_2_base_models() -> list[Model]:
327
+ return [
328
+ Model(
329
+ core_model_id=CoreModelId.llama3_2_1b,
330
+ description="Llama 3.2 1b model",
331
+ huggingface_repo="meta-llama/Llama-3.2-1B",
332
+ arch_args={
333
+ "dim": 2048,
334
+ "n_layers": 16,
335
+ "n_heads": 32,
336
+ "n_kv_heads": 8,
337
+ "vocab_size": LLAMA3_VOCAB_SIZE,
338
+ "ffn_dim_multiplier": 1.5,
339
+ "multiple_of": 256,
340
+ "norm_eps": 1e-05,
341
+ "rope_theta": 500000.0,
342
+ "use_scaled_rope": True,
343
+ },
344
+ pth_file_count=1,
345
+ ),
346
+ Model(
347
+ core_model_id=CoreModelId.llama3_2_3b,
348
+ description="Llama 3.2 3b model",
349
+ huggingface_repo="meta-llama/Llama-3.2-3B",
350
+ arch_args={
351
+ "dim": 3072,
352
+ "n_layers": 28,
353
+ "n_heads": 24,
354
+ "n_kv_heads": 8,
355
+ "vocab_size": LLAMA3_VOCAB_SIZE,
356
+ "ffn_dim_multiplier": 1.0,
357
+ "multiple_of": 256,
358
+ "norm_eps": 1e-05,
359
+ "rope_theta": 500000.0,
360
+ "use_scaled_rope": True,
361
+ },
362
+ pth_file_count=1,
363
+ ),
364
+ Model(
365
+ core_model_id=CoreModelId.llama3_2_11b_vision,
366
+ description="Llama 3.2 11b vision model",
367
+ huggingface_repo="meta-llama/Llama-3.2-11B-Vision",
368
+ arch_args={
369
+ "dim": 4096,
370
+ "n_layers": 32,
371
+ "n_heads": 32,
372
+ "n_kv_heads": 8,
373
+ "vocab_size": LLAMA3_VOCAB_SIZE,
374
+ "ffn_dim_multiplier": 1.3,
375
+ "multiple_of": 1024,
376
+ "norm_eps": 1e-05,
377
+ "rope_theta": 500000.0,
378
+ "use_scaled_rope": True,
379
+ "vision_chunk_size": 448,
380
+ "vision_max_num_chunks": 4,
381
+ "vision_num_cross_attention_layers": 8,
382
+ },
383
+ pth_file_count=1,
384
+ ),
385
+ Model(
386
+ core_model_id=CoreModelId.llama3_2_90b_vision,
387
+ description="Llama 3.2 90b vision model",
388
+ huggingface_repo="meta-llama/Llama-3.2-90B-Vision",
389
+ arch_args={
390
+ "dim": 8192,
391
+ "n_layers": 80,
392
+ "n_heads": 64,
393
+ "n_kv_heads": 8,
394
+ "vocab_size": LLAMA3_VOCAB_SIZE,
395
+ "ffn_dim_multiplier": 1.3,
396
+ "multiple_of": 4096,
397
+ "norm_eps": 1e-05,
398
+ "rope_theta": 500000.0,
399
+ "use_scaled_rope": True,
400
+ "vision_chunk_size": 560,
401
+ "vision_max_num_chunks": 4,
402
+ "vision_num_cross_attention_layers": 20,
403
+ },
404
+ pth_file_count=8,
405
+ ),
406
+ ]
407
+
408
+
409
+ def llama2_instruct_models() -> list[Model]:
410
+ return [
411
+ Model(
412
+ core_model_id=CoreModelId.llama2_7b_chat,
413
+ description="Llama 2 7b chat model",
414
+ huggingface_repo="meta-llama/Llama-2-7b-chat",
415
+ arch_args={
416
+ "dim": 4096,
417
+ "n_layers": 32,
418
+ "n_heads": 32,
419
+ "n_kv_heads": 8,
420
+ "vocab_size": LLAMA2_VOCAB_SIZE,
421
+ "ffn_dim_multiplier": 1.3,
422
+ "multiple_of": 256,
423
+ "norm_eps": 1e-05,
424
+ "rope_theta": 500000.0,
425
+ "use_scaled_rope": False,
426
+ },
427
+ pth_file_count=1,
428
+ ),
429
+ Model(
430
+ core_model_id=CoreModelId.llama2_13b_chat,
431
+ description="Llama 2 13b chat model",
432
+ huggingface_repo="meta-llama/Llama-2-13b-chat",
433
+ arch_args={
434
+ "dim": 5120,
435
+ "n_layers": 40,
436
+ "n_heads": 40,
437
+ "n_kv_heads": 8,
438
+ "vocab_size": LLAMA2_VOCAB_SIZE,
439
+ "ffn_dim_multiplier": 1.3,
440
+ "multiple_of": 256,
441
+ "norm_eps": 1e-05,
442
+ "rope_theta": 500000.0,
443
+ "use_scaled_rope": False,
444
+ },
445
+ pth_file_count=1,
446
+ ),
447
+ Model(
448
+ core_model_id=CoreModelId.llama2_70b_chat,
449
+ description="Llama 2 70b chat model",
450
+ huggingface_repo="meta-llama/Llama-2-70b-chat",
451
+ arch_args={
452
+ "dim": 8192,
453
+ "n_layers": 80,
454
+ "n_heads": 64,
455
+ "n_kv_heads": 8,
456
+ "vocab_size": LLAMA2_VOCAB_SIZE,
457
+ "ffn_dim_multiplier": 1.3,
458
+ "multiple_of": 256,
459
+ "norm_eps": 1e-05,
460
+ "rope_theta": 500000.0,
461
+ "use_scaled_rope": False,
462
+ },
463
+ pth_file_count=8,
464
+ ),
465
+ ]
466
+
467
+
468
def llama3_instruct_models() -> list[Model]:
    """Registry entries for the Llama 3 instruct model family."""
    # (core id, size label, repo suffix, dim, n_layers, n_heads, multiple_of, shard count)
    specs = [
        (CoreModelId.llama3_8b_instruct, "8b", "Llama-3-8B-Instruct", 4096, 32, 32, 1024, 1),
        (CoreModelId.llama3_70b_instruct, "70b", "Llama-3-70B-Instruct", 8192, 80, 64, 4096, 8),
    ]
    return [
        Model(
            core_model_id=cid,
            description=f"Llama 3 {size} instruct model",
            huggingface_repo=f"meta-llama/{repo}",
            arch_args={
                "dim": dim,
                "n_layers": n_layers,
                "n_heads": n_heads,
                "n_kv_heads": 8,
                "vocab_size": LLAMA3_VOCAB_SIZE,
                "ffn_dim_multiplier": 1.3,
                "multiple_of": multiple_of,
                "norm_eps": 1e-05,
                "rope_theta": 500000.0,
                "use_scaled_rope": False,
            },
            pth_file_count=shards,
        )
        for cid, size, repo, dim, n_layers, n_heads, multiple_of, shards in specs
    ]
507
+
508
+
509
def llama3_1_instruct_models() -> list[Model]:
    """Registry entries for the Llama 3.1 instruct model family.

    The 405b model appears three times: two BF16 layouts (model-parallel 8
    and model-parallel 16) plus an FP8-quantized checkpoint.
    """

    def _args(dim, n_layers, n_heads, multiple_of, ffn_dim_multiplier=1.3, n_kv_heads=8):
        # Shared architecture parameters; Llama 3.1 always uses scaled RoPE.
        return {
            "dim": dim,
            "n_layers": n_layers,
            "n_heads": n_heads,
            "n_kv_heads": n_kv_heads,
            "vocab_size": LLAMA3_VOCAB_SIZE,
            "ffn_dim_multiplier": ffn_dim_multiplier,
            "multiple_of": multiple_of,
            "norm_eps": 1e-05,
            "rope_theta": 500000.0,
            "use_scaled_rope": True,
        }

    return [
        Model(
            core_model_id=CoreModelId.llama3_1_8b_instruct,
            description="Llama 3.1 8b instruct model",
            huggingface_repo="meta-llama/Llama-3.1-8B-Instruct",
            arch_args=_args(4096, 32, 32, 1024),
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama3_1_70b_instruct,
            description="Llama 3.1 70b instruct model",
            huggingface_repo="meta-llama/Llama-3.1-70B-Instruct",
            arch_args=_args(8192, 80, 64, 4096),
            pth_file_count=8,
        ),
        Model(
            core_model_id=CoreModelId.llama3_1_405b_instruct,
            variant="bf16-mp8",
            description="Llama 3.1 405b instruct model (BF16 weights)",
            huggingface_repo="meta-llama/Llama-3.1-405B-Instruct",
            arch_args=_args(16384, 126, 128, 4096, ffn_dim_multiplier=1.2),
            pth_file_count=8,
        ),
        Model(
            core_model_id=CoreModelId.llama3_1_405b_instruct,
            description="Llama 3.1 405b instruct model (FP8 quantized)",
            huggingface_repo="meta-llama/Llama-3.1-405B-Instruct-FP8",
            quantization_format=CheckpointQuantizationFormat.fp8_mixed,
            arch_args=_args(16384, 126, 128, 4096, ffn_dim_multiplier=1.2),
            pth_file_count=8,
        ),
        Model(
            core_model_id=CoreModelId.llama3_1_405b_instruct,
            variant="bf16-mp16",
            description="Llama 3.1 405b instruct model (BF16 weights for mp16)",
            huggingface_repo="meta-llama/Llama-3.1-405B-Instruct",
            # The mp16 layout doubles the KV heads (8 -> 16) to split across
            # 16 model-parallel shards.
            arch_args=_args(16384, 126, 128, 4096, ffn_dim_multiplier=1.2, n_kv_heads=16),
            pth_file_count=16,
        ),
    ]
605
+
606
+
607
def arch_args_1b() -> dict:
    """Base architecture parameters for the Llama 3.2 1b text models."""
    # Built with dict() keywords; returns a fresh dict on every call so that
    # callers may safely augment it (e.g. quantization/LoRA args).
    return dict(
        dim=2048,
        n_layers=16,
        n_heads=32,
        n_kv_heads=8,
        vocab_size=LLAMA3_VOCAB_SIZE,
        ffn_dim_multiplier=1.5,
        multiple_of=256,
        norm_eps=1e-05,
        rope_theta=500000.0,
        use_scaled_rope=True,
    )
620
+
621
+
622
def arch_args_3b() -> dict:
    """Base architecture parameters for the Llama 3.2 3b text models."""
    # Built with dict() keywords; returns a fresh dict on every call so that
    # callers may safely augment it (e.g. quantization/LoRA args).
    return dict(
        dim=3072,
        n_layers=28,
        n_heads=24,
        n_kv_heads=8,
        vocab_size=LLAMA3_VOCAB_SIZE,
        ffn_dim_multiplier=1.0,
        multiple_of=256,
        norm_eps=1e-05,
        rope_theta=500000.0,
        use_scaled_rope=True,
    )
635
+
636
+
637
def llama3_2_quantized_models() -> list[Model]:
    """Registry entries for the INT4-quantized Llama 3.2 1b/3b instruct models.

    Each size ships in two flavors: a QLoRA checkpoint (carries LoRA adapter
    parameters) and a SpinQuant checkpoint (quantization args only).
    """
    entries = []
    for size, model_id, base_args in (
        ("1b", CoreModelId.llama3_2_1b_instruct, arch_args_1b()),
        ("3b", CoreModelId.llama3_2_3b_instruct, arch_args_3b()),
    ):
        hf_size = size.upper()  # HF repo names use "1B"/"3B"
        entries.append(
            Model(
                core_model_id=model_id,
                variant="int4-qlora-eo8",
                quantization_format=CheckpointQuantizationFormat.int4,
                description=f"Llama 3.2 {size} INT4 quantized LoRA",
                huggingface_repo=f"meta-llama/Llama-3.2-{hf_size}-Instruct-QLORA_INT4_EO8",
                arch_args={
                    **base_args,
                    "quantization_args": {"group_size": 256},
                    "lora_args": {"rank": 16, "scale": 2.0},
                },
                pth_file_count=1,
            )
        )
        entries.append(
            Model(
                core_model_id=model_id,
                variant="int4-spinquant-eo8",
                quantization_format=CheckpointQuantizationFormat.int4,
                description=f"Llama 3.2 {size} INT4 quantized SpinQuant",
                huggingface_repo=f"meta-llama/Llama-3.2-{hf_size}-Instruct-SpinQuant_INT4_EO8",
                arch_args={
                    **base_args,
                    "quantization_args": {"group_size": 256},
                },
                pth_file_count=1,
            )
        )
    return entries
704
+
705
+
706
def llama3_2_instruct_models() -> list[Model]:
    """Registry entries for the Llama 3.2 instruct family.

    Covers the small text models (with their quantized variants) and the
    11b/90b vision models, which add cross-attention vision parameters.
    """

    def _vision_args(dim, n_layers, n_heads, multiple_of, n_cross_attn_layers):
        # Vision models share the Llama 3 text backbone parameters plus
        # image-chunking / cross-attention settings.
        return {
            "dim": dim,
            "n_layers": n_layers,
            "n_heads": n_heads,
            "n_kv_heads": 8,
            "vocab_size": LLAMA3_VOCAB_SIZE,
            "ffn_dim_multiplier": 1.3,
            "multiple_of": multiple_of,
            "norm_eps": 1e-05,
            "rope_theta": 500000.0,
            "use_scaled_rope": True,
            "vision_chunk_size": 560,
            "vision_max_num_chunks": 4,
            "vision_num_cross_attention_layers": n_cross_attn_layers,
        }

    return [
        Model(
            core_model_id=CoreModelId.llama3_2_1b_instruct,
            description="Llama 3.2 1b instruct model",
            huggingface_repo="meta-llama/Llama-3.2-1B-Instruct",
            arch_args=arch_args_1b(),
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama3_2_3b_instruct,
            description="Llama 3.2 3b instruct model",
            huggingface_repo="meta-llama/Llama-3.2-3B-Instruct",
            arch_args=arch_args_3b(),
            pth_file_count=1,
        ),
        *llama3_2_quantized_models(),
        Model(
            core_model_id=CoreModelId.llama3_2_11b_vision_instruct,
            description="Llama 3.2 11b vision instruct model",
            huggingface_repo="meta-llama/Llama-3.2-11B-Vision-Instruct",
            arch_args=_vision_args(4096, 32, 32, 1024, 8),
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama3_2_90b_vision_instruct,
            description="Llama 3.2 90b vision instruct model",
            huggingface_repo="meta-llama/Llama-3.2-90B-Vision-Instruct",
            arch_args=_vision_args(8192, 80, 64, 4096, 20),
            pth_file_count=8,
        ),
    ]
766
+
767
+
768
def llama3_3_instruct_models() -> list[Model]:
    """Registry entry for the Llama 3.3 70b instruct model."""
    arch = {
        "dim": 8192,
        "n_layers": 80,
        "n_heads": 64,
        "n_kv_heads": 8,
        "vocab_size": LLAMA3_VOCAB_SIZE,
        "ffn_dim_multiplier": 1.3,
        "multiple_of": 4096,
        "norm_eps": 1e-05,
        "rope_theta": 500000.0,
        "use_scaled_rope": True,
    }
    model = Model(
        core_model_id=CoreModelId.llama3_3_70b_instruct,
        description="Llama 3.3 70b instruct",
        huggingface_repo="meta-llama/Llama-3.3-70B-Instruct",
        arch_args=arch,
        pth_file_count=8,
    )
    return [model]
789
+
790
+
791
@lru_cache
def safety_models() -> list[Model]:
    """Registry entries for the Llama Guard safety/moderation models.

    Cached with @lru_cache (no arguments), so the list — and the Model
    objects inside it — is constructed once and shared by all callers.
    """
    return [
        Model(
            core_model_id=CoreModelId.llama_guard_4_12b,
            description="Llama Guard v4 12b system safety model",
            huggingface_repo="meta-llama/Llama-Guard-4-12B",
            # NOTE(review): no arch_args recorded for Guard 4 — presumably
            # resolved elsewhere; confirm before relying on this entry.
            arch_args={},
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama_guard_3_11b_vision,
            description="Llama Guard v3 11b vision system safety model",
            huggingface_repo="meta-llama/Llama-Guard-3-11B-Vision",
            arch_args={
                "dim": 4096,
                "n_layers": 32,
                "n_heads": 32,
                "n_kv_heads": 8,
                "vocab_size": LLAMA3_VOCAB_SIZE,
                "ffn_dim_multiplier": 1.3,
                "multiple_of": 1024,
                "norm_eps": 1e-05,
                "rope_theta": 500000.0,
                "use_scaled_rope": True,
                "vision_chunk_size": 560,
                "vision_max_num_chunks": 4,
                "vision_num_cross_attention_layers": 8,
            },
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama_guard_3_1b,
            variant="int4",
            description="Llama Guard v3 1b 'int4' quantized system safety model",
            huggingface_repo="meta-llama/Llama-Guard-3-1B-INT4",
            quantization_format=CheckpointQuantizationFormat.int4,
            # NOTE(review): this entry uses "rope_freq_base"/"hidden_dim"
            # where the other entries use "rope_theta"/ffn multipliers —
            # looks like a different config schema for this quantized
            # checkpoint; confirm against the consumer of arch_args.
            arch_args={
                "dim": 2048,
                "n_layers": 12,
                "n_heads": 32,
                "n_kv_heads": 8,
                "vocab_size": LLAMA3_VOCAB_SIZE,
                "rope_freq_base": 500000.0,
                "norm_eps": 1e-05,
                "hidden_dim": 6400,
                "use_scaled_rope": True,
            },
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama_guard_3_1b,
            description="Llama Guard v3 1b system safety model",
            huggingface_repo="meta-llama/Llama-Guard-3-1B",
            arch_args={
                "dim": 2048,
                "n_layers": 16,
                "n_heads": 32,
                "n_kv_heads": 8,
                "vocab_size": LLAMA3_VOCAB_SIZE,
                "ffn_dim_multiplier": 1.5,
                "multiple_of": 256,
                "norm_eps": 1e-05,
                "rope_theta": 500000.0,
                "use_scaled_rope": True,
            },
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama_guard_3_8b,
            description="Llama Guard v3 8b system safety model",
            huggingface_repo="meta-llama/Llama-Guard-3-8B",
            arch_args={
                "dim": 4096,
                "ffn_dim_multiplier": 1.3,
                "multiple_of": 1024,
                "n_heads": 32,
                "n_kv_heads": 8,
                "n_layers": 32,
                "norm_eps": 1e-05,
                "rope_theta": 500000.0,
                "use_scaled_rope": False,
                "vocab_size": LLAMA3_VOCAB_SIZE,
            },
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama_guard_3_8b,
            variant="int8",
            description="Llama Guard v3 8b system safety model",
            huggingface_repo="meta-llama/Llama-Guard-3-8B-INT8",
            quantization_format=CheckpointQuantizationFormat.int8,
            # Same architecture as the unquantized 8b entry above; only the
            # repo and quantization format differ.
            arch_args={
                "dim": 4096,
                "ffn_dim_multiplier": 1.3,
                "multiple_of": 1024,
                "n_heads": 32,
                "n_kv_heads": 8,
                "n_layers": 32,
                "norm_eps": 1e-05,
                "rope_theta": 500000.0,
                "use_scaled_rope": False,
                "vocab_size": LLAMA3_VOCAB_SIZE,
            },
            pth_file_count=1,
        ),
        Model(
            core_model_id=CoreModelId.llama_guard_2_8b,
            description="Llama Guard v2 8b system safety model",
            huggingface_repo="meta-llama/Llama-Guard-2-8B",
            # Guard 2 is Llama-2-based, hence the LLAMA2 vocab size.
            arch_args={
                "dim": 4096,
                "n_layers": 32,
                "n_heads": 32,
                "n_kv_heads": 8,
                "vocab_size": LLAMA2_VOCAB_SIZE,
                "ffn_dim_multiplier": 1.3,
                "multiple_of": 256,
                "norm_eps": 1e-05,
                "rope_theta": 500000.0,
                "use_scaled_rope": False,
            },
            pth_file_count=1,
        ),
    ]
916
+
917
+
918
@dataclass
class LlamaDownloadInfo:
    """Everything needed to fetch one model checkpoint from llamameta.net."""

    # Remote folder the checkpoint lives under.
    folder: str
    # Individual file names to download from that folder.
    files: list[str]
    # Size in bytes reported by llama_meta_pth_size for the .pth payload;
    # 0 when no size is tracked for the model. (Presumably used for download
    # progress/validation — confirm against the downloader.)
    pth_size: int
923
+
924
+
925
def llama_meta_net_info(model: Model) -> LlamaDownloadInfo:
    """Information needed to download model from llamameta.net"""

    shard_count = model.pth_file_count
    cid = model.core_model_id
    is_fp8 = model.quantization_format == CheckpointQuantizationFormat.fp8_mixed
    is_int8_guard = (
        cid == CoreModelId.llama_guard_3_8b
        and model.quantization_format == CheckpointQuantizationFormat.int8
    )

    # Resolve the remote folder. The 405b and some Llama Guard checkpoints
    # use bespoke folder names; everything else mirrors its HF repo name.
    if cid == CoreModelId.llama3_1_405b:
        if shard_count == 16:
            folder = "Llama-3.1-405B-MP16"
        elif is_fp8:
            folder = "Llama-3.1-405B"
        else:
            folder = "Llama-3.1-405B-MP8"
    elif cid == CoreModelId.llama3_1_405b_instruct:
        if shard_count == 16:
            folder = "Llama-3.1-405B-Instruct-MP16"
        elif is_fp8:
            folder = "Llama-3.1-405B-Instruct"
        else:
            folder = "Llama-3.1-405B-Instruct-MP8"
    elif cid == CoreModelId.llama_guard_3_8b:
        folder = "Llama-Guard-3-8B-INT8-HF" if is_int8_guard else "Llama-Guard-3-8B"
    elif cid == CoreModelId.llama_guard_2_8b:
        folder = "llama-guard-2"
    else:
        if model.huggingface_repo is None:
            raise ValueError(f"Model {model.core_model_id} has no huggingface_repo set")
        folder = model.huggingface_repo.split("/")[-1]
        # Llama 2 folders on llamameta.net are lowercase.
        if "Llama-2" in folder:
            folder = folder.lower()

    files = ["checklist.chk"]
    if is_int8_guard:
        # HF-format INT8 Guard checkpoint: safetensors + tokenizer metadata.
        files += [
            "generation_config.json",
            "model-00001-of-00002.safetensors",
            "model-00002-of-00002.safetensors",
            "special_tokens_map.json",
            "tokenizer.json",
            "tokenizer_config.json",
            "model.safetensors.index.json",
        ]
    elif (
        cid == CoreModelId.llama_guard_3_1b
        and model.quantization_format == CheckpointQuantizationFormat.int4
    ):
        # .pte bundle for the INT4 1b Guard model.
        files += [
            "llama_guard_3_1b_pruned_xnnpack.pte",
            "example-prompt.txt",
            "params.json",
            "tokenizer.model",
        ]
    else:
        # Standard meta-format checkpoint layout: tokenizer, params and one
        # consolidated shard per model-parallel rank (plus FP8 scales when
        # the checkpoint is FP8-quantized).
        files += ["tokenizer.model", "params.json"]
        if is_fp8:
            files += [f"fp8_scales_{i}.pt" for i in range(shard_count)]
        files += [f"consolidated.{i:02d}.pth" for i in range(shard_count)]

    return LlamaDownloadInfo(
        folder=folder,
        files=files,
        pth_size=llama_meta_pth_size(model),
    )
1001
+
1002
+
1003
# Sadness because Cloudfront rejects our HEAD requests to find Content-Length
def llama_meta_pth_size(model: Model) -> int:
    """Return the hard-coded size in bytes of the model's .pth download.

    Sizes are only tracked for the very large checkpoints (Llama 3.1 405b
    and Llama 4 Maverick 17b-128e); every other model returns 0, i.e.
    "size unknown". (Presumably a per-shard size consumed by the download
    progress logic — confirm against the caller.)
    """
    # Fast path: anything other than the tracked large models has no size.
    if model.core_model_id not in (
        CoreModelId.llama3_1_405b,
        CoreModelId.llama3_1_405b_instruct,
        CoreModelId.llama4_maverick_17b_128e,
        CoreModelId.llama4_maverick_17b_128e_instruct,
    ):
        return 0

    if model.model_family == ModelFamily.llama3_1:
        # 405b: size depends on the model-parallel layout / quantization.
        if model.pth_file_count == 16:
            return 51268302389
        elif model.quantization_format == CheckpointQuantizationFormat.fp8_mixed:
            return 60903742309
        else:
            return 101470976045

    if model.model_family == ModelFamily.llama4:
        if model.core_model_id == CoreModelId.llama4_maverick_17b_128e:
            return 100458118386
        elif model.core_model_id == CoreModelId.llama4_maverick_17b_128e_instruct:
            if model.quantization_format == CheckpointQuantizationFormat.fp8_mixed:
                return 54121549657
            else:
                return 100426653046
    return 0