@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,526 @@
1
+ ---
2
+ name: moe-training
3
+ description: Train Mixture of Experts (MoE) models using DeepSpeed or HuggingFace. Use when training large-scale models with limited compute (5× cost reduction vs dense models), implementing sparse architectures like Mixtral 8x7B or DeepSeek-V3, or scaling model capacity without proportional compute increase. Covers MoE architectures, routing mechanisms, load balancing, expert parallelism, and inference optimization.
4
+ version: 1.0.0
5
+ author: Synthetic Sciences
6
+ license: MIT
7
+ tags: [Emerging Techniques, MoE, Mixture Of Experts, Sparse Models, DeepSpeed, Expert Parallelism, Mixtral, DeepSeek, Routing, Load Balancing, Efficient Training]
8
+ dependencies: [deepspeed, transformers, torch, accelerate]
9
+ ---
10
+
11
+ # MoE Training: Mixture of Experts
12
+
13
+ ## When to Use This Skill
14
+
15
+ Use MoE Training when you need to:
16
+ - **Train larger models** with limited compute (5× cost reduction vs dense models)
17
+ - **Scale model capacity** without proportional compute increase
18
+ - **Achieve better performance** per compute budget than dense models
19
+ - **Specialize experts** for different domains/tasks/languages
20
+ - **Reduce inference latency** with sparse activation (only ~13B of 47B params are active per token in Mixtral)
21
+ - **Implement SOTA models** like Mixtral 8x7B, DeepSeek-V3, Switch Transformers
22
+
23
+ **Notable MoE Models**: Mixtral 8x7B (Mistral AI), DeepSeek-V3, Switch Transformers (Google), GLaM (Google), NLLB-MoE (Meta)
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ # DeepSpeed with MoE support
29
+ pip install "deepspeed>=0.6.0"
30
+
31
+ # Megatron-DeepSpeed for large-scale training
32
+ git clone https://github.com/microsoft/Megatron-DeepSpeed
33
+ cd Megatron-DeepSpeed
34
+ pip install -r requirements.txt
35
+
36
+ # Alternative: HuggingFace Transformers
37
+ pip install transformers accelerate
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ### Basic MoE Architecture
43
+
44
+ ```python
45
+ import torch
46
+ import torch.nn as nn
47
+
48
+ class MoELayer(nn.Module):
49
+ """Sparse Mixture of Experts layer."""
50
+
51
+ def __init__(self, hidden_size, num_experts=8, top_k=2):
52
+ super().__init__()
53
+ self.num_experts = num_experts
54
+ self.top_k = top_k
55
+
56
+ # Expert networks (FFN)
57
+ self.experts = nn.ModuleList([
58
+ nn.Sequential(
59
+ nn.Linear(hidden_size, 4 * hidden_size),
60
+ nn.GELU(),
61
+ nn.Linear(4 * hidden_size, hidden_size)
62
+ )
63
+ for _ in range(num_experts)
64
+ ])
65
+
66
+ # Gating network (router)
67
+ self.gate = nn.Linear(hidden_size, num_experts)
68
+
69
+ def forward(self, x):
70
+ # x shape: (batch_size, seq_len, hidden_size)
71
+ batch_size, seq_len, hidden_size = x.shape
72
+
73
+ # Flatten for routing
74
+ x_flat = x.view(-1, hidden_size) # (batch_size * seq_len, hidden_size)
75
+
76
+ # Compute gate scores
77
+ gate_logits = self.gate(x_flat) # (batch_size * seq_len, num_experts)
78
+
79
+ # Top-k routing
80
+ gate_scores = torch.softmax(gate_logits, dim=-1)
81
+ topk_scores, topk_indices = torch.topk(gate_scores, self.top_k, dim=-1)
82
+
83
+ # Normalize top-k scores
84
+ topk_scores = topk_scores / topk_scores.sum(dim=-1, keepdim=True)
85
+
86
+ # Dispatch and combine expert outputs
87
+ output = torch.zeros_like(x_flat)
88
+
89
+ for i in range(self.top_k):
90
+ expert_idx = topk_indices[:, i]
91
+ expert_scores = topk_scores[:, i].unsqueeze(-1)
92
+
93
+ # Route tokens to experts
94
+ for expert_id in range(self.num_experts):
95
+ mask = (expert_idx == expert_id)
96
+ if mask.any():
97
+ expert_input = x_flat[mask]
98
+ expert_output = self.experts[expert_id](expert_input)
99
+ output[mask] += expert_scores[mask] * expert_output
100
+
101
+ # Reshape back
102
+ return output.view(batch_size, seq_len, hidden_size)
103
+ ```
104
+
105
+ ### DeepSpeed MoE Training
106
+
107
+ ```bash
108
+ # Training script with MoE
109
+ deepspeed pretrain_gpt_moe.py \
110
+ --num-layers 24 \
111
+ --hidden-size 1024 \
112
+ --num-attention-heads 16 \
113
+ --seq-length 2048 \
114
+ --max-position-embeddings 2048 \
115
+ --micro-batch-size 4 \
116
+ --global-batch-size 256 \
117
+ --train-iters 500000 \
118
+ --lr 0.0001 \
119
+ --min-lr 0.00001 \
120
+ --lr-decay-style cosine \
121
+ --num-experts 128 \
122
+ --moe-expert-parallel-size 4 \
123
+ --moe-loss-coeff 0.01 \
124
+ --moe-train-capacity-factor 1.25 \
125
+ --moe-eval-capacity-factor 2.0 \
126
+ --fp16 \
127
+ --deepspeed_config ds_config.json
128
+ ```
129
+
130
+ ## Core Concepts
131
+
132
+ ### 1. MoE Architecture
133
+
134
+ **Key Components:**
135
+ - **Experts**: Multiple specialized FFN networks (typically 8-128)
136
+ - **Router/Gate**: Learned network that selects which experts to use
137
+ - **Top-k Routing**: Activate only k experts per token (k=1 or k=2)
138
+ - **Load Balancing**: Ensure even expert utilization
139
+
140
+ ```
141
+ Input Token
142
+     ↓
143
+ Router (Gate Network)
144
+     ↓
145
+ Top-k Expert Selection (e.g., 2 out of 8)
146
+     ↓
147
+ Expert 1 (weight: 0.6) + Expert 5 (weight: 0.4)
148
+     ↓
149
+ Weighted Combination
150
+     ↓
151
+ Output
152
+ ```
153
+
154
+ ### 2. Routing Mechanisms
155
+
156
+ **Top-1 Routing (Switch Transformer):**
157
+ ```python
158
+ # Simplest routing: one expert per token
159
+ gate_logits = router(x) # (batch, seq_len, num_experts)
160
+ expert_idx = torch.argmax(gate_logits, dim=-1) # Hard routing
161
+ ```
162
+
163
+ **Top-2 Routing (Mixtral):**
164
+ ```python
165
+ # Top-2: two experts per token
166
+ gate_scores = torch.softmax(router(x), dim=-1)
167
+ top2_scores, top2_indices = torch.topk(gate_scores, k=2, dim=-1)
168
+
169
+ # Normalize scores
170
+ top2_scores = top2_scores / top2_scores.sum(dim=-1, keepdim=True)
171
+
172
+ # Combine expert outputs
173
+ output = (top2_scores[:, :, 0:1] * expert_outputs[top2_indices[:, :, 0]] +
174
+ top2_scores[:, :, 1:2] * expert_outputs[top2_indices[:, :, 1]])
175
+ ```
176
+
177
+ **Expert Choice Routing:**
178
+ ```python
179
+ # Experts choose top-k tokens (instead of tokens choosing experts)
180
+ # Guarantees perfect load balancing
181
+ expert_scores = router(x).transpose(-1, -2) # (batch, num_experts, seq_len)
182
+ topk_tokens = torch.topk(expert_scores, k=capacity_per_expert, dim=-1)
183
+ ```
184
+
185
+ ### 3. Load Balancing
186
+
187
+ **Auxiliary Loss:**
188
+ ```python
189
+ def load_balancing_loss(gate_logits, expert_indices, num_experts):
190
+ """Encourage uniform expert usage."""
191
+ # Fraction of tokens routed to each expert
192
+ expert_counts = torch.bincount(expert_indices.flatten(), minlength=num_experts)
193
+ expert_fraction = expert_counts.float() / expert_indices.numel()
194
+
195
+ # Gate probability for each expert (average across tokens; assumes gate_logits is flattened to (num_tokens, num_experts))
196
+ gate_probs = torch.softmax(gate_logits, dim=-1).mean(dim=0)
197
+
198
+ # Auxiliary loss: minimized when tokens and gate probability are spread uniformly across experts
199
+ aux_loss = num_experts * (expert_fraction * gate_probs).sum()
200
+
201
+ return aux_loss
202
+
203
+ # Add to main loss
204
+ total_loss = language_model_loss + 0.01 * load_balancing_loss(...)
205
+ ```
206
+
207
+ **Router Z-Loss (Stability):**
208
+ ```python
209
+ def router_z_loss(logits):
210
+ """Penalize large router logits (squared logsumexp) to keep routing numerically stable."""
211
+ z_loss = torch.logsumexp(logits, dim=-1).pow(2).mean()
212
+ return z_loss
213
+
214
+ total_loss = lm_loss + 0.01 * aux_loss + 0.001 * router_z_loss(gate_logits)
215
+ ```
216
+
217
+ ### 4. Expert Parallelism
218
+
219
+ ```python
220
+ # DeepSpeed configuration (annotated JSON; strip the # comments before saving as ds_config.json)
221
+ {
222
+ "train_batch_size": 256,
223
+ "fp16": {"enabled": true},
224
+ "moe": {
225
+ "enabled": true,
226
+ "num_experts": 128,
227
+ "expert_parallel_size": 8, # Distribute 128 experts across 8 GPUs
228
+ "capacity_factor": 1.25, # Expert capacity = tokens_per_batch * capacity_factor / num_experts
229
+ "drop_tokens": true, # Drop tokens exceeding capacity
230
+ "use_residual": false
231
+ }
232
+ }
233
+ ```
234
+
235
+ ## Training Configuration
236
+
237
+ ### DeepSpeed MoE Config
238
+
239
+ ```json
240
+ {
241
+ "train_batch_size": 256,
242
+ "gradient_accumulation_steps": 1,
243
+ "optimizer": {
244
+ "type": "Adam",
245
+ "params": {
246
+ "lr": 0.0001,
247
+ "betas": [0.9, 0.999],
248
+ "eps": 1e-8
249
+ }
250
+ },
251
+ "fp16": {
252
+ "enabled": true,
253
+ "loss_scale": 0,
254
+ "initial_scale_power": 16
255
+ },
256
+ "moe": {
257
+ "enabled": true,
258
+ "num_experts": 128,
259
+ "expert_parallel_size": 8,
260
+ "moe_loss_coeff": 0.01,
261
+ "train_capacity_factor": 1.25,
262
+ "eval_capacity_factor": 2.0,
263
+ "min_capacity": 4,
264
+ "drop_tokens": true,
265
+ "use_residual": false,
266
+ "use_tutel": false
267
+ },
268
+ "zero_optimization": {
269
+ "stage": 1
270
+ }
271
+ }
272
+ ```
273
+
274
+ ### Training Script
275
+
276
+ ```bash
277
+ #!/bin/bash
278
+
279
+ # Mixtral-style MoE training
280
+ deepspeed --num_gpus 8 pretrain_moe.py \
281
+ --model-parallel-size 1 \
282
+ --num-layers 32 \
283
+ --hidden-size 4096 \
284
+ --num-attention-heads 32 \
285
+ --seq-length 2048 \
286
+ --max-position-embeddings 4096 \
287
+ --micro-batch-size 2 \
288
+ --global-batch-size 256 \
289
+ --train-iters 500000 \
290
+ --save-interval 5000 \
291
+ --eval-interval 1000 \
292
+ --eval-iters 100 \
293
+ --lr 0.0001 \
294
+ --min-lr 0.00001 \
295
+ --lr-decay-style cosine \
296
+ --lr-warmup-iters 2000 \
297
+ --clip-grad 1.0 \
298
+ --weight-decay 0.1 \
299
+ --num-experts 8 \
300
+ --moe-expert-parallel-size 4 \
301
+ --moe-loss-coeff 0.01 \
302
+ --moe-train-capacity-factor 1.25 \
303
+ --moe-eval-capacity-factor 2.0 \
304
+ --disable-moe-token-dropping \
305
+ --fp16 \
306
+ --deepspeed \
307
+ --deepspeed_config ds_config_moe.json \
308
+ --data-path /path/to/data \
309
+ --vocab-file /path/to/vocab.json \
310
+ --merge-file /path/to/merges.txt
311
+ ```
312
+
313
+ ## Advanced Patterns
314
+
315
+ ### Mixtral 8x7B Architecture
316
+
317
+ ```python
318
+ class MixtralMoEBlock(nn.Module):
319
+ """Mixtral-style MoE block with 8 experts, top-2 routing."""
320
+
321
+ def __init__(self, config):
322
+ super().__init__()
323
+ self.hidden_dim = config.hidden_size
324
+ self.ffn_dim = config.intermediate_size
325
+ self.num_experts = config.num_local_experts # 8
326
+ self.top_k = config.num_experts_per_tok # 2
327
+
328
+ # 8 expert FFNs
329
+ self.experts = nn.ModuleList([
330
+ nn.Sequential(
331
+ nn.Linear(self.hidden_dim, self.ffn_dim, bias=False),
332
+ nn.SiLU(),
333
+ nn.Linear(self.ffn_dim, self.hidden_dim, bias=False)
334
+ )
335
+ for _ in range(self.num_experts)
336
+ ])
337
+
338
+ # Router
339
+ self.gate = nn.Linear(self.hidden_dim, self.num_experts, bias=False)
340
+
341
+ def forward(self, hidden_states):
342
+ batch_size, sequence_length, hidden_dim = hidden_states.shape
343
+
344
+ # Flatten
345
+ hidden_states = hidden_states.view(-1, hidden_dim)
346
+
347
+ # Router logits
348
+ router_logits = self.gate(hidden_states) # (batch * seq_len, num_experts)
349
+
350
+ # Softmax and top-2
351
+ routing_weights = torch.softmax(router_logits, dim=1)
352
+ routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1)
353
+
354
+ # Normalize routing weights
355
+ routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
356
+
357
+ # Initialize output
358
+ final_hidden_states = torch.zeros_like(hidden_states)
359
+
360
+ # Route to experts
361
+ for expert_idx in range(self.num_experts):
362
+ expert_layer = self.experts[expert_idx]
363
+ idx, top_x = torch.where(selected_experts == expert_idx)
364
+
365
+ if idx.shape[0] == 0:
366
+ continue
367
+
368
+ # Current expert tokens
369
+ current_hidden_states = hidden_states[idx]
370
+
371
+ # Expert forward
372
+ current_hidden_states = expert_layer(current_hidden_states)
373
+
374
+ # Weighted by routing scores
375
+ current_hidden_states *= routing_weights[idx, top_x, None]
376
+
377
+ # Accumulate
378
+ final_hidden_states.index_add_(0, idx, current_hidden_states)
379
+
380
+ # Reshape
381
+ return final_hidden_states.view(batch_size, sequence_length, hidden_dim)
382
+ ```
383
+
384
+ ### PR-MoE (Pyramid-Residual-MoE)
385
+
386
+ ```bash
387
+ # DeepSpeed PR-MoE: up to 3x smaller model size at comparable quality
388
+ deepspeed pretrain_gpt_moe.py \
389
+ --num-layers 24 \
390
+ --hidden-size 1024 \
391
+ --num-attention-heads 16 \
392
+ --num-experts "[128, 64, 32, 16]" \
393
+ --mlp-type residual \
394
+ --moe-expert-parallel-size 4 \
395
+ --moe-loss-coeff 0.01 \
396
+ --fp16
397
+ ```
398
+
399
+ ## Best Practices
400
+
401
+ ### 1. Expert Count Selection
402
+
403
+ ```python
404
+ # Rule of thumb: More experts = more capacity, but diminishing returns
405
+ # Typical configurations:
406
+ # - Small models (1B-7B): 8-16 experts
407
+ # - Medium models (7B-30B): 8-64 experts
408
+ # - Large models (30B+): 64-256 experts
409
+
410
+ # Example: Mixtral 8x7B
411
+ # Total params: ~47B (less than 8 × 7B = 56B because only the FFN experts are replicated; attention layers are shared)
412
+ # Active params: ~13B per token (2 of 8 expert FFNs plus the shared layers, top-2 routing)
413
+ # Efficiency: 47B capacity with 13B compute
414
+ ```
415
+
416
+ ### 2. Capacity Factor Tuning
417
+
418
+ ```python
419
+ # Capacity = (tokens_per_batch / num_experts) * capacity_factor
420
+
421
+ # Training: Lower capacity (faster, drops some tokens)
422
+ train_capacity_factor = 1.25 # 25% buffer
423
+
424
+ # Evaluation: Higher capacity (far fewer dropped tokens)
425
+ eval_capacity_factor = 2.0 # 100% buffer
426
+
427
+ # Formula:
428
+ expert_capacity = int((seq_len * batch_size / num_experts) * capacity_factor)
429
+ ```
430
+
431
+ ### 3. Learning Rate Guidelines
432
+
433
+ ```python
434
+ # MoE models need lower LR than dense models
435
+ # - Dense model: lr = 6e-4
436
+ # - MoE model: lr = 1e-4 (3-6× lower)
437
+
438
+ # Also extend decay schedule
439
+ dense_lr_decay_iters = 300000
440
+ moe_lr_decay_iters = 500000 # 1.5-2× longer
441
+ ```
442
+
443
+ ### 4. Loss Coefficient Tuning
444
+
445
+ ```python
446
+ # Start with standard values
447
+ moe_loss_coeff = 0.01 # Auxiliary loss (load balancing)
448
+ router_z_loss_coeff = 0.001 # Router z-loss (penalizes large logits for stability)
449
+
450
+ # If load imbalance persists, increase aux loss
451
+ if max_expert_usage / min_expert_usage > 2.0:
452
+ moe_loss_coeff = 0.1 # Stronger load balancing
453
+
454
+ # If training unstable, increase z-loss
455
+ if grad_norm > 10.0:
456
+ router_z_loss_coeff = 0.01
457
+ ```
458
+
459
+ ### 5. Avoid Common Pitfalls
460
+
461
+ ```python
462
+ # ❌ Bad: Using same LR as dense model
463
+ optimizer = Adam(model.parameters(), lr=6e-4)
464
+
465
+ # ✅ Good: Lower LR for MoE
466
+ optimizer = Adam([
467
+ {'params': model.non_moe_params, 'lr': 6e-4},
468
+ {'params': model.moe_params, 'lr': 1e-4}
469
+ ])
470
+
471
+ # ❌ Bad: No load balancing
472
+ loss = lm_loss
473
+
474
+ # ✅ Good: Add auxiliary loss
475
+ loss = lm_loss + 0.01 * aux_loss + 0.001 * z_loss
476
+
477
+ # ❌ Bad: Too many experts for small dataset
478
+ num_experts = 128 # Overfitting risk
479
+
480
+ # ✅ Good: Match experts to data diversity
481
+ num_experts = 8 # Better for small datasets
482
+ ```
483
+
484
+ ## Inference Optimization
485
+
486
+ ### Sparse Inference
487
+
488
+ ```python
489
+ # Only activate top-k experts (large compute savings; memory savings only if inactive experts are offloaded)
490
+ @torch.no_grad()
491
+ def moe_inference(x, model, top_k=2):
492
+ """Sparse MoE inference: only load k experts."""
493
+ # Router
494
+ gate_logits = model.gate(x)
495
+ topk_scores, topk_indices = torch.topk(
496
+ torch.softmax(gate_logits, dim=-1),
497
+ k=top_k,
498
+ dim=-1
499
+ )
500
+
501
+ # Load and run only top-k experts
502
+ output = torch.zeros_like(x)
503
+ for i in range(top_k):
504
+ expert_idx = topk_indices[:, i]
505
+ # Load expert from disk/offload if needed
506
+ expert = model.load_expert(expert_idx)
507
+ output += topk_scores[:, i:i+1] * expert(x)
508
+
509
+ return output
510
+ ```
511
+
512
+ ## Resources
513
+
514
+ - **DeepSpeed MoE Tutorial**: https://www.deepspeed.ai/tutorials/mixture-of-experts-nlg/
515
+ - **Mixtral Paper**: https://arxiv.org/abs/2401.04088
516
+ - **Switch Transformers**: https://arxiv.org/abs/2101.03961
517
+ - **HuggingFace MoE Guide**: https://huggingface.co/blog/moe
518
+ - **NVIDIA MoE Blog**: https://developer.nvidia.com/blog/applying-mixture-of-experts-in-llm-architectures/
519
+
520
+ ## See Also
521
+
522
+ - `references/architectures.md` - MoE model architectures (Mixtral, Switch, DeepSeek-V3)
523
+ - `references/training.md` - Advanced training techniques and optimization
524
+ - `references/inference.md` - Production deployment and serving patterns
525
+
526
+