@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,428 @@
1
+ # Model Merging Examples
2
+
3
+ Real-world merge configurations from successful models on HuggingFace and research papers.
4
+
5
+ ## Table of Contents
6
+ - Successful Merges
7
+ - Mixtral-based Merges
8
+ - Llama-based Merges
9
+ - Task-Specific Merges
10
+ - Production Examples
11
+
12
+ ## Successful Merges
13
+
14
+ ### Marcoro14-7B-slerp
15
+
16
+ **Achievement**: #1 on Open LLM Leaderboard (January 2024)
17
+ **Method**: SLERP
18
+ **Source**: HuggingFace
19
+
20
+ ```yaml
21
+ # marcoro14-7b-slerp.yml
22
+ merge_method: slerp
23
+ slices:
24
+ - sources:
25
+ - model: AIDC-ai-business/Marcoroni-7B-v3
26
+ layer_range: [0, 32]
27
+ - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.1
28
+ layer_range: [0, 32]
29
+ parameters:
30
+ t: 0.5 # Equal blend
31
+ dtype: bfloat16
32
+ ```
33
+
34
+ **Results**:
35
+ - Average: 74.32 on Open LLM Leaderboard
36
+ - Strong across all tasks
37
+ - Smooth capability combination
38
+
39
+ ### goliath-120b (Mixtral MoE)
40
+
41
+ **Method**: SLERP with per-filter (self-attention / MLP) interpolation gradients
42
+ **Achievement**: Top-performing 120B model
43
+
44
+ ```yaml
45
+ # goliath-120b.yml
46
+ merge_method: slerp
47
+ slices:
48
+ - sources:
49
+ - model: alpindale/c4ai-command-r-plus-GPTQ
50
+ layer_range: [0, 40]
51
+ - model: CohereForAI/c4ai-command-r-v01
52
+ layer_range: [0, 40]
53
+ parameters:
54
+ t:
55
+ - filter: self_attn
56
+ value: [0, 0.5, 0.3, 0.7, 1] # Layer-specific blending
57
+ - filter: mlp
58
+ value: [1, 0.5, 0.7, 0.3, 0]
59
+ - value: 0.5 # Default
60
+ dtype: float16
61
+ ```
62
+
63
+ ## Mixtral-based Merges
64
+
65
+ ### Math + Code Specialist
66
+
67
+ **Goal**: Combine mathematical reasoning with code generation
68
+
69
+ ```yaml
70
+ # math-code-mixtral.yml
71
+ merge_method: task_arithmetic
72
+ base_model: mistralai/Mixtral-8x7B-v0.1
73
+ models:
74
+ - model: WizardLM/WizardMath-7B-V1.1
75
+ parameters:
76
+ weight: 0.6 # Emphasize math
77
+ - model: ajibawa-2023/Code-Mixtral-8x7B
78
+ parameters:
79
+ weight: 0.4 # Add code
80
+ dtype: bfloat16
81
+ ```
82
+
83
+ **Expected capabilities**:
84
+ - Strong mathematical reasoning
85
+ - Code generation and understanding
86
+ - Technical problem-solving
87
+
88
+ ### Chat + Roleplay Merge
89
+
90
+ ```yaml
91
+ # chat-roleplay.yml
92
+ merge_method: slerp
93
+ slices:
94
+ - sources:
95
+ - model: teknium/OpenHermes-2.5-Mistral-7B
96
+ layer_range: [0, 32]
97
+ - model: Undi95/MLewd-ReMM-L2-Chat-20B-Part1
98
+ layer_range: [0, 32]
99
+ parameters:
100
+ t: 0.5
101
+ dtype: bfloat16
102
+ ```
103
+
104
+ ### Multi-Task TIES Merge
105
+
106
+ ```yaml
107
+ # multi-task-mixtral.yml
108
+ merge_method: ties
109
+ base_model: mistralai/Mixtral-8x7B-v0.1
110
+ models:
111
+ - model: WizardLM/WizardMath-7B-V1.1
112
+ parameters:
113
+ density: 0.5
114
+ weight: 1.0
115
+ - model: teknium/OpenHermes-2.5-Mistral-7B
116
+ parameters:
117
+ density: 0.5
118
+ weight: 1.0
119
+ - model: ajibawa-2023/Code-Mixtral-8x7B
120
+ parameters:
121
+ density: 0.5
122
+ weight: 1.0
123
+ parameters:
124
+ normalize: true
125
+ dtype: bfloat16
126
+ ```
127
+
128
+ ## Llama-based Merges
129
+
130
+ ### Platypus-Hermes Merge
131
+
132
+ **Models**: garage-bAInd/Platypus2-13B + WizardLM/WizardLM-13B-V1.2 + psmathur/orca_mini_v3_13b
133
+
134
+ ```yaml
135
+ # platypus-hermes-13b.yml
136
+ merge_method: linear
137
+ models:
138
+ - model: garage-bAInd/Platypus2-13B
139
+ parameters:
140
+ weight: 0.5
141
+ - model: WizardLM/WizardLM-13B-V1.2
142
+ parameters:
143
+ weight: 0.3
144
+ - model: psmathur/orca_mini_v3_13b
145
+ parameters:
146
+ weight: 0.2
147
+ dtype: float16
148
+ ```
149
+
150
+ ### DARE-TIES Llama Merge
151
+
152
+ **Source**: DARE paper (arXiv 2311.03099)
153
+
154
+ ```yaml
155
+ # dare-ties-llama.yml
156
+ merge_method: dare_ties
157
+ base_model: meta-llama/Llama-2-7b-hf
158
+ models:
159
+ - model: WizardLM/WizardLM-7B-V1.0
160
+ parameters:
161
+ density: 0.5 # Keep top 50%
162
+ weight: 0.6
163
+ dare:
164
+ drop_rate: 0.9 # Drop 90% of deltas
165
+ - model: garage-bAInd/Platypus-7B
166
+ parameters:
167
+ density: 0.5
168
+ weight: 0.4
169
+ dare:
170
+ drop_rate: 0.9
171
+ parameters:
172
+ int8_mask: true
173
+ dtype: bfloat16
174
+ ```
175
+
176
+ ## Task-Specific Merges
177
+
178
+ ### Medical Domain
179
+
180
+ **Goal**: Create a medical specialist model
181
+
182
+ ```yaml
183
+ # medical-specialist.yml
184
+ merge_method: task_arithmetic
185
+ base_model: mistralai/Mistral-7B-v0.1
186
+ models:
187
+ - model: medalpaca/medalpaca-7b
188
+ parameters:
189
+ weight: 0.7 # Strong medical knowledge
190
+ - model: teknium/OpenHermes-2.5-Mistral-7B
191
+ parameters:
192
+ weight: 0.3 # Add general chat ability
193
+ dtype: bfloat16
194
+ ```
195
+
196
+ ### Legal Assistant
197
+
198
+ ```yaml
199
+ # legal-assistant.yml
200
+ merge_method: slerp
201
+ slices:
202
+ - sources:
203
+ - model: law-ai/legal-bert-7b
204
+ layer_range: [0, 32]
205
+ - model: teknium/OpenHermes-2.5-Mistral-7B
206
+ layer_range: [0, 32]
207
+ parameters:
208
+ t:
209
+ - filter: self_attn
210
+ value: 0.7 # Emphasize legal model in attention
211
+ - filter: mlp
212
+ value: 0.3 # More general chat in MLPs
213
+ - value: 0.5
214
+ dtype: bfloat16
215
+ ```
216
+
217
+ ### Multilingual Merge
218
+
219
+ ```yaml
220
+ # multilingual-merge.yml
221
+ merge_method: linear
222
+ models:
223
+ - model: mistralai/Mistral-7B-v0.1
224
+ parameters:
225
+ weight: 0.4 # English
226
+ - model: CohereForAI/aya-23-7B
227
+ parameters:
228
+ weight: 0.3 # Multilingual
229
+ - model: Qwen/Qwen3-7B
230
+ parameters:
231
+ weight: 0.3 # Asian languages
232
+ dtype: bfloat16
233
+ ```
234
+
235
+ ## Production Examples
236
+
237
+ ### Gradual Merge (Safer)
238
+
239
+ **Strategy**: Merge incrementally, test at each step
240
+
241
+ ```yaml
242
+ # Step 1: Merge two models
243
+ # step1.yml
244
+ merge_method: slerp
245
+ slices:
246
+ - sources:
247
+ - model: base_model
248
+ layer_range: [0, 32]
249
+ - model: specialist_1
250
+ layer_range: [0, 32]
251
+ parameters:
252
+ t: 0.3 # Conservative blend
253
+ dtype: bfloat16
254
+ ```
255
+
256
+ ```yaml
257
+ # Step 2: Add third model to result
258
+ # step2.yml
259
+ merge_method: slerp
260
+ slices:
261
+ - sources:
262
+ - model: ./merged_step1 # Previous merge
263
+ layer_range: [0, 32]
264
+ - model: specialist_2
265
+ layer_range: [0, 32]
266
+ parameters:
267
+ t: 0.3 # Conservative
268
+ dtype: bfloat16
269
+ ```
270
+
271
+ **Benefits**:
272
+ - Test after each merge
273
+ - Easier to debug
274
+ - Can stop if quality degrades
275
+
276
+ ### A/B Testing Setup
277
+
278
+ ```yaml
279
+ # variant_a.yml - Conservative
280
+ merge_method: slerp
281
+ slices:
282
+ - sources:
283
+ - model: base_model
284
+ layer_range: [0, 32]
285
+ - model: specialist
286
+ layer_range: [0, 32]
287
+ parameters:
288
+ t: 0.3 # 30% specialist
289
+ dtype: bfloat16
290
+ ```
291
+
292
+ ```yaml
293
+ # variant_b.yml - Aggressive
294
+ merge_method: slerp
295
+ slices:
296
+ - sources:
297
+ - model: base_model
298
+ layer_range: [0, 32]
299
+ - model: specialist
300
+ layer_range: [0, 32]
301
+ parameters:
302
+ t: 0.7 # 70% specialist
303
+ dtype: bfloat16
304
+ ```
305
+
306
+ **Test both**, then choose the better performer
307
+
308
+ ### Frankenmerge (Experimental)
309
+
310
+ **Warning**: Experimental, may not work
311
+
312
+ ```yaml
313
+ # frankenmerge.yml
314
+ merge_method: passthrough
315
+ slices:
316
+ # First 8 layers from model A
317
+ - sources:
318
+ - model: model_a
319
+ layer_range: [0, 8]
320
+
321
+ # Middle 16 layers from model B
322
+ - sources:
323
+ - model: model_b
324
+ layer_range: [8, 24]
325
+
326
+ # Last 8 layers from model C
327
+ - sources:
328
+ - model: model_c
329
+ layer_range: [24, 32]
330
+ dtype: bfloat16
331
+ ```
332
+
333
+ **Use case**: Create models with non-standard layer counts
334
+
335
+ ### MoE from Merges
336
+
337
+ ```yaml
338
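+ # moe-from-merges.yml (build with `mergekit-moe`, not `mergekit-yaml`)
339
+ gate_mode: hidden # initialize router gates from hidden states of the prompts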
340
+ base_model: mistralai/Mistral-7B-v0.1
341
+ experts:
342
+ - source_model: WizardLM/WizardMath-7B-V1.1
343
+ positive_prompts:
344
+ - "math"
345
+ - "calculate"
346
+ - "solve"
347
+ - "equation"
348
+
349
+ - source_model: ajibawa-2023/Code-Mistral-7B
350
+ positive_prompts:
351
+ - "code"
352
+ - "python"
353
+ - "function"
354
+ - "programming"
355
+
356
+ - source_model: teknium/OpenHermes-2.5-Mistral-7B
357
+ positive_prompts:
358
+ - "chat"
359
+ - "conversation"
360
+ - "help"
361
+ - "question"
362
+ dtype: bfloat16
363
+ ```
364
+
365
+ **Result**: Dynamic expert selection based on prompt
366
+
367
+ ## Command-Line Examples
368
+
369
+ ### Basic Merge
370
+
371
+ ```bash
372
+ # Simple two-model SLERP
373
+ mergekit-yaml config.yml ./output-model \
374
+ --cuda \
375
+ --lazy-unpickle
376
+ ```
377
+
378
+ ### Large Model Merge (Low VRAM)
379
+
380
+ ```bash
381
+ # Merge on CPU with offloading (slow but works with 8GB VRAM)
381
+ mergekit-yaml config.yml ./output-model \
382
+ --allow-crimes \
384
+ --low-cpu-memory
385
+ ```
386
+
387
+ ### Merge and Upload
388
+
389
+ ```bash
390
+ # Merge and push to HuggingFace
391
+ mergekit-yaml config.yml ./merged-model --cuda
392
+
393
+ cd merged-model
394
+ python << EOF
395
+ from transformers import AutoModelForCausalLM, AutoTokenizer
395
+
396
+ model = AutoModelForCausalLM.from_pretrained("./")
398
+ tokenizer = AutoTokenizer.from_pretrained("./")
399
+
400
+ model.push_to_hub("username/my-merged-model")
401
+ tokenizer.push_to_hub("username/my-merged-model")
402
+ EOF
403
+ ```
404
+
405
+ ### Batch Merging
406
+
407
+ ```bash
408
+ # Merge multiple configs
409
+ for config in configs/*.yml; do
410
+ output="./output/$(basename "$config" .yml)"
411
+ mergekit-yaml "$config" "$output" --cuda
412
+ done
413
+ ```
414
+
415
+ ## Tips from Successful Merges
416
+
417
+ 1. **Start Conservative**: Use t=0.3-0.5 for SLERP, test before going higher
418
+ 2. **Match Architectures**: Only merge models with same base architecture
419
+ 3. **Test Extensively**: Benchmark on multiple tasks before deploying
420
+ 4. **Layer-Specific Merging**: Using different t values for attention vs MLP often works better
421
+ 5. **DARE for Many Models**: When merging 3+ models, DARE-TIES is often the best choice
422
+ 6. **Gradual Merging**: For production, merge incrementally and test
423
+
424
+ ## Resources
425
+
426
+ - **HuggingFace Models**: Browse merged models for inspiration
427
+ - **Open LLM Leaderboard**: See top-performing merges
428
+ - **mergekit Examples**: https://github.com/arcee-ai/mergekit/tree/main/examples