@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,352 @@
1
+ # Model Merging Methods: Deep Dive
2
+
3
+ Complete technical guide to model merging algorithms based on research papers.
4
+
5
+ ## Table of Contents
6
+ - TIES-Merging Algorithm
7
+ - DARE (Drop And REscale)
8
+ - Linear Merging
9
+ - SLERP
10
+ - Task Arithmetic
11
+ - Comparison
12
+
13
+ ## TIES-Merging: Resolving Interference
14
+
15
+ **Paper**: "TIES-Merging: Resolving Interference When Merging Models" (NeurIPS 2023)
16
+ **Authors**: Prateek Yadav et al.
17
+ **Code**: https://github.com/prateeky2806/ties-merging
18
+
19
+ ### Algorithm Overview
20
+
21
+ TIES-Merging addresses two major sources of interference:
22
+ 1. Redundant parameter values
23
+ 2. Sign disagreement across models
24
+
25
+ **Three-Step Process**: TRIM, ELECT, MERGE
26
+
27
+ ### Step 1: TRIM (Reset Small Changes)
28
+
29
+ Reset to zero the task-vector entries that changed least during fine-tuning, keeping only the top `density` fraction by magnitude.
30
+
31
+ ```python
32
+ def trim(task_vector, density=0.2):
33
+ """Keep top-k% parameters by magnitude, reset rest to 0."""
34
+ # Calculate magnitude
35
+ magnitudes = torch.abs(task_vector)
36
+
37
+ # Get threshold for top-k%
38
+ k = int(density * task_vector.numel())
39
+ threshold = torch.topk(magnitudes.flatten(), k).values.min()
40
+
41
+ # Create mask: keep parameters above threshold
42
+ mask = magnitudes >= threshold
43
+
44
+ # Apply mask
45
+ trimmed_vector = task_vector * mask
46
+
47
+ return trimmed_vector
48
+
49
+ # Example
50
+ task_vector_1 = finetuned_model_1 - base_model
51
+ task_vector_2 = finetuned_model_2 - base_model
52
+
53
+ trimmed_1 = trim(task_vector_1, density=0.2) # Keep top 20%
54
+ trimmed_2 = trim(task_vector_2, density=0.2)
55
+ ```
56
+
57
+ ### Step 2: ELECT SIGN (Resolve Conflicts)
58
+
59
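+ When a parameter has conflicting signs across task vectors, elect a single dominant sign for it. The TIES paper elects the sign with the larger total magnitude across models; the simplified code below uses a majority vote instead.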
60
+
61
+ ```python
62
+ def elect_sign(task_vectors):
63
+ """Resolve sign conflicts across multiple task vectors."""
64
+ # Stack all task vectors
65
+ stacked = torch.stack(task_vectors) # (num_models, num_params)
66
+
67
+ # Count positive vs negative for each parameter
68
+ positive_count = (stacked > 0).sum(dim=0)
69
+ negative_count = (stacked < 0).sum(dim=0)
70
+
71
+ # Elect majority sign
72
+ final_sign = torch.where(
73
+ positive_count > negative_count,
74
+ torch.ones_like(stacked[0]),
75
+ -torch.ones_like(stacked[0])
76
+ )
77
+
78
+ # Where tie, keep sign from first model
79
+ tie_mask = (positive_count == negative_count)
80
+ final_sign[tie_mask] = torch.sign(stacked[0][tie_mask])
81
+
82
+ return final_sign
83
+
84
+ # Example
85
+ task_vectors = [trimmed_1, trimmed_2, trimmed_3]
86
+ elected_sign = elect_sign(task_vectors)
87
+ ```
88
+
89
+ ### Step 3: MERGE (Disjoint Merging)
90
+
91
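+ Merge only the parameters whose sign agrees with the elected sign. Note: the paper's disjoint mean averages each parameter over only the models that agree with the elected sign, whereas the simplified code below divides by the total number of models.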
92
+
93
+ ```python
94
+ def ties_merge(base_model, task_vectors, density=0.2, lambda_param=1.0):
95
+ """Complete TIES-Merging algorithm."""
96
+ # Step 1: Trim each task vector
97
+ trimmed_vectors = [trim(tv, density) for tv in task_vectors]
98
+
99
+ # Step 2: Elect sign
100
+ elected_sign = elect_sign(trimmed_vectors)
101
+
102
+ # Step 3: Merge aligned parameters
103
+ merged_task_vector = torch.zeros_like(task_vectors[0])
104
+
105
+ for tv in trimmed_vectors:
106
+ # Keep only parameters aligned with elected sign
107
+ aligned_mask = (torch.sign(tv) == elected_sign) | (tv == 0)
108
+ aligned_params = tv * aligned_mask
109
+
110
+ # Accumulate
111
+ merged_task_vector += aligned_params
112
+
113
+ # Average
114
+ num_models = len(task_vectors)
115
+ merged_task_vector /= num_models
116
+
117
+ # Add back to base model
118
+ final_model = base_model + lambda_param * merged_task_vector
119
+
120
+ return final_model
121
+
122
+ # Usage
123
+ base = load_model("mistralai/Mistral-7B-v0.1")
124
+ model_1 = load_model("WizardLM/WizardMath-7B-V1.1")
125
+ model_2 = load_model("teknium/OpenHermes-2.5-Mistral-7B")
126
+ model_3 = load_model("NousResearch/Nous-Hermes-2-Mistral-7B-DPO")
127
+
128
+ task_vectors = [
129
+ model_1 - base,
130
+ model_2 - base,
131
+ model_3 - base
132
+ ]
133
+
134
+ merged = ties_merge(base, task_vectors, density=0.5, lambda_param=1.0)
135
+ ```
136
+
137
+ ### Hyperparameters
138
+
139
+ **density** (ρ): Fraction of parameters to keep (default: 0.2)
140
+ - Lower (0.1-0.3): More aggressive pruning, higher sparsity
141
+ - Higher (0.5-0.8): Conservative pruning, denser result
142
+
143
+ **lambda** (λ): Scaling factor for merged task vector (default: 1.0)
144
+ - Lower (<1.0): Less influence from fine-tuned models
145
+ - Higher (>1.0): More influence from fine-tuned models
146
+
147
+ ## DARE: Drop And REscale
148
+
149
+ **Paper**: "Language Models are Super Mario: Absorbing Abilities from Homologous Models as a Free Lunch" (arXiv 2311.03099, 2023)
150
+ **Authors**: Le Yu, Bowen Yu, Haiyang Yu, Fei Huang, Yongbin Li
151
+
152
+ ### Algorithm
153
+
154
+ DARE randomly drops a fraction p of the delta parameters and rescales the remaining ones by 1 / (1 - p).
155
+
156
+ ### Mathematical Formulation
157
+
158
+ Given:
159
+ - Base model parameters: θ₀
160
+ - Fine-tuned model parameters: θₜ
161
+ - Delta parameters: δₜ = θₜ - θ₀
162
+
163
+ **Step 1: Random Drop**
164
+
165
+ ```
166
+ m_t ~ Bernoulli(p) # Drop mask
167
+ δ̃_t = (1 - m_t) ⊙ δ_t # Element-wise product
168
+ ```
169
+
170
+ **Step 2: Rescale**
171
+
172
+ ```
173
+ δ̂_t = δ̃_t / (1 - p) # Rescale to preserve expectation
174
+ ```
175
+
176
+ **Final Model**
177
+
178
+ ```
179
+ θ̂_t = θ₀ + δ̂_t
180
+ ```
181
+
182
+ ### Implementation
183
+
184
+ ```python
185
+ def dare(base_model, finetuned_model, drop_rate=0.9):
186
+ """DARE: Drop And REscale delta parameters."""
187
+ # Compute delta
188
+ delta = finetuned_model - base_model
189
+
190
+ # Random drop mask (Bernoulli)
191
+ drop_mask = torch.bernoulli(torch.full_like(delta, drop_rate))
192
+
193
+ # Apply mask (keep 1-p, drop p)
194
+ dropped_delta = delta * (1 - drop_mask)
195
+
196
+ # Rescale to preserve expectation
197
+ rescaled_delta = dropped_delta / (1 - drop_rate)
198
+
199
+ # Reconstruct model
200
+ result = base_model + rescaled_delta
201
+
202
+ return result
203
+
204
+ # Example
205
+ base = load_model("mistralai/Mistral-7B-v0.1")
206
+ finetuned = load_model("WizardLM/WizardMath-7B-V1.1")
207
+
208
+ # Drop 90% of delta parameters
209
+ result = dare(base, finetuned, drop_rate=0.9)
210
+ ```
211
+
212
+ ### DARE + TIES (DARE-TIES)
213
+
214
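+ Combine both methods: first sparsify each task vector with DARE (random drop and rescale), then merge the sparsified task vectors with TIES.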
215
+
216
+ ```python
217
+ def dare_ties(base_model, finetuned_models, drop_rate=0.9, density=0.5):
218
+ """DARE + TIES-Merging."""
219
+ # Step 1: Apply DARE to each model
220
+ dare_deltas = []
221
+ for model in finetuned_models:
222
+ delta = model - base_model
223
+
224
+ # DARE drop
225
+ drop_mask = torch.bernoulli(torch.full_like(delta, drop_rate))
226
+ dropped = delta * (1 - drop_mask)
227
+ rescaled = dropped / (1 - drop_rate)
228
+
229
+ dare_deltas.append(rescaled)
230
+
231
+ # Step 2: Apply TIES to DARE-processed deltas
232
+ merged = ties_merge(base_model, dare_deltas, density=density)
233
+
234
+ return merged
235
+ ```
236
+
237
+ ### Hyperparameters
238
+
239
+ **drop_rate** (p): Probability of dropping each parameter (default: 0.9)
240
+ - Lower (0.5-0.7): Conservative, keeps more parameters
241
+ - Higher (0.9-0.99): Aggressive, maximum sparsity
242
+ - Works well even at 0.99 for large models
243
+
244
+ **Observations**:
245
+ - Larger models tolerate higher drop rates
246
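+ - DARE relies on delta parameters having small absolute values (typically within about 0.002 after supervised fine-tuning); dropping is random rather than magnitude-based, and it is less reliable when deltas are large (e.g., after continued pre-training)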
247
+ - Performance improves with model size
248
+
249
+ ## Linear Merging (Model Soup)
250
+
251
+ Simple weighted average.
252
+
253
+ ```python
254
+ def linear_merge(models, weights):
255
+ """Weighted average of model parameters."""
256
+ assert len(models) == len(weights)
257
+ assert sum(weights) == 1.0, "Weights should sum to 1"
258
+
259
+ merged = sum(w * model for w, model in zip(weights, models))
260
+
261
+ return merged
262
+
263
+ # Example
264
+ models = [model_1, model_2, model_3]
265
+ weights = [0.4, 0.3, 0.3]
266
+ merged = linear_merge(models, weights)
267
+ ```
268
+
269
+ ## SLERP: Spherical Linear Interpolation
270
+
271
+ Interpolate between two models along the arc of a sphere in weight space, rather than along a straight line.
272
+
273
+ ```python
274
+ def slerp(model_1, model_2, t=0.5):
275
+ """SLERP between two models."""
276
+ # Flatten parameters
277
+ p1 = torch.cat([p.flatten() for p in model_1.parameters()])
278
+ p2 = torch.cat([p.flatten() for p in model_2.parameters()])
279
+
280
+ # Normalize
281
+ p1_norm = p1 / p1.norm()
282
+ p2_norm = p2 / p2.norm()
283
+
284
+ # Compute angle
285
+ dot = (p1_norm * p2_norm).sum()
286
+ theta = torch.acos(torch.clamp(dot, -1.0, 1.0))
287
+
288
+ # SLERP formula
289
+ if theta < 1e-6:
290
+ # Vectors nearly parallel, use linear interpolation
291
+ result = (1 - t) * p1 + t * p2
292
+ else:
293
+ # Spherical interpolation
294
+ sin_theta = torch.sin(theta)
295
+ result = (torch.sin((1 - t) * theta) / sin_theta) * p1 + \
296
+ (torch.sin(t * theta) / sin_theta) * p2
297
+
298
+ # Reshape back to model
299
+ merged_model = reshape_to_model(result, model_1)
300
+
301
+ return merged_model
302
+
303
+ # Example
304
+ merged = slerp(model_1, model_2, t=0.5) # 50-50 blend
305
+ ```
306
+
307
+ ## Task Arithmetic
308
+
309
+ Add a weighted sum of task vectors (fine-tuned weights minus base weights) to the base model.
310
+
311
+ ```python
312
+ def task_arithmetic(base_model, finetuned_models, lambdas):
313
+ """Task arithmetic merging."""
314
+ # Extract task vectors
315
+ task_vectors = [model - base_model for model in finetuned_models]
316
+
317
+ # Weighted sum
318
+ combined_vector = sum(λ * tv for λ, tv in zip(lambdas, task_vectors))
319
+
320
+ # Add to base
321
+ merged = base_model + combined_vector
322
+
323
+ return merged
324
+
325
+ # Example
326
+ base = load_model("mistralai/Mistral-7B-v0.1")
327
+ math_model = load_model("WizardLM/WizardMath-7B-V1.1")
328
+ code_model = load_model("ajibawa-2023/Code-Mistral-7B")
329
+
330
+ merged = task_arithmetic(
331
+ base,
332
+ [math_model, code_model],
333
+ lambdas=[0.6, 0.4]
334
+ )
335
+ ```
336
+
337
+ ## Method Comparison
338
+
339
+ | Method | Pros | Cons | Best For |
340
+ |--------|------|------|----------|
341
+ | **Linear** | Simple, fast | Basic averaging | 2-3 similar models |
342
+ | **SLERP** | Preserves magnitude | Only 2 models | Smooth blending |
343
+ | **Task Arithmetic** | Intuitive, flexible | Sign conflicts | Multiple specialized models |
344
+ | **TIES** | Resolves conflicts | More complex | Many task-specific models |
345
+ | **DARE** | High sparsity | Random variance | Reducing redundancy |
346
+ | **DARE-TIES** | Best performance | Most complex | Production (state-of-the-art) |
347
+
348
+ ## Resources
349
+
350
+ - **TIES Paper**: https://arxiv.org/abs/2306.01708
351
+ - **DARE Paper**: https://arxiv.org/abs/2311.03099
352
+ - **mergekit**: https://github.com/arcee-ai/mergekit