@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,219 @@
1
+ ---
2
+ name: simpo-training
3
+ description: Simple Preference Optimization for LLM alignment. Reference-free alternative to DPO with better performance (+6.4 points on AlpacaEval 2.0). No reference model needed, more efficient than DPO. Use for preference alignment when you want simpler, faster training than DPO/PPO.
4
+ version: 1.0.0
5
+ author: Synthetic Sciences
6
+ license: MIT
7
+ tags: [Post-Training, SimPO, Preference Optimization, Alignment, DPO Alternative, Reference-Free, LLM Alignment, Efficient Training]
8
+ dependencies: [torch, transformers, datasets, trl, accelerate]
9
+ ---
10
+
11
+ # SimPO - Simple Preference Optimization
12
+
13
+ ## Quick start
14
+
15
+ SimPO is a reference-free preference optimization method that outperforms DPO without needing a reference model.
16
+
17
+ **Installation**:
18
+ ```bash
19
+ # Create environment
20
+ conda create -n simpo python=3.10 && conda activate simpo
21
+
22
+ # Install PyTorch 2.2.2
23
+ # Visit: https://pytorch.org/get-started/locally/
24
+
25
+ # Install alignment-handbook
26
+ git clone https://github.com/huggingface/alignment-handbook.git
27
+ cd alignment-handbook
28
+ python -m pip install .
29
+
30
+ # Install Flash Attention 2
31
+ python -m pip install flash-attn --no-build-isolation
32
+ ```
33
+
34
+ **Training** (Mistral 7B):
35
+ ```bash
36
+ ACCELERATE_LOG_LEVEL=info accelerate launch \
37
+ --config_file accelerate_configs/deepspeed_zero3.yaml \
38
+ scripts/run_simpo.py \
39
+ training_configs/mistral-7b-base-simpo.yaml
40
+ ```
41
+
42
+ ## Common workflows
43
+
44
+ ### Workflow 1: Train from base model (Mistral 7B)
45
+
46
+ **Config** (`mistral-7b-base-simpo.yaml`):
47
+ ```yaml
48
+ # Model
49
+ model_name_or_path: mistralai/Mistral-7B-v0.1
50
+ torch_dtype: bfloat16
51
+
52
+ # Dataset
53
+ dataset_mixer:
54
+ HuggingFaceH4/ultrafeedback_binarized: 1.0
55
+ dataset_splits:
56
+ - train_prefs
57
+ - test_prefs
58
+
59
+ # SimPO hyperparameters
60
+ beta: 2.0 # Reward scaling (2.0-10.0)
61
+ gamma_beta_ratio: 0.5 # Target margin (0-1)
62
+ loss_type: sigmoid # sigmoid or hinge
63
+ sft_weight: 0.0 # Optional SFT regularization
64
+
65
+ # Training
66
+ learning_rate: 5e-7 # Critical: 3e-7 to 1e-6
67
+ num_train_epochs: 1
68
+ per_device_train_batch_size: 1
69
+ gradient_accumulation_steps: 8
70
+
71
+ # Output
72
+ output_dir: ./outputs/mistral-7b-simpo
73
+ ```
74
+
75
+ **Launch training**:
76
+ ```bash
77
+ accelerate launch --config_file accelerate_configs/deepspeed_zero3.yaml \
78
+ scripts/run_simpo.py training_configs/mistral-7b-base-simpo.yaml
79
+ ```
80
+
81
+ ### Workflow 2: Fine-tune instruct model (Llama 3 8B)
82
+
83
+ **Config** (`llama3-8b-instruct-simpo.yaml`):
84
+ ```yaml
85
+ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
86
+
87
+ dataset_mixer:
88
+ argilla/ultrafeedback-binarized-preferences-cleaned: 1.0
89
+
90
+ beta: 2.5
91
+ gamma_beta_ratio: 0.5
92
+ learning_rate: 5e-7
93
+ sft_weight: 0.1 # Add SFT loss to preserve capabilities
94
+
95
+ num_train_epochs: 1
96
+ per_device_train_batch_size: 2
97
+ gradient_accumulation_steps: 4
98
+ output_dir: ./outputs/llama3-8b-simpo
99
+ ```
100
+
101
+ **Launch**:
102
+ ```bash
103
+ accelerate launch --config_file accelerate_configs/deepspeed_zero3.yaml \
104
+ scripts/run_simpo.py training_configs/llama3-8b-instruct-simpo.yaml
105
+ ```
106
+
107
+ ### Workflow 3: Reasoning-intensive tasks (lower LR)
108
+
109
+ **For math/code tasks**:
110
+ ```yaml
111
+ model_name_or_path: deepseek-ai/deepseek-math-7b-base
112
+
113
+ dataset_mixer:
114
+ argilla/distilabel-math-preference-dpo: 1.0
115
+
116
+ beta: 5.0 # Higher for stronger signal
117
+ gamma_beta_ratio: 0.7 # Larger margin
118
+ learning_rate: 3e-7 # Lower LR for reasoning
119
+ sft_weight: 0.0
120
+
121
+ num_train_epochs: 1
122
+ per_device_train_batch_size: 1
123
+ gradient_accumulation_steps: 16
124
+ ```
125
+
126
+ ## When to use vs alternatives
127
+
128
+ **Use SimPO when**:
129
+ - Want simpler training than DPO (no reference model)
130
+ - Have preference data (chosen/rejected pairs)
131
+ - Need better performance than DPO
132
+ - Limited compute resources
133
+ - Single-node training sufficient
134
+
135
+ **Algorithm selection**:
136
+ - **SimPO**: Simplest, best performance, no reference model
137
+ - **DPO**: Need reference model baseline, more conservative
138
+ - **PPO**: Maximum control, need reward model, complex setup
139
+ - **GRPO**: Memory-efficient RL, no critic
140
+
141
+ **Use alternatives instead**:
142
+ - **OpenRLHF**: Multi-node distributed training, PPO/GRPO
143
+ - **TRL**: Need multiple methods in one framework
144
+ - **DPO**: Established baseline comparison
145
+
146
+ ## Common issues
147
+
148
+ **Issue: Loss divergence**
149
+
150
+ Reduce learning rate:
151
+ ```yaml
152
+ learning_rate: 3e-7 # Reduce from 5e-7
153
+ ```
154
+
155
+ Reduce beta:
156
+ ```yaml
157
+ beta: 1.0 # Reduce from 2.0
158
+ ```
159
+
160
+ **Issue: Model forgets capabilities**
161
+
162
+ Add SFT regularization:
163
+ ```yaml
164
+ sft_weight: 0.1 # Add SFT loss component
165
+ ```
166
+
167
+ **Issue: Poor preference separation**
168
+
169
+ Increase beta and margin:
170
+ ```yaml
171
+ beta: 5.0 # Increase from 2.0
172
+ gamma_beta_ratio: 0.8 # Increase from 0.5
173
+ ```
174
+
175
+ **Issue: OOM during training**
176
+
177
+ Reduce batch size:
178
+ ```yaml
179
+ per_device_train_batch_size: 1
180
+ gradient_accumulation_steps: 16 # Maintain effective batch
181
+ ```
182
+
183
+ Enable gradient checkpointing:
184
+ ```yaml
185
+ gradient_checkpointing: true
186
+ ```
187
+
188
+ ## Advanced topics
189
+
190
+ **Loss functions**: See [references/loss-functions.md](references/loss-functions.md) for sigmoid vs hinge loss, mathematical formulations, and when to use each.
191
+
192
+ **Hyperparameter tuning**: See [references/hyperparameters.md](references/hyperparameters.md) for beta, gamma, learning rate selection guide, and model-size-specific recommendations.
193
+
194
+ **Dataset preparation**: See [references/datasets.md](references/datasets.md) for preference data formats, quality filtering, and custom dataset creation.
195
+
196
+ ## Hardware requirements
197
+
198
+ - **GPU**: NVIDIA A100/H100 recommended
199
+ - **VRAM**:
200
+ - 7B model: 1× A100 40GB (DeepSpeed ZeRO-3)
201
+ - 8B model: 2× A100 40GB
202
+ - 70B model: 8× A100 80GB
203
+ - **Single-node**: DeepSpeed ZeRO-3 sufficient
204
+ - **Mixed precision**: BF16 recommended
205
+
206
+ **Memory optimization**:
207
+ - DeepSpeed ZeRO-3 (default config)
208
+ - Gradient checkpointing
209
+ - Flash Attention 2
210
+
211
+ ## Resources
212
+
213
+ - Paper: https://arxiv.org/abs/2405.14734 (NeurIPS 2024)
214
+ - GitHub: https://github.com/princeton-nlp/SimPO
215
+ - Models: https://huggingface.co/princeton-nlp
216
+ - Alignment Handbook: https://github.com/huggingface/alignment-handbook
217
+
218
+
219
+
@@ -0,0 +1,478 @@
1
+ # Datasets
2
+
3
+ Complete guide to preference datasets for SimPO training.
4
+
5
+ ## Dataset Format
6
+
7
+ ### Required Fields
8
+
9
+ Preference datasets must contain:
10
+ ```json
11
+ {
12
+ "prompt": "User question or instruction",
13
+ "chosen": "Better/preferred response",
14
+ "rejected": "Worse/rejected response"
15
+ }
16
+ ```
17
+
18
+ **Alternative field names** (auto-detected):
19
+ - `prompt` → `question`, `instruction`, `input`
20
+ - `chosen` → `response_chosen`, `winner`, `preferred`
21
+ - `rejected` → `response_rejected`, `loser`
22
+
23
+ ### Example Entry
24
+
25
+ ```json
26
+ {
27
+ "prompt": "Explain quantum computing in simple terms.",
28
+ "chosen": "Quantum computing uses quantum bits (qubits) that can exist in multiple states simultaneously through superposition. This allows quantum computers to process many possibilities at once, making them potentially much faster than classical computers for specific tasks like cryptography and optimization.",
29
+ "rejected": "It's like regular computing but quantum."
30
+ }
31
+ ```
32
+
33
+ ## Popular Datasets
34
+
35
+ ### 1. UltraFeedback (Recommended)
36
+
37
+ **HuggingFaceH4/ultrafeedback_binarized**:
38
+ - **Size**: 60K preference pairs
39
+ - **Quality**: High (GPT-4 annotations)
40
+ - **Domain**: General instruction following
41
+ - **Format**: Clean, ready-to-use
42
+
43
+ **Config**:
44
+ ```yaml
45
+ dataset_mixer:
46
+ HuggingFaceH4/ultrafeedback_binarized: 1.0
47
+ dataset_splits:
48
+ - train_prefs
49
+ - test_prefs
50
+ ```
51
+
52
+ ### 2. Argilla UltraFeedback (Cleaned)
53
+
54
+ **argilla/ultrafeedback-binarized-preferences-cleaned**:
55
+ - **Size**: 50K pairs (filtered)
56
+ - **Quality**: Very high (deduped, cleaned)
57
+ - **Domain**: General
58
+ - **Format**: Clean
59
+
60
+ **Config**:
61
+ ```yaml
62
+ dataset_mixer:
63
+ argilla/ultrafeedback-binarized-preferences-cleaned: 1.0
64
+ ```
65
+
66
+ ### 3. Distilabel Math
67
+
68
+ **argilla/distilabel-math-preference-dpo**:
69
+ - **Size**: 30K pairs
70
+ - **Quality**: High (GSM8K, MATH)
71
+ - **Domain**: Math reasoning
72
+ - **Format**: Math-specific
73
+
74
+ **Config**:
75
+ ```yaml
76
+ dataset_mixer:
77
+ argilla/distilabel-math-preference-dpo: 1.0
78
+ ```
79
+
80
+ ### 4. HelpSteer
81
+
82
+ **nvidia/HelpSteer**:
83
+ - **Size**: 38K samples
84
+ - **Quality**: High (human ratings)
85
+ - **Domain**: Helpfulness alignment
86
+ - **Format**: Multi-attribute ratings
87
+
88
+ **Config**:
89
+ ```yaml
90
+ dataset_mixer:
91
+ nvidia/HelpSteer: 1.0
92
+ ```
93
+
94
+ ### 5. Anthropic HH-RLHF
95
+
96
+ **Anthropic/hh-rlhf**:
97
+ - **Size**: 161K samples
98
+ - **Quality**: High (human preferences)
99
+ - **Domain**: Harmless + helpful
100
+ - **Format**: Conversational
101
+
102
+ **Config**:
103
+ ```yaml
104
+ dataset_mixer:
105
+ Anthropic/hh-rlhf: 1.0
106
+ ```
107
+
108
+ ## Dataset Mixing
109
+
110
+ ### Multiple Datasets
111
+
112
+ **Equal mix**:
113
+ ```yaml
114
+ dataset_mixer:
115
+ HuggingFaceH4/ultrafeedback_binarized: 0.5
116
+ Anthropic/hh-rlhf: 0.5
117
+ ```
118
+
119
+ **Weighted mix**:
120
+ ```yaml
121
+ dataset_mixer:
122
+ HuggingFaceH4/ultrafeedback_binarized: 0.7
123
+ argilla/distilabel-math-preference-dpo: 0.2
124
+ nvidia/HelpSteer: 0.1
125
+ ```
126
+
127
+ **Domain-specific emphasis**:
128
+ ```yaml
129
+ # 80% general + 20% math
130
+ dataset_mixer:
131
+ HuggingFaceH4/ultrafeedback_binarized: 0.8
132
+ argilla/distilabel-math-preference-dpo: 0.2
133
+ ```
134
+
135
+ ## Data Quality
136
+
137
+ ### Quality Indicators
138
+
139
+ **Good preference data**:
140
+ - ✅ Clear quality difference between chosen/rejected
141
+ - ✅ Diverse prompts
142
+ - ✅ Minimal noise/annotation errors
143
+ - ✅ Appropriate difficulty level
144
+
145
+ **Poor preference data**:
146
+ - ❌ Ambiguous preferences
147
+ - ❌ Repetitive prompts
148
+ - ❌ Annotation noise
149
+ - ❌ Too easy/hard prompts
150
+
151
+ ### Quality Filtering
152
+
153
+ **Filter by length difference**:
154
+ ```python
155
+ def filter_by_length(example):
156
+ chosen_len = len(example['chosen'].split())
157
+ rejected_len = len(example['rejected'].split())
158
+ # Reject if chosen is much shorter (potential low-effort)
159
+ return chosen_len >= rejected_len * 0.5
160
+
161
+ dataset = dataset.filter(filter_by_length)
162
+ ```
163
+
164
+ **Filter by diversity**:
165
+ ```python
166
+ seen_prompts = set()
167
+
168
+ def filter_duplicates(example):
169
+ prompt = example['prompt']
170
+ if prompt in seen_prompts:
171
+ return False
172
+ seen_prompts.add(prompt)
173
+ return True
174
+
175
+ dataset = dataset.filter(filter_duplicates)
176
+ ```
177
+
178
+ ## Custom Dataset Creation
179
+
180
+ ### Format 1: JSON Lines
181
+
182
+ **File** (`preferences.jsonl`):
183
+ ```jsonl
184
+ {"prompt": "What is Python?", "chosen": "Python is a high-level programming language...", "rejected": "It's a snake."}
185
+ {"prompt": "Explain AI.", "chosen": "AI refers to systems that can...", "rejected": "It's computers that think."}
186
+ ```
187
+
188
+ **Load**:
189
+ ```yaml
190
+ dataset_mixer:
191
+ json:
192
+ data_files: preferences.jsonl
193
+ ```
194
+
195
+ ### Format 2: HuggingFace Dataset
196
+
197
+ **Create from dict**:
198
+ ```python
199
+ from datasets import Dataset
200
+
201
+ data = {
202
+ "prompt": ["What is Python?", "Explain AI."],
203
+ "chosen": ["Python is...", "AI refers to..."],
204
+ "rejected": ["It's a snake.", "It's computers..."]
205
+ }
206
+
207
+ dataset = Dataset.from_dict(data)
208
+ dataset.push_to_hub("username/my-preferences")
209
+ ```
210
+
211
+ **Use in config**:
212
+ ```yaml
213
+ dataset_mixer:
214
+ username/my-preferences: 1.0
215
+ ```
216
+
217
+ ### Format 3: ChatML
218
+
219
+ **For conversational data**:
220
+ ```json
221
+ {
222
+ "prompt": [
223
+ {"role": "user", "content": "What is quantum computing?"}
224
+ ],
225
+ "chosen": [
226
+ {"role": "assistant", "content": "Quantum computing uses qubits..."}
227
+ ],
228
+ "rejected": [
229
+ {"role": "assistant", "content": "It's like regular computing but quantum."}
230
+ ]
231
+ }
232
+ ```
233
+
234
+ **Apply chat template**:
235
+ ```yaml
236
+ dataset_text_field: null # Will apply chat template
237
+ ```
238
+
239
+ ## Synthetic Data Generation
240
+
241
+ ### Using GPT-4
242
+
243
+ **Prompt template**:
244
+ ```
245
+ Given the following question:
246
+ {prompt}
247
+
248
+ Generate two responses:
249
+ 1. A high-quality, detailed response (chosen)
250
+ 2. A low-quality, brief response (rejected)
251
+
252
+ Format as JSON with "chosen" and "rejected" fields.
253
+ ```
254
+
255
+ **Example code**:
256
+ ```python
257
+ import json
+ import openai
258
+
259
+ def generate_pair(prompt):
260
+ response = openai.ChatCompletion.create(
261
+ model="gpt-4",
262
+ messages=[{
263
+ "role": "user",
264
+ "content": f"Given: {prompt}\n\nGenerate chosen/rejected pair in JSON."
265
+ }]
266
+ )
267
+ return json.loads(response.choices[0].message.content)
268
+
269
+ # Generate dataset
270
+ prompts = load_prompts()
271
+ dataset = [generate_pair(p) for p in prompts]
272
+ ```
273
+
274
+ ### Using Local Model
275
+
276
+ **With vLLM**:
277
+ ```python
278
+ from vllm import LLM, SamplingParams
279
+
280
+ llm = LLM(model="meta-llama/Meta-Llama-3-70B-Instruct")
281
+
282
+ def generate_variations(prompt):
283
+ # Generate multiple completions
284
+ outputs = llm.generate(
285
+ [prompt] * 4,
286
+ SamplingParams(
287
+ temperature=0.8,
288
+ top_p=0.9,
289
+ max_tokens=512
290
+ )
291
+ )
291
+ )
292
+
293
+ # Select best/worst
294
+ chosen = max(outputs, key=lambda x: len(x.outputs[0].text))
295
+ rejected = min(outputs, key=lambda x: len(x.outputs[0].text))
296
+
297
+ return {
298
+ "prompt": prompt,
299
+ "chosen": chosen.outputs[0].text,
300
+ "rejected": rejected.outputs[0].text
301
+ }
302
+ ```
303
+
304
+ ## Data Preprocessing
305
+
306
+ ### Truncation
307
+
308
+ **Limit sequence length**:
309
+ ```yaml
310
+ max_prompt_length: 512
311
+ max_completion_length: 512
312
+ max_length: 1024 # Total
313
+ ```
314
+
315
+ **Implementation**:
316
+ ```python
317
+ def truncate_example(example):
318
+ tokenizer.truncation_side = "left" # For prompts
319
+ prompt_tokens = tokenizer(
320
+ example['prompt'],
321
+ max_length=512,
322
+ truncation=True
323
+ )
324
+
325
+ tokenizer.truncation_side = "right" # For completions
326
+ chosen_tokens = tokenizer(
327
+ example['chosen'],
328
+ max_length=512,
329
+ truncation=True
330
+ )
331
+
332
+ return {
333
+ "prompt": tokenizer.decode(prompt_tokens['input_ids']),
334
+ "chosen": tokenizer.decode(chosen_tokens['input_ids'])
335
+ }
336
+
337
+ dataset = dataset.map(truncate_example)
338
+ ```
339
+
340
+ ### Deduplication
341
+
342
+ **Remove exact duplicates**:
343
+ ```python
344
+ # Note: Dataset.unique() returns a list of values, not a filtered Dataset,
+ # so keep the first occurrence of each prompt explicitly:
+ first_seen = {}
+ for i, p in enumerate(dataset['prompt']):
+ first_seen.setdefault(p, i)
+ dataset = dataset.select(sorted(first_seen.values()))
345
+ ```
346
+
347
+ **Remove near-duplicates** (MinHash):
348
+ ```python
349
+ from datasketch import MinHash, MinHashLSH
350
+
351
+ def deduplicate_lsh(dataset, threshold=0.8):
352
+ lsh = MinHashLSH(threshold=threshold, num_perm=128)
353
+ seen = []
354
+
355
+ for i, example in enumerate(dataset):
356
+ m = MinHash(num_perm=128)
357
+ for word in example['prompt'].split():
358
+ m.update(word.encode('utf8'))
359
+
360
+ if not lsh.query(m):
361
+ lsh.insert(i, m)
362
+ seen.append(example)
363
+
364
+ return Dataset.from_list(seen)
365
+
366
+ dataset = deduplicate_lsh(dataset)
367
+ ```
368
+
369
+ ## Data Augmentation
370
+
371
+ ### Paraphrasing Prompts
372
+
373
+ ```python
374
+ def paraphrase_prompts(batch):
375
+ # Use paraphrasing model; batched=True lets map() return extra rows
376
+ out = {"prompt": [], "chosen": [], "rejected": []}
377
+ for prompt, chosen, rejected in zip(batch['prompt'], batch['chosen'], batch['rejected']):
378
+ for p in (prompt, paraphrase_model(prompt)): # original + paraphrase
379
+ out["prompt"].append(p)
380
+ out["chosen"].append(chosen)
381
+ out["rejected"].append(rejected)
382
+
383
+ return out
384
+
385
+ dataset = dataset.map(paraphrase_prompts, batched=True)
388
+ ```
389
+
390
+ ### Difficulty Balancing
391
+
392
+ **Mix easy/medium/hard**:
393
+ ```python
394
+ def categorize_difficulty(example):
395
+ prompt_len = len(example['prompt'].split())
396
+ if prompt_len < 20:
397
+ return "easy"
398
+ elif prompt_len < 50:
399
+ return "medium"
400
+ else:
401
+ return "hard"
402
+
403
+ dataset = dataset.map(lambda x: {"difficulty": categorize_difficulty(x)})
404
+
405
+ # Sample balanced dataset
406
+ easy = dataset.filter(lambda x: x['difficulty'] == 'easy').shuffle().select(range(1000))
407
+ medium = dataset.filter(lambda x: x['difficulty'] == 'medium').shuffle().select(range(1000))
408
+ hard = dataset.filter(lambda x: x['difficulty'] == 'hard').shuffle().select(range(1000))
409
+
410
+ balanced = concatenate_datasets([easy, medium, hard]).shuffle()
411
+ ```
412
+
413
+ ## Dataset Statistics
414
+
415
+ ### Compute Stats
416
+
417
+ ```python
418
+ import numpy as np
+
+ def compute_stats(dataset):
419
+ prompt_lens = [len(x['prompt'].split()) for x in dataset]
420
+ chosen_lens = [len(x['chosen'].split()) for x in dataset]
421
+ rejected_lens = [len(x['rejected'].split()) for x in dataset]
422
+
423
+ print(f"Dataset size: {len(dataset)}")
424
+ print(f"Avg prompt length: {np.mean(prompt_lens):.1f} words")
425
+ print(f"Avg chosen length: {np.mean(chosen_lens):.1f} words")
426
+ print(f"Avg rejected length: {np.mean(rejected_lens):.1f} words")
427
+ print(f"Chosen > Rejected: {sum(c > r for c, r in zip(chosen_lens, rejected_lens)) / len(dataset):.1%}")
428
+
429
+ compute_stats(dataset)
430
+ ```
431
+
432
+ **Expected output**:
433
+ ```
434
+ Dataset size: 50000
435
+ Avg prompt length: 45.2 words
436
+ Avg chosen length: 180.5 words
437
+ Avg rejected length: 120.3 words
438
+ Chosen > Rejected: 85.2%
439
+ ```
440
+
441
+ ## Best Practices
442
+
443
+ ### 1. Data Quality Over Quantity
444
+
445
+ - **Prefer**: 10K high-quality pairs
446
+ - **Over**: 100K noisy pairs
447
+
448
+ ### 2. Clear Preference Signals
449
+
450
+ - Chosen should be noticeably better
451
+ - Avoid marginal differences
452
+ - Remove ambiguous pairs
453
+
454
+ ### 3. Domain Matching
455
+
456
+ - Match dataset domain to target use case
457
+ - Mix datasets for broader coverage
458
+ - Include safety-filtered data
459
+
460
+ ### 4. Validate Before Training
461
+
462
+ ```python
463
+ # Sample 10 random examples
464
+ samples = dataset.shuffle().select(range(10))
465
+
466
+ for ex in samples:
467
+ print(f"Prompt: {ex['prompt']}")
468
+ print(f"Chosen: {ex['chosen'][:100]}...")
469
+ print(f"Rejected: {ex['rejected'][:100]}...")
470
+ print(f"Preference clear: {'✓' if len(ex['chosen']) > len(ex['rejected']) else '?'}")
471
+ print()
472
+ ```
473
+
474
+ ## References
475
+
476
+ - HuggingFace Datasets: https://huggingface.co/datasets
477
+ - Alignment Handbook: https://github.com/huggingface/alignment-handbook
478
+ - UltraFeedback: https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized