@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: simpo-training
|
|
3
|
+
description: Simple Preference Optimization for LLM alignment. Reference-free alternative to DPO with better performance (+6.4 points on AlpacaEval 2.0). No reference model needed, more efficient than DPO. Use for preference alignment when you want simpler, faster training than DPO/PPO.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Synthetic Sciences
|
|
6
|
+
license: MIT
|
|
7
|
+
tags: [Post-Training, SimPO, Preference Optimization, Alignment, DPO Alternative, Reference-Free, LLM Alignment, Efficient Training]
|
|
8
|
+
dependencies: [torch, transformers, datasets, trl, accelerate]
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# SimPO - Simple Preference Optimization
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
SimPO is a reference-free preference optimization method that outperforms DPO without needing a reference model.
|
|
16
|
+
|
|
17
|
+
**Installation**:
|
|
18
|
+
```bash
|
|
19
|
+
# Create environment
|
|
20
|
+
conda create -n simpo python=3.10 && conda activate simpo
|
|
21
|
+
|
|
22
|
+
# Install PyTorch 2.2.2
|
|
23
|
+
# Visit: https://pytorch.org/get-started/locally/
|
|
24
|
+
|
|
25
|
+
# Install alignment-handbook
|
|
26
|
+
git clone https://github.com/huggingface/alignment-handbook.git
|
|
27
|
+
cd alignment-handbook
|
|
28
|
+
python -m pip install .
|
|
29
|
+
|
|
30
|
+
# Install Flash Attention 2
|
|
31
|
+
python -m pip install flash-attn --no-build-isolation
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
**Training** (Mistral 7B):
|
|
35
|
+
```bash
|
|
36
|
+
ACCELERATE_LOG_LEVEL=info accelerate launch \
|
|
37
|
+
--config_file accelerate_configs/deepspeed_zero3.yaml \
|
|
38
|
+
scripts/run_simpo.py \
|
|
39
|
+
training_configs/mistral-7b-base-simpo.yaml
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Common workflows
|
|
43
|
+
|
|
44
|
+
### Workflow 1: Train from base model (Mistral 7B)
|
|
45
|
+
|
|
46
|
+
**Config** (`mistral-7b-base-simpo.yaml`):
|
|
47
|
+
```yaml
|
|
48
|
+
# Model
|
|
49
|
+
model_name_or_path: mistralai/Mistral-7B-v0.1
|
|
50
|
+
torch_dtype: bfloat16
|
|
51
|
+
|
|
52
|
+
# Dataset
|
|
53
|
+
dataset_mixer:
|
|
54
|
+
HuggingFaceH4/ultrafeedback_binarized: 1.0
|
|
55
|
+
dataset_splits:
|
|
56
|
+
- train_prefs
|
|
57
|
+
- test_prefs
|
|
58
|
+
|
|
59
|
+
# SimPO hyperparameters
|
|
60
|
+
beta: 2.0 # Reward scaling (2.0-10.0)
|
|
61
|
+
gamma_beta_ratio: 0.5 # Target margin (0-1)
|
|
62
|
+
loss_type: sigmoid # sigmoid or hinge
|
|
63
|
+
sft_weight: 0.0 # Optional SFT regularization
|
|
64
|
+
|
|
65
|
+
# Training
|
|
66
|
+
learning_rate: 5e-7 # Critical: 3e-7 to 1e-6
|
|
67
|
+
num_train_epochs: 1
|
|
68
|
+
per_device_train_batch_size: 1
|
|
69
|
+
gradient_accumulation_steps: 8
|
|
70
|
+
|
|
71
|
+
# Output
|
|
72
|
+
output_dir: ./outputs/mistral-7b-simpo
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**Launch training**:
|
|
76
|
+
```bash
|
|
77
|
+
accelerate launch --config_file accelerate_configs/deepspeed_zero3.yaml \
|
|
78
|
+
scripts/run_simpo.py training_configs/mistral-7b-base-simpo.yaml
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Workflow 2: Fine-tune instruct model (Llama 3 8B)
|
|
82
|
+
|
|
83
|
+
**Config** (`llama3-8b-instruct-simpo.yaml`):
|
|
84
|
+
```yaml
|
|
85
|
+
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|
86
|
+
|
|
87
|
+
dataset_mixer:
|
|
88
|
+
argilla/ultrafeedback-binarized-preferences-cleaned: 1.0
|
|
89
|
+
|
|
90
|
+
beta: 2.5
|
|
91
|
+
gamma_beta_ratio: 0.5
|
|
92
|
+
learning_rate: 5e-7
|
|
93
|
+
sft_weight: 0.1 # Add SFT loss to preserve capabilities
|
|
94
|
+
|
|
95
|
+
num_train_epochs: 1
|
|
96
|
+
per_device_train_batch_size: 2
|
|
97
|
+
gradient_accumulation_steps: 4
|
|
98
|
+
output_dir: ./outputs/llama3-8b-simpo
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**Launch**:
|
|
102
|
+
```bash
|
|
103
|
+
accelerate launch --config_file accelerate_configs/deepspeed_zero3.yaml \
|
|
104
|
+
scripts/run_simpo.py training_configs/llama3-8b-instruct-simpo.yaml
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Workflow 3: Reasoning-intensive tasks (lower LR)
|
|
108
|
+
|
|
109
|
+
**For math/code tasks**:
|
|
110
|
+
```yaml
|
|
111
|
+
model_name_or_path: deepseek-ai/deepseek-math-7b-base
|
|
112
|
+
|
|
113
|
+
dataset_mixer:
|
|
114
|
+
argilla/distilabel-math-preference-dpo: 1.0
|
|
115
|
+
|
|
116
|
+
beta: 5.0 # Higher for stronger signal
|
|
117
|
+
gamma_beta_ratio: 0.7 # Larger margin
|
|
118
|
+
learning_rate: 3e-7 # Lower LR for reasoning
|
|
119
|
+
sft_weight: 0.0
|
|
120
|
+
|
|
121
|
+
num_train_epochs: 1
|
|
122
|
+
per_device_train_batch_size: 1
|
|
123
|
+
gradient_accumulation_steps: 16
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## When to use vs alternatives
|
|
127
|
+
|
|
128
|
+
**Use SimPO when**:
|
|
129
|
+
- Want simpler training than DPO (no reference model)
|
|
130
|
+
- Have preference data (chosen/rejected pairs)
|
|
131
|
+
- Need better performance than DPO
|
|
132
|
+
- Limited compute resources
|
|
133
|
+
- Single-node training sufficient
|
|
134
|
+
|
|
135
|
+
**Algorithm selection**:
|
|
136
|
+
- **SimPO**: Simplest, best performance, no reference model
|
|
137
|
+
- **DPO**: Need reference model baseline, more conservative
|
|
138
|
+
- **PPO**: Maximum control, need reward model, complex setup
|
|
139
|
+
- **GRPO**: Memory-efficient RL, no critic
|
|
140
|
+
|
|
141
|
+
**Use alternatives instead**:
|
|
142
|
+
- **OpenRLHF**: Multi-node distributed training, PPO/GRPO
|
|
143
|
+
- **TRL**: Need multiple methods in one framework
|
|
144
|
+
- **DPO**: Established baseline comparison
|
|
145
|
+
|
|
146
|
+
## Common issues
|
|
147
|
+
|
|
148
|
+
**Issue: Loss divergence**
|
|
149
|
+
|
|
150
|
+
Reduce learning rate:
|
|
151
|
+
```yaml
|
|
152
|
+
learning_rate: 3e-7 # Reduce from 5e-7
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Reduce beta:
|
|
156
|
+
```yaml
|
|
157
|
+
beta: 1.0 # Reduce from 2.0
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
**Issue: Model forgets capabilities**
|
|
161
|
+
|
|
162
|
+
Add SFT regularization:
|
|
163
|
+
```yaml
|
|
164
|
+
sft_weight: 0.1 # Add SFT loss component
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
**Issue: Poor preference separation**
|
|
168
|
+
|
|
169
|
+
Increase beta and margin:
|
|
170
|
+
```yaml
|
|
171
|
+
beta: 5.0 # Increase from 2.0
|
|
172
|
+
gamma_beta_ratio: 0.8 # Increase from 0.5
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**Issue: OOM during training**
|
|
176
|
+
|
|
177
|
+
Reduce batch size:
|
|
178
|
+
```yaml
|
|
179
|
+
per_device_train_batch_size: 1
|
|
180
|
+
gradient_accumulation_steps: 16 # Maintain effective batch
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Enable gradient checkpointing:
|
|
184
|
+
```yaml
|
|
185
|
+
gradient_checkpointing: true
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Advanced topics
|
|
189
|
+
|
|
190
|
+
**Loss functions**: See [references/loss-functions.md](references/loss-functions.md) for sigmoid vs hinge loss, mathematical formulations, and when to use each.
|
|
191
|
+
|
|
192
|
+
**Hyperparameter tuning**: See [references/hyperparameters.md](references/hyperparameters.md) for beta, gamma, learning rate selection guide, and model-size-specific recommendations.
|
|
193
|
+
|
|
194
|
+
**Dataset preparation**: See [references/datasets.md](references/datasets.md) for preference data formats, quality filtering, and custom dataset creation.
|
|
195
|
+
|
|
196
|
+
## Hardware requirements
|
|
197
|
+
|
|
198
|
+
- **GPU**: NVIDIA A100/H100 recommended
|
|
199
|
+
- **VRAM**:
|
|
200
|
+
- 7B model: 1× A100 40GB (DeepSpeed ZeRO-3)
|
|
201
|
+
- 8B model: 2× A100 40GB
|
|
202
|
+
- 70B model: 8× A100 80GB
|
|
203
|
+
- **Single-node**: DeepSpeed ZeRO-3 sufficient
|
|
204
|
+
- **Mixed precision**: BF16 recommended
|
|
205
|
+
|
|
206
|
+
**Memory optimization**:
|
|
207
|
+
- DeepSpeed ZeRO-3 (default config)
|
|
208
|
+
- Gradient checkpointing
|
|
209
|
+
- Flash Attention 2
|
|
210
|
+
|
|
211
|
+
## Resources
|
|
212
|
+
|
|
213
|
+
- Paper: https://arxiv.org/abs/2405.14734 (NeurIPS 2024)
|
|
214
|
+
- GitHub: https://github.com/princeton-nlp/SimPO
|
|
215
|
+
- Models: https://huggingface.co/princeton-nlp
|
|
216
|
+
- Alignment Handbook: https://github.com/huggingface/alignment-handbook
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
|
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
# Datasets
|
|
2
|
+
|
|
3
|
+
Complete guide to preference datasets for SimPO training.
|
|
4
|
+
|
|
5
|
+
## Dataset Format
|
|
6
|
+
|
|
7
|
+
### Required Fields
|
|
8
|
+
|
|
9
|
+
Preference datasets must contain:
|
|
10
|
+
```json
|
|
11
|
+
{
|
|
12
|
+
"prompt": "User question or instruction",
|
|
13
|
+
"chosen": "Better/preferred response",
|
|
14
|
+
"rejected": "Worse/rejected response"
|
|
15
|
+
}
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
**Alternative field names** (auto-detected):
|
|
19
|
+
- `prompt` → `question`, `instruction`, `input`
|
|
20
|
+
- `chosen` → `response_chosen`, `winner`, `preferred`
|
|
21
|
+
- `rejected` → `response_rejected`, `loser`
|
|
22
|
+
|
|
23
|
+
### Example Entry
|
|
24
|
+
|
|
25
|
+
```json
|
|
26
|
+
{
|
|
27
|
+
"prompt": "Explain quantum computing in simple terms.",
|
|
28
|
+
"chosen": "Quantum computing uses quantum bits (qubits) that can exist in multiple states simultaneously through superposition. This allows quantum computers to process many possibilities at once, making them potentially much faster than classical computers for specific tasks like cryptography and optimization.",
|
|
29
|
+
"rejected": "It's like regular computing but quantum."
|
|
30
|
+
}
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Popular Datasets
|
|
34
|
+
|
|
35
|
+
### 1. UltraFeedback (Recommended)
|
|
36
|
+
|
|
37
|
+
**HuggingFaceH4/ultrafeedback_binarized**:
|
|
38
|
+
- **Size**: 60K preference pairs
|
|
39
|
+
- **Quality**: High (GPT-4 annotations)
|
|
40
|
+
- **Domain**: General instruction following
|
|
41
|
+
- **Format**: Clean, ready-to-use
|
|
42
|
+
|
|
43
|
+
**Config**:
|
|
44
|
+
```yaml
|
|
45
|
+
dataset_mixer:
|
|
46
|
+
HuggingFaceH4/ultrafeedback_binarized: 1.0
|
|
47
|
+
dataset_splits:
|
|
48
|
+
- train_prefs
|
|
49
|
+
- test_prefs
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### 2. Argilla UltraFeedback (Cleaned)
|
|
53
|
+
|
|
54
|
+
**argilla/ultrafeedback-binarized-preferences-cleaned**:
|
|
55
|
+
- **Size**: 50K pairs (filtered)
|
|
56
|
+
- **Quality**: Very high (deduped, cleaned)
|
|
57
|
+
- **Domain**: General
|
|
58
|
+
- **Format**: Clean
|
|
59
|
+
|
|
60
|
+
**Config**:
|
|
61
|
+
```yaml
|
|
62
|
+
dataset_mixer:
|
|
63
|
+
argilla/ultrafeedback-binarized-preferences-cleaned: 1.0
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### 3. Distilabel Math
|
|
67
|
+
|
|
68
|
+
**argilla/distilabel-math-preference-dpo**:
|
|
69
|
+
- **Size**: 30K pairs
|
|
70
|
+
- **Quality**: High (GSM8K, MATH)
|
|
71
|
+
- **Domain**: Math reasoning
|
|
72
|
+
- **Format**: Math-specific
|
|
73
|
+
|
|
74
|
+
**Config**:
|
|
75
|
+
```yaml
|
|
76
|
+
dataset_mixer:
|
|
77
|
+
argilla/distilabel-math-preference-dpo: 1.0
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 4. HelpSteer
|
|
81
|
+
|
|
82
|
+
**nvidia/HelpSteer**:
|
|
83
|
+
- **Size**: 38K samples
|
|
84
|
+
- **Quality**: High (human ratings)
|
|
85
|
+
- **Domain**: Helpfulness alignment
|
|
86
|
+
- **Format**: Multi-attribute ratings
|
|
87
|
+
|
|
88
|
+
**Config**:
|
|
89
|
+
```yaml
|
|
90
|
+
dataset_mixer:
|
|
91
|
+
nvidia/HelpSteer: 1.0
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### 5. Anthropic HH-RLHF
|
|
95
|
+
|
|
96
|
+
**Anthropic/hh-rlhf**:
|
|
97
|
+
- **Size**: 161K samples
|
|
98
|
+
- **Quality**: High (human preferences)
|
|
99
|
+
- **Domain**: Harmless + helpful
|
|
100
|
+
- **Format**: Conversational
|
|
101
|
+
|
|
102
|
+
**Config**:
|
|
103
|
+
```yaml
|
|
104
|
+
dataset_mixer:
|
|
105
|
+
Anthropic/hh-rlhf: 1.0
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Dataset Mixing
|
|
109
|
+
|
|
110
|
+
### Multiple Datasets
|
|
111
|
+
|
|
112
|
+
**Equal mix**:
|
|
113
|
+
```yaml
|
|
114
|
+
dataset_mixer:
|
|
115
|
+
HuggingFaceH4/ultrafeedback_binarized: 0.5
|
|
116
|
+
Anthropic/hh-rlhf: 0.5
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**Weighted mix**:
|
|
120
|
+
```yaml
|
|
121
|
+
dataset_mixer:
|
|
122
|
+
HuggingFaceH4/ultrafeedback_binarized: 0.7
|
|
123
|
+
argilla/distilabel-math-preference-dpo: 0.2
|
|
124
|
+
nvidia/HelpSteer: 0.1
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
**Domain-specific emphasis**:
|
|
128
|
+
```yaml
|
|
129
|
+
# 80% general + 20% math
|
|
130
|
+
dataset_mixer:
|
|
131
|
+
HuggingFaceH4/ultrafeedback_binarized: 0.8
|
|
132
|
+
argilla/distilabel-math-preference-dpo: 0.2
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Data Quality
|
|
136
|
+
|
|
137
|
+
### Quality Indicators
|
|
138
|
+
|
|
139
|
+
**Good preference data**:
|
|
140
|
+
- ✅ Clear quality difference between chosen/rejected
|
|
141
|
+
- ✅ Diverse prompts
|
|
142
|
+
- ✅ Minimal noise/annotation errors
|
|
143
|
+
- ✅ Appropriate difficulty level
|
|
144
|
+
|
|
145
|
+
**Poor preference data**:
|
|
146
|
+
- ❌ Ambiguous preferences
|
|
147
|
+
- ❌ Repetitive prompts
|
|
148
|
+
- ❌ Annotation noise
|
|
149
|
+
- ❌ Too easy/hard prompts
|
|
150
|
+
|
|
151
|
+
### Quality Filtering
|
|
152
|
+
|
|
153
|
+
**Filter by length difference**:
|
|
154
|
+
```python
|
|
155
|
+
def filter_by_length(example):
|
|
156
|
+
chosen_len = len(example['chosen'].split())
|
|
157
|
+
rejected_len = len(example['rejected'].split())
|
|
158
|
+
# Reject if chosen is much shorter (potential low-effort)
|
|
159
|
+
return chosen_len >= rejected_len * 0.5
|
|
160
|
+
|
|
161
|
+
dataset = dataset.filter(filter_by_length)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
**Filter by diversity**:
|
|
165
|
+
```python
|
|
166
|
+
seen_prompts = set()
|
|
167
|
+
|
|
168
|
+
def filter_duplicates(example):
|
|
169
|
+
prompt = example['prompt']
|
|
170
|
+
if prompt in seen_prompts:
|
|
171
|
+
return False
|
|
172
|
+
seen_prompts.add(prompt)
|
|
173
|
+
return True
|
|
174
|
+
|
|
175
|
+
dataset = dataset.filter(filter_duplicates)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Custom Dataset Creation
|
|
179
|
+
|
|
180
|
+
### Format 1: JSON Lines
|
|
181
|
+
|
|
182
|
+
**File** (`preferences.jsonl`):
|
|
183
|
+
```jsonl
|
|
184
|
+
{"prompt": "What is Python?", "chosen": "Python is a high-level programming language...", "rejected": "It's a snake."}
|
|
185
|
+
{"prompt": "Explain AI.", "chosen": "AI refers to systems that can...", "rejected": "It's computers that think."}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
**Load**:
|
|
189
|
+
```yaml
|
|
190
|
+
dataset_mixer:
|
|
191
|
+
json:
|
|
192
|
+
data_files: preferences.jsonl
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
### Format 2: HuggingFace Dataset
|
|
196
|
+
|
|
197
|
+
**Create from dict**:
|
|
198
|
+
```python
|
|
199
|
+
from datasets import Dataset
|
|
200
|
+
|
|
201
|
+
data = {
|
|
202
|
+
"prompt": ["What is Python?", "Explain AI."],
|
|
203
|
+
"chosen": ["Python is...", "AI refers to..."],
|
|
204
|
+
"rejected": ["It's a snake.", "It's computers..."]
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
dataset = Dataset.from_dict(data)
|
|
208
|
+
dataset.push_to_hub("username/my-preferences")
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
**Use in config**:
|
|
212
|
+
```yaml
|
|
213
|
+
dataset_mixer:
|
|
214
|
+
username/my-preferences: 1.0
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Format 3: ChatML
|
|
218
|
+
|
|
219
|
+
**For conversational data**:
|
|
220
|
+
```json
|
|
221
|
+
{
|
|
222
|
+
"prompt": [
|
|
223
|
+
{"role": "user", "content": "What is quantum computing?"}
|
|
224
|
+
],
|
|
225
|
+
"chosen": [
|
|
226
|
+
{"role": "assistant", "content": "Quantum computing uses qubits..."}
|
|
227
|
+
],
|
|
228
|
+
"rejected": [
|
|
229
|
+
{"role": "assistant", "content": "It's like regular computing but quantum."}
|
|
230
|
+
]
|
|
231
|
+
}
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
**Apply chat template**:
|
|
235
|
+
```yaml
|
|
236
|
+
dataset_text_field: null # Will apply chat template
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Synthetic Data Generation
|
|
240
|
+
|
|
241
|
+
### Using GPT-4
|
|
242
|
+
|
|
243
|
+
**Prompt template**:
|
|
244
|
+
```
|
|
245
|
+
Given the following question:
|
|
246
|
+
{prompt}
|
|
247
|
+
|
|
248
|
+
Generate two responses:
|
|
249
|
+
1. A high-quality, detailed response (chosen)
|
|
250
|
+
2. A low-quality, brief response (rejected)
|
|
251
|
+
|
|
252
|
+
Format as JSON with "chosen" and "rejected" fields.
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
**Example code**:
|
|
256
|
+
```python
|
|
257
|
+
import openai
import json  # needed for json.loads below
|
|
258
|
+
|
|
259
|
+
def generate_pair(prompt):
|
|
260
|
+
response = openai.ChatCompletion.create(  # NOTE(review): legacy API — openai>=1.0 uses OpenAI().chat.completions.create
|
|
261
|
+
model="gpt-4",
|
|
262
|
+
messages=[{
|
|
263
|
+
"role": "user",
|
|
264
|
+
"content": f"Given: {prompt}\n\nGenerate chosen/rejected pair in JSON."
|
|
265
|
+
}]
|
|
266
|
+
)
|
|
267
|
+
return json.loads(response.choices[0].message.content)
|
|
268
|
+
|
|
269
|
+
# Generate dataset
|
|
270
|
+
prompts = load_prompts()
|
|
271
|
+
dataset = [generate_pair(p) for p in prompts]
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Using Local Model
|
|
275
|
+
|
|
276
|
+
**With vLLM**:
|
|
277
|
+
```python
|
|
278
|
+
from vllm import LLM
|
|
279
|
+
|
|
280
|
+
llm = LLM(model="meta-llama/Meta-Llama-3-70B-Instruct")
|
|
281
|
+
|
|
282
|
+
def generate_variations(prompt):
|
|
283
|
+
# Generate multiple completions
|
|
284
|
+
outputs = llm.generate(
|
|
285
|
+
[prompt] * 4,
|
|
286
|
+
sampling_params={  # NOTE(review): vLLM's generate() expects a SamplingParams object, not a dict — confirm against vllm API
|
|
287
|
+
"temperature": 0.8,
|
|
288
|
+
"top_p": 0.9,
|
|
289
|
+
"max_tokens": 512
|
|
290
|
+
}
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
# Select best/worst
|
|
294
|
+
chosen = max(outputs, key=lambda x: len(x.outputs[0].text))
|
|
295
|
+
rejected = min(outputs, key=lambda x: len(x.outputs[0].text))
|
|
296
|
+
|
|
297
|
+
return {
|
|
298
|
+
"prompt": prompt,
|
|
299
|
+
"chosen": chosen.outputs[0].text,
|
|
300
|
+
"rejected": rejected.outputs[0].text
|
|
301
|
+
}
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Data Preprocessing
|
|
305
|
+
|
|
306
|
+
### Truncation
|
|
307
|
+
|
|
308
|
+
**Limit sequence length**:
|
|
309
|
+
```yaml
|
|
310
|
+
max_prompt_length: 512
|
|
311
|
+
max_completion_length: 512
|
|
312
|
+
max_length: 1024 # Total
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
**Implementation**:
|
|
316
|
+
```python
|
|
317
|
+
def truncate_example(example):
|
|
318
|
+
tokenizer.truncation_side = "left" # For prompts
|
|
319
|
+
prompt_tokens = tokenizer(
|
|
320
|
+
example['prompt'],
|
|
321
|
+
max_length=512,
|
|
322
|
+
truncation=True
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
tokenizer.truncation_side = "right" # For completions
|
|
326
|
+
chosen_tokens = tokenizer(
|
|
327
|
+
example['chosen'],
|
|
328
|
+
max_length=512,
|
|
329
|
+
truncation=True
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
return {
|
|
333
|
+
"prompt": tokenizer.decode(prompt_tokens['input_ids']),
|
|
334
|
+
"chosen": tokenizer.decode(chosen_tokens['input_ids'])
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
dataset = dataset.map(truncate_example)
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
### Deduplication
|
|
341
|
+
|
|
342
|
+
**Remove exact duplicates**:
|
|
343
|
+
```python
|
|
344
|
+
# NOTE: Dataset.unique('prompt') returns a list of unique values, not a
# deduplicated Dataset — filter out duplicate rows instead:
seen = set()
dataset = dataset.filter(lambda ex: ex['prompt'] not in seen and not seen.add(ex['prompt']))
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
**Remove near-duplicates** (MinHash):
|
|
348
|
+
```python
|
|
349
|
+
from datasketch import MinHash, MinHashLSH
|
|
350
|
+
|
|
351
|
+
def deduplicate_lsh(dataset, threshold=0.8):
|
|
352
|
+
lsh = MinHashLSH(threshold=threshold, num_perm=128)
|
|
353
|
+
seen = []
|
|
354
|
+
|
|
355
|
+
for i, example in enumerate(dataset):
|
|
356
|
+
m = MinHash(num_perm=128)
|
|
357
|
+
for word in example['prompt'].split():
|
|
358
|
+
m.update(word.encode('utf8'))
|
|
359
|
+
|
|
360
|
+
if not lsh.query(m):
|
|
361
|
+
lsh.insert(i, m)
|
|
362
|
+
seen.append(example)
|
|
363
|
+
|
|
364
|
+
return Dataset.from_list(seen)
|
|
365
|
+
|
|
366
|
+
dataset = deduplicate_lsh(dataset)
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
## Data Augmentation
|
|
370
|
+
|
|
371
|
+
### Paraphrasing Prompts
|
|
372
|
+
|
|
373
|
+
```python
|
|
374
|
+
def paraphrase_prompt(example):
|
|
375
|
+
# Use paraphrasing model
|
|
376
|
+
paraphrased = paraphrase_model(example['prompt'])
|
|
377
|
+
|
|
378
|
+
return [
|
|
379
|
+
example, # Original
|
|
380
|
+
{
|
|
381
|
+
"prompt": paraphrased,
|
|
382
|
+
"chosen": example['chosen'],
|
|
383
|
+
"rejected": example['rejected']
|
|
384
|
+
}
|
|
385
|
+
]
|
|
386
|
+
|
|
387
|
+
# NOTE(review): map() with batched=False must return a dict, not a list of
# examples — to expand one row into several, use batched=True and return
# dict-of-lists (confirm against the datasets.Dataset.map docs):
dataset = dataset.map(paraphrase_prompt, batched=False, remove_columns=[])
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
### Difficulty Balancing
|
|
391
|
+
|
|
392
|
+
**Mix easy/medium/hard**:
|
|
393
|
+
```python
|
|
394
|
+
def categorize_difficulty(example):
|
|
395
|
+
prompt_len = len(example['prompt'].split())
|
|
396
|
+
if prompt_len < 20:
|
|
397
|
+
return "easy"
|
|
398
|
+
elif prompt_len < 50:
|
|
399
|
+
return "medium"
|
|
400
|
+
else:
|
|
401
|
+
return "hard"
|
|
402
|
+
|
|
403
|
+
dataset = dataset.map(lambda x: {"difficulty": categorize_difficulty(x)})
|
|
404
|
+
|
|
405
|
+
# Sample balanced dataset
|
|
406
|
+
easy = dataset.filter(lambda x: x['difficulty'] == 'easy').shuffle().select(range(1000))
|
|
407
|
+
medium = dataset.filter(lambda x: x['difficulty'] == 'medium').shuffle().select(range(1000))
|
|
408
|
+
hard = dataset.filter(lambda x: x['difficulty'] == 'hard').shuffle().select(range(1000))
|
|
409
|
+
|
|
410
|
+
from datasets import concatenate_datasets

balanced = concatenate_datasets([easy, medium, hard]).shuffle()
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
## Dataset Statistics
|
|
414
|
+
|
|
415
|
+
### Compute Stats
|
|
416
|
+
|
|
417
|
+
```python
|
|
418
|
+
import numpy as np

def compute_stats(dataset):
|
|
419
|
+
prompt_lens = [len(x['prompt'].split()) for x in dataset]
|
|
420
|
+
chosen_lens = [len(x['chosen'].split()) for x in dataset]
|
|
421
|
+
rejected_lens = [len(x['rejected'].split()) for x in dataset]
|
|
422
|
+
|
|
423
|
+
print(f"Dataset size: {len(dataset)}")
|
|
424
|
+
print(f"Avg prompt length: {np.mean(prompt_lens):.1f} words")
|
|
425
|
+
print(f"Avg chosen length: {np.mean(chosen_lens):.1f} words")
|
|
426
|
+
print(f"Avg rejected length: {np.mean(rejected_lens):.1f} words")
|
|
427
|
+
print(f"Chosen > Rejected: {sum(c > r for c, r in zip(chosen_lens, rejected_lens)) / len(dataset):.1%}")
|
|
428
|
+
|
|
429
|
+
compute_stats(dataset)
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
**Expected output**:
|
|
433
|
+
```
|
|
434
|
+
Dataset size: 50000
|
|
435
|
+
Avg prompt length: 45.2 words
|
|
436
|
+
Avg chosen length: 180.5 words
|
|
437
|
+
Avg rejected length: 120.3 words
|
|
438
|
+
Chosen > Rejected: 85.2%
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
## Best Practices
|
|
442
|
+
|
|
443
|
+
### 1. Data Quality Over Quantity
|
|
444
|
+
|
|
445
|
+
- **Prefer**: 10K high-quality pairs
|
|
446
|
+
- **Over**: 100K noisy pairs
|
|
447
|
+
|
|
448
|
+
### 2. Clear Preference Signals
|
|
449
|
+
|
|
450
|
+
- Chosen should be noticeably better
|
|
451
|
+
- Avoid marginal differences
|
|
452
|
+
- Remove ambiguous pairs
|
|
453
|
+
|
|
454
|
+
### 3. Domain Matching
|
|
455
|
+
|
|
456
|
+
- Match dataset domain to target use case
|
|
457
|
+
- Mix datasets for broader coverage
|
|
458
|
+
- Include safety-filtered data
|
|
459
|
+
|
|
460
|
+
### 4. Validate Before Training
|
|
461
|
+
|
|
462
|
+
```python
|
|
463
|
+
# Sample 10 random examples
|
|
464
|
+
samples = dataset.shuffle().select(range(10))
|
|
465
|
+
|
|
466
|
+
for ex in samples:
|
|
467
|
+
print(f"Prompt: {ex['prompt']}")
|
|
468
|
+
print(f"Chosen: {ex['chosen'][:100]}...")
|
|
469
|
+
print(f"Rejected: {ex['rejected'][:100]}...")
|
|
470
|
+
print(f"Preference clear: {'✓' if len(ex['chosen']) > len(ex['rejected']) else '?'}")
|
|
471
|
+
print()
|
|
472
|
+
```
|
|
473
|
+
|
|
474
|
+
## References
|
|
475
|
+
|
|
476
|
+
- HuggingFace Datasets: https://huggingface.co/datasets
|
|
477
|
+
- Alignment Handbook: https://github.com/huggingface/alignment-handbook
|
|
478
|
+
- UltraFeedback: https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized
|