@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
# Multi-Node Training
|
|
2
|
+
|
|
3
|
+
Complete guide to distributed Ray cluster training with OpenRLHF across multiple machines.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
OpenRLHF uses Ray for distributed scheduling, allowing Actor, Critic, Reward, and Reference models to span multiple nodes. Supports fault tolerance through checkpointing and automatic task rescheduling.
|
|
8
|
+
|
|
9
|
+
## Ray Cluster Setup
|
|
10
|
+
|
|
11
|
+
### 1. Start Head Node (Master Machine)
|
|
12
|
+
|
|
13
|
+
**In Docker container**:
|
|
14
|
+
```bash
|
|
15
|
+
# Launch container on master node
|
|
16
|
+
docker run --runtime=nvidia -it --rm --shm-size="10g" \
|
|
17
|
+
--cap-add=SYS_ADMIN -v $PWD:/openrlhf \
|
|
18
|
+
nvcr.io/nvidia/pytorch:25.02-py3 bash
|
|
19
|
+
|
|
20
|
+
# Start Ray head node
|
|
21
|
+
ray start --head --node-ip-address 0.0.0.0 --num-gpus 8
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
**Output**:
|
|
25
|
+
```
|
|
26
|
+
Ray runtime started.
|
|
27
|
+
Dashboard: http://0.0.0.0:8265
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### 2. Connect Worker Nodes
|
|
31
|
+
|
|
32
|
+
**On each worker machine**:
|
|
33
|
+
```bash
|
|
34
|
+
# Launch container
|
|
35
|
+
docker run --runtime=nvidia -it --rm --shm-size="10g" \
|
|
36
|
+
--cap-add=SYS_ADMIN -v $PWD:/openrlhf \
|
|
37
|
+
nvcr.io/nvidia/pytorch:25.02-py3 bash
|
|
38
|
+
|
|
39
|
+
# Connect to head node
|
|
40
|
+
ray start --address {MASTER-NODE-IP}:6379 --num-gpus 8
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
**Replace `{MASTER-NODE-IP}`** with head node's IP address.
|
|
44
|
+
|
|
45
|
+
### 3. Verify Cluster
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
# On head node
|
|
49
|
+
ray status
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
**Output**:
|
|
53
|
+
```
|
|
54
|
+
Nodes: 4
|
|
55
|
+
- 1 head node (8 GPUs)
|
|
56
|
+
- 3 worker nodes (8 GPUs each)
|
|
57
|
+
Total GPUs: 32
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Distributed Training Configuration
|
|
61
|
+
|
|
62
|
+
### Multi-Node PPO Training
|
|
63
|
+
|
|
64
|
+
**4-node cluster (32 GPUs)** - 70B model:
|
|
65
|
+
```bash
|
|
66
|
+
ray job submit --address="http://127.0.0.1:8265" \
|
|
67
|
+
--runtime-env-json='{"working_dir": "/openrlhf"}' \
|
|
68
|
+
-- python3 -m openrlhf.cli.train_ppo_ray \
|
|
69
|
+
--ref_num_nodes 1 --ref_num_gpus_per_node 8 \
|
|
70
|
+
--reward_num_nodes 1 --reward_num_gpus_per_node 8 \
|
|
71
|
+
--critic_num_nodes 1 --critic_num_gpus_per_node 8 \
|
|
72
|
+
--actor_num_nodes 1 --actor_num_gpus_per_node 8 \
|
|
73
|
+
--vllm_num_engines 2 --vllm_tensor_parallel_size 4 \
|
|
74
|
+
--pretrain meta-llama/Llama-2-70b-hf \
|
|
75
|
+
--reward_pretrain ./reward-model-70b \
|
|
76
|
+
--save_path ./output/llama-70b-ppo \
|
|
77
|
+
--ckpt_path ./checkpoints/llama-70b-ppo \
|
|
78
|
+
--save_steps 100 --logging_steps 1 \
|
|
79
|
+
--micro_train_batch_size 2 --train_batch_size 128 \
|
|
80
|
+
--micro_rollout_batch_size 4 --rollout_batch_size 1024 \
|
|
81
|
+
--max_epochs 1 --prompt_max_len 1024 --generate_max_len 1024 \
|
|
82
|
+
--zero_stage 3 --bf16 \
|
|
83
|
+
--actor_learning_rate 5e-7 --critic_learning_rate 9e-6 \
|
|
84
|
+
--init_kl_coef 0.01 --normalize_reward \
|
|
85
|
+
--gradient_checkpointing --flash_attn
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
**GPU allocation**:
|
|
89
|
+
- **Node 1**: Reference model (8 GPUs)
|
|
90
|
+
- **Node 2**: Reward model (8 GPUs)
|
|
91
|
+
- **Node 3**: Critic model (8 GPUs)
|
|
92
|
+
- **Node 4**: Actor model (8 GPUs)
|
|
93
|
+
|
|
94
|
+
### Model Distribution Arguments
|
|
95
|
+
|
|
96
|
+
**Per-model configuration**:
|
|
97
|
+
```bash
|
|
98
|
+
# Actor model
|
|
99
|
+
--actor_num_nodes 2 # 2 nodes for actor
|
|
100
|
+
--actor_num_gpus_per_node 8 # 8 GPUs per node = 16 GPUs total
|
|
101
|
+
|
|
102
|
+
# Critic model
|
|
103
|
+
--critic_num_nodes 1
|
|
104
|
+
--critic_num_gpus_per_node 8
|
|
105
|
+
|
|
106
|
+
# Reward model
|
|
107
|
+
--reward_num_nodes 1
|
|
108
|
+
--reward_num_gpus_per_node 8
|
|
109
|
+
|
|
110
|
+
# Reference model
|
|
111
|
+
--ref_num_nodes 1
|
|
112
|
+
--ref_num_gpus_per_node 8
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Hybrid Engine (Colocated Models)
|
|
116
|
+
|
|
117
|
+
**Share GPUs across models**:
|
|
118
|
+
```bash
|
|
119
|
+
# Colocate all models on same GPUs
|
|
120
|
+
--colocate_all_models
|
|
121
|
+
|
|
122
|
+
# Or colocate specific pairs
|
|
123
|
+
--colocate_actor_ref # Actor + Reference
|
|
124
|
+
--colocate_critic_reward # Critic + Reward
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
**Example (2-node, 16 GPUs)**:
|
|
128
|
+
```bash
|
|
129
|
+
ray job submit --address="http://127.0.0.1:8265" \
|
|
130
|
+
-- python3 -m openrlhf.cli.train_ppo_ray \
|
|
131
|
+
--colocate_all_models \
|
|
132
|
+
--vllm_enable_sleep --deepspeed_enable_sleep \
|
|
133
|
+
--actor_num_nodes 2 --actor_num_gpus_per_node 8 \
|
|
134
|
+
--critic_num_nodes 0 --critic_num_gpus_per_node 0 \
|
|
135
|
+
--reward_num_nodes 0 --reward_num_gpus_per_node 0 \
|
|
136
|
+
--ref_num_nodes 0 --ref_num_gpus_per_node 0 \
|
|
137
|
+
--vllm_num_engines 4 --vllm_tensor_parallel_size 4 \
|
|
138
|
+
# ... other args
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Result**: All models share 16 GPUs via sleep/wake cycles.
|
|
142
|
+
|
|
143
|
+
## vLLM Configuration
|
|
144
|
+
|
|
145
|
+
### Tensor Parallelism
|
|
146
|
+
|
|
147
|
+
**Multi-GPU per engine**:
|
|
148
|
+
```bash
|
|
149
|
+
--vllm_num_engines 4 # 4 engines
|
|
150
|
+
--vllm_tensor_parallel_size 4 # 4 GPUs each = 16 GPUs total
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### GPU Memory Management
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
--vllm_gpu_memory_utilization 0.5 # Use 50% GPU for vLLM
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
**Calculation**:
|
|
160
|
+
- A100 80GB × 0.5 = 40GB for vLLM
|
|
161
|
+
- Remaining 40GB for other models (if colocated)
|
|
162
|
+
|
|
163
|
+
## Checkpointing
|
|
164
|
+
|
|
165
|
+
### Enable Checkpointing
|
|
166
|
+
|
|
167
|
+
**Basic checkpointing**:
|
|
168
|
+
```bash
|
|
169
|
+
--save_path ./output/model # Final save path
|
|
170
|
+
--ckpt_path ./checkpoints/model # Checkpoint directory
|
|
171
|
+
--save_steps 100 # Save every 100 steps
|
|
172
|
+
--save_value_network # Also save critic
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**HuggingFace format**:
|
|
176
|
+
```bash
|
|
177
|
+
--save_hf_ckpt # Save as HuggingFace model (easier loading)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
**DeepSpeed universal checkpoint**:
|
|
181
|
+
```bash
|
|
182
|
+
--use_ds_universal_ckpt # Compatible across ZeRO stages
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Checkpoint Content
|
|
186
|
+
|
|
187
|
+
**Saved state**:
|
|
188
|
+
```python
|
|
189
|
+
{
|
|
190
|
+
"global_step": 1000,
|
|
191
|
+
"episode": 10,
|
|
192
|
+
"data_loader_state_dict": {...},
|
|
193
|
+
"actor_model": {...}, # DeepSpeed checkpoint
|
|
194
|
+
"critic_model": {...} # If --save_value_network
|
|
195
|
+
}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
**Files created**:
|
|
199
|
+
```
|
|
200
|
+
checkpoints/llama-70b-ppo/
|
|
201
|
+
├── global_step_1000/
|
|
202
|
+
│ ├── actor/
|
|
203
|
+
│ │ ├── mp_rank_00_model_states.pt
|
|
204
|
+
│ │ ├── zero_pp_rank_0_mp_rank_00optim_states.pt
|
|
205
|
+
│ │ └── ...
|
|
206
|
+
│ └── critic/ (if --save_value_network)
|
|
207
|
+
│ └── ...
|
|
208
|
+
└── hf_ckpt/ (if --save_hf_ckpt)
|
|
209
|
+
├── config.json
|
|
210
|
+
├── pytorch_model.bin
|
|
211
|
+
└── ...
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Resume Training
|
|
215
|
+
|
|
216
|
+
**From checkpoint**:
|
|
217
|
+
```bash
|
|
218
|
+
ray job submit --address="http://127.0.0.1:8265" \
|
|
219
|
+
-- python3 -m openrlhf.cli.train_ppo_ray \
|
|
220
|
+
--load_checkpoint # Enable resume
|
|
221
|
+
--ckpt_path ./checkpoints/llama-70b-ppo # Checkpoint dir
|
|
222
|
+
# ... other args (must match original)
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
**Resume logic**:
|
|
226
|
+
1. `PPOTrainer.fit()` checks for existing checkpoints
|
|
227
|
+
2. Loads latest checkpoint from `ckpt_path`
|
|
228
|
+
3. Restores `global_step`, `episode`, dataloader state
|
|
229
|
+
4. Continues training from that point
|
|
230
|
+
|
|
231
|
+
## Fault Tolerance
|
|
232
|
+
|
|
233
|
+
### Automatic Task Rescheduling
|
|
234
|
+
|
|
235
|
+
**Ray's built-in fault tolerance**:
|
|
236
|
+
- If worker node fails → Ray reschedules tasks on available nodes
|
|
237
|
+
- Requires sufficient resources on remaining nodes
|
|
238
|
+
- May need to reinitialize some components
|
|
239
|
+
|
|
240
|
+
### DeepSpeed Sleep Mode Protection
|
|
241
|
+
|
|
242
|
+
**Prevents OOM-related failures**:
|
|
243
|
+
```bash
|
|
244
|
+
--deepspeed_enable_sleep # Offload to CPU when not training
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
**Sleep/wake cycle**:
|
|
248
|
+
1. Model offloaded to CPU after training
|
|
249
|
+
2. Frees GPU memory for other components
|
|
250
|
+
3. Reloaded from CPU before next training step
|
|
251
|
+
4. Synchronized via Ray barriers
|
|
252
|
+
|
|
253
|
+
**OOM prevention**:
|
|
254
|
+
- Models don't compete for GPU memory
|
|
255
|
+
- Sequential loading prevents concurrent OOM
|
|
256
|
+
- Barriers ensure synchronization
|
|
257
|
+
|
|
258
|
+
### Checkpoint-Based Recovery
|
|
259
|
+
|
|
260
|
+
**Recover from catastrophic failure**:
|
|
261
|
+
1. Training interrupted (node crash, OOM, etc.)
|
|
262
|
+
2. Restart Ray cluster
|
|
263
|
+
3. Resume with `--load_checkpoint`
|
|
264
|
+
4. Training continues from last saved step
|
|
265
|
+
|
|
266
|
+
**Best practice**:
|
|
267
|
+
```bash
|
|
268
|
+
--save_steps 100 # Frequent checkpointing (every 100 steps)
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
## Monitoring
|
|
272
|
+
|
|
273
|
+
### Ray Dashboard
|
|
274
|
+
|
|
275
|
+
**Access dashboard**:
|
|
276
|
+
```
|
|
277
|
+
http://{HEAD-NODE-IP}:8265
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
**Monitor**:
|
|
281
|
+
- Node status (active, idle, failed)
|
|
282
|
+
- GPU utilization per node
|
|
283
|
+
- Task scheduling (which models on which nodes)
|
|
284
|
+
- Resource usage (memory, CPU, GPU)
|
|
285
|
+
|
|
286
|
+
### Weights & Biases Integration
|
|
287
|
+
|
|
288
|
+
**Enable W&B logging**:
|
|
289
|
+
```bash
|
|
290
|
+
--use_wandb {your-wandb-token}
|
|
291
|
+
--wandb_org your-org
|
|
292
|
+
--wandb_project llama-70b-ppo
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
**Metrics logged**:
|
|
296
|
+
- Training loss per step
|
|
297
|
+
- Reward scores
|
|
298
|
+
- KL divergence
|
|
299
|
+
- GPU utilization per node
|
|
300
|
+
|
|
301
|
+
## Performance Optimization
|
|
302
|
+
|
|
303
|
+
### InfiniBand for Multi-Node
|
|
304
|
+
|
|
305
|
+
**For nodes with InfiniBand**:
|
|
306
|
+
```bash
|
|
307
|
+
# Set environment variable before starting Ray
|
|
308
|
+
export NCCL_IB_HCA=mlx5_0 # InfiniBand device
|
|
309
|
+
export NCCL_SOCKET_IFNAME=ib0
|
|
310
|
+
export NCCL_IB_DISABLE=0
|
|
311
|
+
|
|
312
|
+
ray start --head --node-ip-address 0.0.0.0 --num-gpus 8
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
**Performance gain**: 2-3× faster multi-node communication
|
|
316
|
+
|
|
317
|
+
### Gradient Checkpointing
|
|
318
|
+
|
|
319
|
+
**Reduce memory, enable larger models**:
|
|
320
|
+
```bash
|
|
321
|
+
--gradient_checkpointing # Trade compute for memory
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Flash Attention 2
|
|
325
|
+
|
|
326
|
+
**Faster attention, lower memory**:
|
|
327
|
+
```bash
|
|
328
|
+
--flash_attn # Requires FlashAttention installed
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
### Packing Samples
|
|
332
|
+
|
|
333
|
+
**Improve GPU utilization**:
|
|
334
|
+
```bash
|
|
335
|
+
--packing_samples # Pack multiple samples into one sequence (removes padding waste)
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
## Troubleshooting
|
|
339
|
+
|
|
340
|
+
### Ray Connection Issues
|
|
341
|
+
|
|
342
|
+
**Symptom**: Worker nodes can't connect to head
|
|
343
|
+
|
|
344
|
+
**Solution**: Check firewall/network
|
|
345
|
+
```bash
|
|
346
|
+
# On head node, ensure ports open
|
|
347
|
+
# Default ports: 6379 (GCS; Redis in older Ray versions), 8265 (Dashboard), 10001-10100 (workers)
|
|
348
|
+
|
|
349
|
+
# Test connection from worker
|
|
350
|
+
telnet {HEAD-NODE-IP} 6379
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
### Node Failures During Training
|
|
354
|
+
|
|
355
|
+
**Symptom**: Ray reports node failure
|
|
356
|
+
|
|
357
|
+
**Solution 1** - Resume from checkpoint:
|
|
358
|
+
```bash
|
|
359
|
+
# Fix failed node or remove from cluster
|
|
360
|
+
ray stop # On failed node
|
|
361
|
+
# Then resume training with --load_checkpoint
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
**Solution 2** - Adjust resources:
|
|
365
|
+
```bash
|
|
366
|
+
# Reduce nodes if some failed
|
|
367
|
+
--actor_num_nodes 1 # Instead of 2
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
### OOM on Multi-Node
|
|
371
|
+
|
|
372
|
+
**Symptom**: OOM despite multi-node setup
|
|
373
|
+
|
|
374
|
+
**Solution 1** - Reduce batch sizes:
|
|
375
|
+
```bash
|
|
376
|
+
--micro_train_batch_size 1 # Reduce from 2
|
|
377
|
+
--micro_rollout_batch_size 2 # Reduce from 4
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
**Solution 2** - Enable sleep modes:
|
|
381
|
+
```bash
|
|
382
|
+
--vllm_enable_sleep
|
|
383
|
+
--deepspeed_enable_sleep
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
**Solution 3** - Increase ZeRO stage:
|
|
387
|
+
```bash
|
|
388
|
+
--zero_stage 3 # Maximum sharding
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
### Checkpoint Loading Fails
|
|
392
|
+
|
|
393
|
+
**Symptom**: `FileNotFoundError` when resuming
|
|
394
|
+
|
|
395
|
+
**Check checkpoint path**:
|
|
396
|
+
```bash
|
|
397
|
+
ls -la ./checkpoints/llama-70b-ppo/
|
|
398
|
+
# Verify global_step_* directories exist
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
**Solution**: Ensure `--ckpt_path` matches save location
|
|
402
|
+
```bash
|
|
403
|
+
--ckpt_path ./checkpoints/llama-70b-ppo # Same as during save
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
## Complete Multi-Node Example
|
|
407
|
+
|
|
408
|
+
### 8-node cluster (64 GPUs) - 70B model
|
|
409
|
+
|
|
410
|
+
**Head node (Node 1)**:
|
|
411
|
+
```bash
|
|
412
|
+
ray start --head --node-ip-address 10.0.0.1 --num-gpus 8
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
**Worker nodes (Nodes 2-8)**:
|
|
416
|
+
```bash
|
|
417
|
+
ray start --address 10.0.0.1:6379 --num-gpus 8
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
**Submit job**:
|
|
421
|
+
```bash
|
|
422
|
+
ray job submit --address="http://10.0.0.1:8265" \
|
|
423
|
+
--runtime-env-json='{"working_dir": "/openrlhf"}' \
|
|
424
|
+
-- python3 -m openrlhf.cli.train_ppo_ray \
|
|
425
|
+
--ref_num_nodes 2 --ref_num_gpus_per_node 8 \
|
|
426
|
+
--reward_num_nodes 2 --reward_num_gpus_per_node 8 \
|
|
427
|
+
--critic_num_nodes 2 --critic_num_gpus_per_node 8 \
|
|
428
|
+
--actor_num_nodes 2 --actor_num_gpus_per_node 8 \
|
|
429
|
+
--vllm_num_engines 4 --vllm_tensor_parallel_size 4 \
|
|
430
|
+
--pretrain meta-llama/Llama-2-70b-hf \
|
|
431
|
+
--reward_pretrain ./reward-70b \
|
|
432
|
+
--save_path ./output/llama-70b-ppo \
|
|
433
|
+
--ckpt_path ./checkpoints/llama-70b-ppo \
|
|
434
|
+
--save_steps 100 --save_hf_ckpt \
|
|
435
|
+
--micro_train_batch_size 1 --train_batch_size 128 \
|
|
436
|
+
--micro_rollout_batch_size 2 --rollout_batch_size 1024 \
|
|
437
|
+
--max_epochs 1 --bf16 --zero_stage 3 \
|
|
438
|
+
--actor_learning_rate 5e-7 --critic_learning_rate 9e-6 \
|
|
439
|
+
--gradient_checkpointing --flash_attn --packing_samples \
|
|
440
|
+
--use_wandb {token} --wandb_project llama-70b-ppo
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
**GPU allocation**:
|
|
444
|
+
- Reference: 16 GPUs (2 nodes × 8)
|
|
445
|
+
- Reward: 16 GPUs (2 nodes × 8)
|
|
446
|
+
- Critic: 16 GPUs (2 nodes × 8)
|
|
447
|
+
- Actor: 16 GPUs (2 nodes × 8)
|
|
448
|
+
- **Total**: 64 GPUs (note: the 4 vLLM engines × TP 4 imply 16 further GPUs unless colocated with the actor — verify against your colocation settings)
|
|
449
|
+
|
|
450
|
+
## References
|
|
451
|
+
|
|
452
|
+
- Ray Docs: https://docs.ray.io/
|
|
453
|
+
- OpenRLHF: https://github.com/OpenRLHF/OpenRLHF
|
|
454
|
+
- DeepSpeed ZeRO: https://www.deepspeed.ai/tutorials/zero/
|