@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.10"
|
|
4
|
+
# dependencies = []
|
|
5
|
+
# ///
|
|
6
|
+
"""
|
|
7
|
+
Dataset Format Inspector for TRL Training (LLM-Optimized Output)
|
|
8
|
+
|
|
9
|
+
Inspects Hugging Face datasets to determine TRL training compatibility.
|
|
10
|
+
Uses Datasets Server API for instant results - no dataset download needed!
|
|
11
|
+
|
|
12
|
+
ULTRA-EFFICIENT: Uses HF Datasets Server API - completes in <2 seconds.
|
|
13
|
+
|
|
14
|
+
Usage with HF Jobs:
|
|
15
|
+
hf_jobs("uv", {
|
|
16
|
+
"script": "https://huggingface.co/datasets/evalstate/trl-helpers/raw/main/dataset_inspector.py",
|
|
17
|
+
"script_args": ["--dataset", "your/dataset", "--split", "train"]
|
|
18
|
+
})
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import sys
|
|
23
|
+
import json
|
|
24
|
+
import urllib.request
|
|
25
|
+
import urllib.parse
|
|
26
|
+
from typing import List, Dict, Any
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def parse_args():
    """Parse command-line options for the dataset inspector."""
    cli = argparse.ArgumentParser(description="Inspect dataset format for TRL training")
    cli.add_argument("--dataset", type=str, required=True, help="Dataset name")
    cli.add_argument("--split", type=str, default="train", help="Dataset split (default: train)")
    cli.add_argument("--config", type=str, default="default", help="Dataset config name (default: default)")
    cli.add_argument("--preview", type=int, default=150, help="Max chars per field preview")
    cli.add_argument("--samples", type=int, default=5, help="Number of samples to fetch (default: 5)")
    cli.add_argument("--json-output", action="store_true", help="Output as JSON")
    return cli.parse_args()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def api_request(url: str) -> Dict | None:
    """Fetch *url* from the Datasets Server API and decode the JSON body.

    Returns:
        The parsed JSON response, or None when the server answers 404
        (dataset/config/split not found) — an expected, recoverable case.

    Raises:
        Exception: for any other failure, wrapped so callers have a single
            error type; the original exception is chained as __cause__.
    """
    try:
        # 10s timeout keeps the script fast; the API normally answers in <2s.
        with urllib.request.urlopen(url, timeout=10) as response:
            return json.loads(response.read().decode())
    except urllib.error.HTTPError as e:
        if e.code == 404:
            return None
        # chain the HTTPError so the status context isn't lost in tracebacks
        raise Exception(f"API request failed: {e.code} {e.reason}") from e
    except Exception as e:
        raise Exception(f"API request failed: {str(e)}") from e
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_splits(dataset: str) -> Dict | None:
    """Return the Datasets Server /splits payload for *dataset*.

    Returns None when the dataset is unknown to the API (404 upstream).
    """
    url = f"https://datasets-server.huggingface.co/splits?dataset={urllib.parse.quote(dataset)}"
    return api_request(url)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_rows(dataset: str, config: str, split: str, offset: int = 0, length: int = 5) -> Dict | None:
    """Return up to *length* rows of *dataset* starting at *offset*.

    Returns None when the dataset/config/split is unknown (404 upstream).
    All string components are URL-quoted; config names can contain
    characters (e.g. '/') that would otherwise corrupt the query string.
    """
    url = (
        "https://datasets-server.huggingface.co/rows"
        f"?dataset={urllib.parse.quote(dataset)}"
        f"&config={urllib.parse.quote(config)}"
        f"&split={urllib.parse.quote(split)}"
        f"&offset={offset}&length={length}"
    )
    return api_request(url)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def find_columns(columns: List[str], patterns: List[str]) -> List[str]:
    """Return every column whose lowercased name contains any of *patterns*."""
    matches = []
    for col in columns:
        lowered = col.lower()
        if any(pat in lowered for pat in patterns):
            matches.append(col)
    return matches
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def check_sft_compatibility(columns: List[str]) -> Dict[str, Any]:
    """Report whether *columns* can feed SFT training as-is or via mapping."""
    prompt_like = [c for c in columns
                   if any(p in c.lower() for p in ("prompt", "instruction", "question", "input"))]
    response_like = [c for c in columns
                     if any(p in c.lower() for p in ("response", "completion", "output", "answer"))]

    # TRL accepts these schemas directly, checked in priority order.
    if "messages" in columns:
        reason = "messages"
    elif "text" in columns:
        reason = "text"
    elif "prompt" in columns and "completion" in columns:
        reason = "prompt+completion"
    else:
        reason = None

    return {
        "ready": reason is not None,
        "reason": reason,
        "possible_prompt": prompt_like[0] if prompt_like else None,
        "possible_response": response_like[0] if response_like else None,
        "has_context": "context" in columns,
    }
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def check_dpo_compatibility(columns: List[str]) -> Dict[str, Any]:
    """Report whether *columns* match (or can be mapped to) DPO's schema."""
    def first_match(patterns):
        # first column whose lowercased name contains any pattern, else None
        hits = [c for c in columns if any(p in c.lower() for p in patterns)]
        return hits[0] if hits else None

    prompt_col = first_match(("prompt", "instruction", "question", "input"))
    chosen_col = first_match(("chosen", "preferred", "winner"))
    rejected_col = first_match(("rejected", "dispreferred", "loser"))

    return {
        # exact standard schema means no mapping step is needed
        "ready": "prompt" in columns and "chosen" in columns and "rejected" in columns,
        "can_map": bool(prompt_col and chosen_col and rejected_col),
        "prompt_col": prompt_col,
        "chosen_col": chosen_col,
        "rejected_col": rejected_col,
    }
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def check_grpo_compatibility(columns: List[str]) -> Dict[str, Any]:
    """Report GRPO readiness: needs a prompt column and no preference pair."""
    # GRPO generates its own responses, so chosen/rejected columns disqualify.
    no_preference_pair = "chosen" not in columns and "rejected" not in columns
    prompt_like = [c for c in columns
                   if any(p in c.lower() for p in ("prompt", "instruction", "question", "input"))]

    return {
        "ready": "prompt" in columns and no_preference_pair,
        "can_map": bool(prompt_like) and no_preference_pair,
        "prompt_col": prompt_like[0] if prompt_like else None,
    }
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def check_kto_compatibility(columns: List[str]) -> Dict[str, Any]:
    """KTO needs the prompt/completion/label trio to all be present."""
    required = ("prompt", "completion", "label")
    return {"ready": all(col in columns for col in required)}
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def generate_mapping_code(method: str, info: Dict[str, Any]) -> str:
|
|
129
|
+
"""Generate mapping code for a training method"""
|
|
130
|
+
if method == "SFT":
|
|
131
|
+
if info["ready"]:
|
|
132
|
+
return None
|
|
133
|
+
|
|
134
|
+
prompt_col = info.get("possible_prompt")
|
|
135
|
+
response_col = info.get("possible_response")
|
|
136
|
+
has_context = info.get("has_context", False)
|
|
137
|
+
|
|
138
|
+
if not prompt_col:
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
if has_context and response_col:
|
|
142
|
+
return f"""def format_for_sft(example):
|
|
143
|
+
text = f"Instruction: {{example['{prompt_col}']}}\n\n"
|
|
144
|
+
if example.get('context'):
|
|
145
|
+
text += f"Context: {{example['context']}}\n\n"
|
|
146
|
+
text += f"Response: {{example['{response_col}']}}"
|
|
147
|
+
return {{'text': text}}
|
|
148
|
+
|
|
149
|
+
dataset = dataset.map(format_for_sft, remove_columns=dataset.column_names)"""
|
|
150
|
+
elif response_col:
|
|
151
|
+
return f"""def format_for_sft(example):
|
|
152
|
+
return {{'text': f"{{example['{prompt_col}']}}\n\n{{example['{response_col}']}}}}
|
|
153
|
+
|
|
154
|
+
dataset = dataset.map(format_for_sft, remove_columns=dataset.column_names)"""
|
|
155
|
+
else:
|
|
156
|
+
return f"""def format_for_sft(example):
|
|
157
|
+
return {{'text': example['{prompt_col}']}}
|
|
158
|
+
|
|
159
|
+
dataset = dataset.map(format_for_sft, remove_columns=dataset.column_names)"""
|
|
160
|
+
|
|
161
|
+
elif method == "DPO":
|
|
162
|
+
if info["ready"] or not info["can_map"]:
|
|
163
|
+
return None
|
|
164
|
+
|
|
165
|
+
return f"""def format_for_dpo(example):
|
|
166
|
+
return {{
|
|
167
|
+
'prompt': example['{info['prompt_col']}'],
|
|
168
|
+
'chosen': example['{info['chosen_col']}'],
|
|
169
|
+
'rejected': example['{info['rejected_col']}'],
|
|
170
|
+
}}
|
|
171
|
+
|
|
172
|
+
dataset = dataset.map(format_for_dpo, remove_columns=dataset.column_names)"""
|
|
173
|
+
|
|
174
|
+
elif method == "GRPO":
|
|
175
|
+
if info["ready"] or not info["can_map"]:
|
|
176
|
+
return None
|
|
177
|
+
|
|
178
|
+
return f"""def format_for_grpo(example):
|
|
179
|
+
return {{'prompt': example['{info['prompt_col']}']}}
|
|
180
|
+
|
|
181
|
+
dataset = dataset.map(format_for_grpo, remove_columns=dataset.column_names)"""
|
|
182
|
+
|
|
183
|
+
return None
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def format_value_preview(value: Any, max_chars: int) -> str:
    """Render *value* as a short, human-readable preview string.

    Strings (and stringified values) are truncated to *max_chars* with a
    trailing ellipsis when cut; a list of dicts is summarised by its item
    count and the keys of its first element; ``None`` becomes the literal
    string "None".
    """
    def truncate(text: str) -> str:
        # Append "..." only when something was actually trimmed off.
        return text[:max_chars] + ("..." if len(text) > max_chars else "")

    if value is None:
        return "None"
    if isinstance(value, str):
        return truncate(value)
    if isinstance(value, list) and value and isinstance(value[0], dict):
        # List-of-records: show the count and schema instead of raw contents.
        return f"[{len(value)} items] Keys: {list(value[0].keys())}"
    # Everything else (including non-dict lists) previews via str().
    return truncate(str(value))
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def main() -> None:
    """Inspect a Hub dataset via the Datasets Server API and report TRL compatibility.

    Fetches split metadata and sample rows (no dataset download), checks the
    column layout against the SFT/DPO/GRPO/KTO trainer expectations, prints
    either a JSON report (--json-output) or a human-readable summary with
    ready-to-paste mapping code, then exits (0 on success, 1 on any failure).

    NOTE(review): relies on helpers defined elsewhere in this file
    (parse_args, get_splits, get_rows, check_*_compatibility,
    generate_mapping_code) — their contracts are assumed from usage here.
    """
    args = parse_args()

    print(f"Fetching dataset info via Datasets Server API...")

    try:
        # Get splits info
        splits_data = get_splits(args.dataset)
        if not splits_data or "splits" not in splits_data:
            print(f"ERROR: Could not fetch splits for dataset '{args.dataset}'")
            print(f" Dataset may not exist or is not accessible via Datasets Server API")
            sys.exit(1)

        # Find the right config
        available_configs = set()
        split_found = False
        config_to_use = args.config

        for split_info in splits_data["splits"]:
            available_configs.add(split_info["config"])
            if split_info["config"] == args.config and split_info["split"] == args.split:
                split_found = True

        # If default config not found, try first available
        # (set iteration order is arbitrary, so "first" is not deterministic)
        if not split_found and available_configs:
            config_to_use = list(available_configs)[0]
            print(f"Config '{args.config}' not found, trying '{config_to_use}'...")

        # Get rows
        rows_data = get_rows(args.dataset, config_to_use, args.split, offset=0, length=args.samples)

        if not rows_data or "rows" not in rows_data:
            print(f"ERROR: Could not fetch rows for dataset '{args.dataset}'")
            print(f" Split '{args.split}' may not exist")
            print(f" Available configs: {', '.join(sorted(available_configs))}")
            sys.exit(1)

        rows = rows_data["rows"]
        if not rows:
            print(f"ERROR: No rows found in split '{args.split}'")
            sys.exit(1)

        # Extract column info from first row
        first_row = rows[0]["row"]
        columns = list(first_row.keys())
        features = rows_data.get("features", [])

        # Get total count if available
        total_examples = "Unknown"
        for split_info in splits_data["splits"]:
            if split_info["config"] == config_to_use and split_info["split"] == args.split:
                # num_examples may be absent or non-int; only format real ints.
                total_examples = f"{split_info.get('num_examples', 'Unknown'):,}" if isinstance(split_info.get('num_examples'), int) else "Unknown"
                break

    except Exception as e:
        # Broad catch is deliberate: this is a CLI boundary and any failure
        # should surface as a clean error message, not a traceback.
        print(f"ERROR: {str(e)}")
        sys.exit(1)

    # Run compatibility checks
    sft_info = check_sft_compatibility(columns)
    dpo_info = check_dpo_compatibility(columns)
    grpo_info = check_grpo_compatibility(columns)
    kto_info = check_kto_compatibility(columns)

    # Determine recommended methods
    recommended = []
    if sft_info["ready"]:
        recommended.append("SFT")
    elif sft_info["possible_prompt"]:
        recommended.append("SFT (needs mapping)")

    if dpo_info["ready"]:
        recommended.append("DPO")
    elif dpo_info["can_map"]:
        recommended.append("DPO (needs mapping)")

    if grpo_info["ready"]:
        recommended.append("GRPO")
    elif grpo_info["can_map"]:
        recommended.append("GRPO (needs mapping)")

    if kto_info["ready"]:
        recommended.append("KTO")

    # JSON output mode: emit machine-readable report and stop.
    if args.json_output:
        result = {
            "dataset": args.dataset,
            "config": config_to_use,
            "split": args.split,
            "total_examples": total_examples,
            "columns": columns,
            "features": [{"name": f["name"], "type": f["type"]} for f in features] if features else [],
            "compatibility": {
                "SFT": sft_info,
                "DPO": dpo_info,
                "GRPO": grpo_info,
                "KTO": kto_info,
            },
            "recommended_methods": recommended,
        }
        print(json.dumps(result, indent=2))
        sys.exit(0)

    # Human-readable output optimized for LLM parsing
    print("=" * 80)
    print(f"DATASET INSPECTION RESULTS")
    print("=" * 80)

    print(f"\nDataset: {args.dataset}")
    print(f"Config: {config_to_use}")
    print(f"Split: {args.split}")
    print(f"Total examples: {total_examples}")
    print(f"Samples fetched: {len(rows)}")

    print(f"\n{'COLUMNS':-<80}")
    if features:
        for feature in features:
            print(f" {feature['name']}: {feature['type']}")
    else:
        for col in columns:
            print(f" {col}: (type info not available)")

    print(f"\n{'EXAMPLE DATA':-<80}")
    example = first_row
    for col in columns:
        value = example.get(col)
        display = format_value_preview(value, args.preview)
        print(f"\n{col}:")
        print(f" {display}")

    print(f"\n{'TRAINING METHOD COMPATIBILITY':-<80}")

    # SFT
    print(f"\n[SFT] {'✓ READY' if sft_info['ready'] else '✗ NEEDS MAPPING'}")
    if sft_info["ready"]:
        print(f" Reason: Dataset has '{sft_info['reason']}' field")
        print(f" Action: Use directly with SFTTrainer")
    elif sft_info["possible_prompt"]:
        print(f" Detected: prompt='{sft_info['possible_prompt']}' response='{sft_info['possible_response']}'")
        print(f" Action: Apply mapping code (see below)")
    else:
        print(f" Status: Cannot determine mapping - manual inspection needed")

    # DPO
    print(f"\n[DPO] {'✓ READY' if dpo_info['ready'] else '✗ NEEDS MAPPING' if dpo_info['can_map'] else '✗ INCOMPATIBLE'}")
    if dpo_info["ready"]:
        print(f" Reason: Dataset has 'prompt', 'chosen', 'rejected' fields")
        print(f" Action: Use directly with DPOTrainer")
    elif dpo_info["can_map"]:
        print(f" Detected: prompt='{dpo_info['prompt_col']}' chosen='{dpo_info['chosen_col']}' rejected='{dpo_info['rejected_col']}'")
        print(f" Action: Apply mapping code (see below)")
    else:
        print(f" Status: Missing required fields (prompt + chosen + rejected)")

    # GRPO
    print(f"\n[GRPO] {'✓ READY' if grpo_info['ready'] else '✗ NEEDS MAPPING' if grpo_info['can_map'] else '✗ INCOMPATIBLE'}")
    if grpo_info["ready"]:
        print(f" Reason: Dataset has 'prompt' field")
        print(f" Action: Use directly with GRPOTrainer")
    elif grpo_info["can_map"]:
        print(f" Detected: prompt='{grpo_info['prompt_col']}'")
        print(f" Action: Apply mapping code (see below)")
    else:
        print(f" Status: Missing prompt field")

    # KTO (no auto-mapping path exists for KTO, hence only READY/INCOMPATIBLE)
    print(f"\n[KTO] {'✓ READY' if kto_info['ready'] else '✗ INCOMPATIBLE'}")
    if kto_info["ready"]:
        print(f" Reason: Dataset has 'prompt', 'completion', 'label' fields")
        print(f" Action: Use directly with KTOTrainer")
    else:
        print(f" Status: Missing required fields (prompt + completion + label)")

    # Mapping code
    print(f"\n{'MAPPING CODE (if needed)':-<80}")

    mapping_needed = False

    sft_mapping = generate_mapping_code("SFT", sft_info)
    if sft_mapping:
        print(f"\n# For SFT Training:")
        print(sft_mapping)
        mapping_needed = True

    dpo_mapping = generate_mapping_code("DPO", dpo_info)
    if dpo_mapping:
        print(f"\n# For DPO Training:")
        print(dpo_mapping)
        mapping_needed = True

    grpo_mapping = generate_mapping_code("GRPO", grpo_info)
    if grpo_mapping:
        print(f"\n# For GRPO Training:")
        print(grpo_mapping)
        mapping_needed = True

    if not mapping_needed:
        print("\nNo mapping needed - dataset is ready for training!")

    print(f"\n{'SUMMARY':-<80}")
    print(f"Recommended training methods: {', '.join(recommended) if recommended else 'None (dataset needs formatting)'}")
    print(f"\nNote: Used Datasets Server API (instant, no download required)")

    print("\n" + "=" * 80)
    sys.exit(0)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to abort an inspection; exit quietly.
        sys.exit(0)
    except Exception as e:
        # Last-resort handler so the CLI never dumps a raw traceback.
        print(f"ERROR: {e}", file=sys.stderr)
        sys.exit(1)
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.10"
|
|
4
|
+
# dependencies = []
|
|
5
|
+
# ///
|
|
6
|
+
"""
|
|
7
|
+
Estimate training time and cost for TRL jobs.
|
|
8
|
+
|
|
9
|
+
Usage with uv:
|
|
10
|
+
uv run estimate_cost.py --model <model> --dataset <dataset> --hardware <flavor>
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
uv run estimate_cost.py --model Qwen/Qwen2.5-0.5B --dataset trl-lib/Capybara --hardware a10g-large
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
|
|
18
|
+
# Hardware costs per hour (approximate)
# NOTE(review): values appear to be USD/hour for Hugging Face Jobs GPU
# flavors — verify against current pricing before relying on estimates.
HARDWARE_COSTS = {
    "t4-small": 0.75,
    "t4-medium": 1.50,
    "l4x1": 2.50,
    "a10g-small": 3.50,
    "a10g-large": 5.00,
    "a10g-largex2": 10.00,   # 2x a10g-large
    "a10g-largex4": 20.00,   # 4x a10g-large
    "a100-large": 10.00,
}

# Model sizes in billions of parameters
# Used by extract_model_size() to recognise size tags in model names.
MODEL_SIZES = {
    "0.5B": 0.5,
    "1.5B": 1.5,
    "3B": 3,
    "7B": 7,
    "13B": 13,
}
|
|
38
|
+
|
|
39
|
+
def estimate_training_time(model_params, dataset_size, epochs, hardware):
    """Rough estimate of wall-clock training time, in hours.

    Scales a baseline of 0.1 h per 1k examples per billion parameters
    (observed on a10g-large) by model size, dataset size, epoch count and
    a per-hardware speed factor. These are empirical approximations and
    actual times will vary.
    """
    # Speed factors relative to the a10g-large baseline (1.0). Unknown
    # hardware silently falls back to the baseline factor.
    speed_factor = {
        "t4-small": 2.0,
        "t4-medium": 1.5,
        "l4x1": 1.2,
        "a10g-small": 1.3,
        "a10g-large": 1.0,
        "a10g-largex2": 0.6,
        "a10g-largex4": 0.4,
        "a100-large": 0.7,
    }.get(hardware, 1.0)

    hours_per_1k_per_billion = 0.1  # baseline: 1B model on a10g-large
    return hours_per_1k_per_billion * model_params * (dataset_size / 1000) * epochs * speed_factor
|
|
65
|
+
|
|
66
|
+
def parse_args():
    """Build and parse the command-line interface for the cost estimator."""
    parser = argparse.ArgumentParser(description="Estimate training cost for TRL jobs")
    parser.add_argument(
        "--model",
        required=True,
        help="Model name or size (e.g., 'Qwen/Qwen2.5-0.5B' or '0.5B')",
    )
    parser.add_argument("--dataset", required=True, help="Dataset name")
    parser.add_argument(
        "--hardware",
        required=True,
        # Restrict to flavors we have a price for.
        choices=HARDWARE_COSTS.keys(),
        help="Hardware flavor",
    )
    parser.add_argument(
        "--dataset-size",
        type=int,
        help="Override dataset size (number of examples)",
    )
    parser.add_argument("--epochs", type=int, default=3, help="Number of training epochs")
    return parser.parse_args()
|
|
74
|
+
|
|
75
|
+
def extract_model_size(model_name):
    """Extract the parameter count, in billions, from a model name.

    Recognises size tags such as "0.5B", "7B" or "13b" anywhere in the
    name (e.g. "Qwen/Qwen2.5-0.5B" -> 0.5, "Llama-2-13B-chat" -> 13.0).
    Returns 1.0 when no size tag can be found.

    Fixes the previous substring-table lookup, where dict order made
    "3B" match inside "13B" names and return 3 instead of 13, and
    removes the bare ``except:`` around the fallback float parse.
    """
    import re  # local import keeps the script's top-level dependencies unchanged

    # A number (optionally fractional) directly followed by a B/b that
    # ends the token, e.g. "7B", "0.5B", "8x7B" (matches the "7B" part).
    match = re.search(r"(\d+(?:\.\d+)?)[Bb]\b", model_name)
    if match:
        return float(match.group(1))

    return 1.0  # Default to 1B if the size cannot be determined
|
|
89
|
+
|
|
90
|
+
def main() -> None:
    """Estimate TRL training time/cost for a model+dataset+hardware combo.

    Prints the inputs, an estimated duration and dollar cost, a recommended
    job timeout (30% buffer), heuristic warnings, and an example hf_jobs
    configuration. All figures are rough approximations.
    """
    args = parse_args()

    # Extract model parameters
    model_params = extract_model_size(args.model)
    print(f"📊 Model: {args.model} (~{model_params}B parameters)")

    # Estimate dataset size (would need to load to get real size)
    if args.dataset_size:
        dataset_size = args.dataset_size
    else:
        # Common dataset sizes (approximations); anything unknown is
        # assumed to have 10k examples.
        dataset_sizes = {
            "trl-lib/Capybara": 16000,
            "Anthropic/hh-rlhf": 160000,
        }
        dataset_size = dataset_sizes.get(args.dataset, 10000)

    print(f"📦 Dataset: {args.dataset} (~{dataset_size} examples)")
    print(f"🔄 Epochs: {args.epochs}")
    print(f"💻 Hardware: {args.hardware}")
    print()

    # Estimate training time and the resulting dollar cost.
    estimated_hours = estimate_training_time(model_params, dataset_size, args.epochs, args.hardware)
    estimated_cost = estimated_hours * HARDWARE_COSTS[args.hardware]

    # Recommend timeout with buffer
    recommended_timeout_hours = estimated_hours * 1.3  # 30% buffer

    print(f"⏱️ Estimated training time: {estimated_hours:.1f} hours")
    print(f"💰 Estimated cost: ${estimated_cost:.2f}")
    print(f"⏰ Recommended timeout: {recommended_timeout_hours:.1f}h (with 30% buffer)")
    print()

    # Warnings and recommendations
    if estimated_hours > 4:
        print("⚠️ Long training time - consider:")
        print(" - Using faster hardware")
        print(" - Reducing epochs")
        print(" - Using a smaller dataset subset for testing")

    # 7B+ models generally need multi-GPU or an A100-class card.
    if model_params >= 7 and args.hardware not in ["a10g-largex2", "a10g-largex4", "a100-large"]:
        print("⚠️ Large model - consider using:")
        print(" - Larger GPU (a100-large)")
        print(" - Multi-GPU setup (a10g-largex2 or a10g-largex4)")
        print(" - LoRA/PEFT for memory efficiency")

    print()
    print("📋 Example job configuration:")
    # Doubled braces emit literal {} in the generated hf_jobs snippet.
    print(f"""
hf_jobs("uv", {{
    "script": "your_training_script.py",
    "flavor": "{args.hardware}",
    "timeout": "{recommended_timeout_hours:.0f}h",
    "secrets": {{"HF_TOKEN": "$HF_TOKEN"}}
}})
""")
""")
|
|
148
|
+
|
|
149
|
+
# Run only when executed as a script (keeps the module importable).
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.10"
|
|
4
|
+
# dependencies = [
|
|
5
|
+
# "trl>=0.12.0",
|
|
6
|
+
# "transformers>=4.36.0",
|
|
7
|
+
# "accelerate>=0.24.0",
|
|
8
|
+
# "trackio",
|
|
9
|
+
# ]
|
|
10
|
+
# ///
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
Production-ready DPO training example for preference learning.
|
|
14
|
+
|
|
15
|
+
DPO (Direct Preference Optimization) trains models on preference pairs
|
|
16
|
+
(chosen vs rejected responses) without requiring a reward model.
|
|
17
|
+
|
|
18
|
+
Usage with hf_jobs MCP tool:
|
|
19
|
+
hf_jobs("uv", {
|
|
20
|
+
"script": '''<paste this entire file>''',
|
|
21
|
+
"flavor": "a10g-large",
|
|
22
|
+
"timeout": "3h",
|
|
23
|
+
"secrets": {"HF_TOKEN": "$HF_TOKEN"},
|
|
24
|
+
})
|
|
25
|
+
|
|
26
|
+
Or submit the script content directly inline without saving to a file.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
import trackio
|
|
30
|
+
from datasets import load_dataset
|
|
31
|
+
from trl import DPOTrainer, DPOConfig
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Load preference dataset (prompt/chosen/rejected pairs, DPO-ready format).
print("📦 Loading dataset...")
dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
print(f"✅ Dataset loaded: {len(dataset)} preference pairs")

# Create a 90/10 train/eval split; fixed seed keeps the split reproducible.
print("🔀 Creating train/eval split...")
dataset_split = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = dataset_split["train"]
eval_dataset = dataset_split["test"]
print(f" Train: {len(train_dataset)} pairs")
print(f" Eval: {len(eval_dataset)} pairs")

# Training configuration
config = DPOConfig(
    # CRITICAL: Hub settings — checkpoints are pushed on every save so
    # progress survives job preemption.
    output_dir="qwen-dpo-aligned",
    push_to_hub=True,
    hub_model_id="username/qwen-dpo-aligned",  # replace "username" with your Hub namespace
    hub_strategy="every_save",

    # DPO-specific parameters
    beta=0.1,  # KL penalty coefficient (higher = stay closer to reference)

    # Training parameters
    num_train_epochs=1,  # DPO typically needs fewer epochs than SFT
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # effective batch size = 4 * 4 = 16
    learning_rate=5e-7,  # DPO uses much lower LR than SFT
    # max_length=1024,  # Default - only set if you need different sequence length

    # Logging & checkpointing
    logging_steps=10,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,  # keep only the two most recent checkpoints

    # Evaluation - IMPORTANT: Only enable if eval_dataset provided
    eval_strategy="steps",
    eval_steps=100,

    # Optimization
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",

    # Monitoring
    report_to="trackio",  # Integrate with Trackio
    project="meaningful_project_name",  # Trackio project that groups related runs
    run_name="baseline-run",  # Descriptive name for this training run

)

# Initialize and train
# Note: DPO requires an instruct-tuned model as the base
print("🎯 Initializing trainer...")
trainer = DPOTrainer(
    model="Qwen/Qwen2.5-0.5B-Instruct",  # Use instruct model, not base model
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,  # CRITICAL: Must provide eval_dataset when eval_strategy is enabled
    args=config,
)

print("🚀 Starting DPO training...")
trainer.train()

# Push the final model/tokenizer to the Hub repo configured above.
print("💾 Pushing to Hub...")
trainer.push_to_hub()

# Finish Trackio tracking (flushes metrics for the run)
trackio.finish()

print("✅ Complete! Model at: https://huggingface.co/username/qwen-dpo-aligned")
print("📊 View metrics at: https://huggingface.co/spaces/username/trackio")