@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
# /// script
|
|
2
|
+
# requires-python = ">=3.10"
|
|
3
|
+
# dependencies = [
|
|
4
|
+
# "datasets",
|
|
5
|
+
# "transformers",
|
|
6
|
+
# "vllm>=0.6.5",
|
|
7
|
+
# "huggingface-hub[hf_transfer]",
|
|
8
|
+
# "torch",
|
|
9
|
+
# "numpy",
|
|
10
|
+
# "tqdm",
|
|
11
|
+
# "scikit-learn",
|
|
12
|
+
# ]
|
|
13
|
+
# ///
|
|
14
|
+
"""
|
|
15
|
+
Generate high-quality synthetic data using Chain-of-Thought Self-Instruct methodology.
|
|
16
|
+
|
|
17
|
+
This script implements the CoT-Self-Instruct approach from the paper "CoT-Self-Instruct:
|
|
18
|
+
Building high-quality synthetic prompts for reasoning and non-reasoning tasks" (2025).
|
|
19
|
+
|
|
20
|
+
It supports two modes:
|
|
21
|
+
1. Reasoning tasks: Generates both questions and answers with Chain-of-Thought
|
|
22
|
+
2. Instruction tasks: Generates diverse prompts for general instruction following
|
|
23
|
+
|
|
24
|
+
Example usage:
|
|
25
|
+
# Reasoning tasks with Answer-Consistency filtering
|
|
26
|
+
uv run cot-self-instruct.py \\
|
|
27
|
+
--seed-dataset davanstrien/s1k-reasoning \\
|
|
28
|
+
--output-dataset username/synthetic-math \\
|
|
29
|
+
--task-type reasoning \\
|
|
30
|
+
--num-samples 5000 \\
|
|
31
|
+
--filter-method answer-consistency
|
|
32
|
+
|
|
33
|
+
# Instruction tasks with RIP filtering
|
|
34
|
+
uv run cot-self-instruct.py \\
|
|
35
|
+
--seed-dataset wildchat-filtered \\
|
|
36
|
+
--output-dataset username/synthetic-prompts \\
|
|
37
|
+
--task-type instruction \\
|
|
38
|
+
--filter-method rip \\
|
|
39
|
+
--reward-model Nexusflow/Athene-RM-8B
|
|
40
|
+
|
|
41
|
+
# HF Jobs execution
|
|
42
|
+
hf jobs uv run --flavor l4x4 \\
|
|
43
|
+
--image vllm/vllm-openai \\
|
|
44
|
+
-e HF_TOKEN=$(python3 -c "from huggingface_hub import get_token; print(get_token())") \\
|
|
45
|
+
https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\
|
|
46
|
+
[args...]
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
import argparse
|
|
50
|
+
import json
|
|
51
|
+
import logging
|
|
52
|
+
import os
|
|
53
|
+
import random
|
|
54
|
+
import re
|
|
55
|
+
import sys
|
|
56
|
+
from collections import Counter
|
|
57
|
+
from datetime import datetime
|
|
58
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
59
|
+
|
|
60
|
+
import numpy as np
|
|
61
|
+
import torch
|
|
62
|
+
from datasets import Dataset, load_dataset
|
|
63
|
+
from huggingface_hub import DatasetCard, login
|
|
64
|
+
from sklearn.cluster import KMeans
|
|
65
|
+
from tqdm.auto import tqdm
|
|
66
|
+
from transformers import AutoTokenizer
|
|
67
|
+
from vllm import LLM, SamplingParams
|
|
68
|
+
|
|
69
|
+
# Enable HF Transfer for faster downloads.
# The assignment is unconditional, so it overwrites any value already present
# in the environment. The script's dependency list requests the
# "huggingface-hub[hf_transfer]" extra that this switch relies on.
# NOTE(review): this runs after `from huggingface_hub import ...` above;
# confirm the library still honors the variable when it is set post-import.
|
|
70
|
+
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
|
71
|
+
|
|
72
|
+
# Configure root logging at INFO with "timestamp - level - message" records.
logging.basicConfig(
|
|
73
|
+
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
|
74
|
+
)
|
|
75
|
+
logger = logging.getLogger(__name__)
|
|
76
|
+
|
|
77
|
+
# Prompt templates from the paper (named in the module docstring:
# "CoT-Self-Instruct: Building high-quality synthetic prompts for reasoning
# and non-reasoning tasks").
# REASONING_PROMPT_TEMPLATE has two placeholders, {seed1} and {seed2}. The
# doubled braces are presumably str.format escapes that render literal braces
# -- confirm against the code that fills the template in.
# The "[New Question Begin]...[New Question End]" and
# "[Final Answer to New Question Begin]...[Final Answer to New Question End]"
# markers are the ones extract_reasoning_output() searches for, so the two
# must be kept in sync.
|
|
78
|
+
REASONING_PROMPT_TEMPLATE = """You are a reasoning question generator assistant. Your goal is to create a novel, and challenging reasoning question. You are provided the following seed questions:
|
|
79
|
+
Seed Question 1: {seed1}
|
|
80
|
+
Seed Question 2: {seed2}
|
|
81
|
+
Your task is to:
|
|
82
|
+
1. Write a brand-new, self-contained reasoning question that meets the following requirements:
|
|
83
|
+
(a) The question draws inspiration from the seed question without copying it verbatim, remaining novel and of comparable difficulty.
|
|
84
|
+
(b) The question's final answer should be a single, unambiguous scalar value (e.g., an integer, reduced fraction, exact radical), or another answer type that can be verified in one step (e.g., 'yes/no,' a choice from A to D).
|
|
85
|
+
2. Then reason step by step, solve the new question and format your output as follows:
|
|
86
|
+
[New Question Begin]{{your_generated_question}}[New Question End]
|
|
87
|
+
[Final Answer to New Question Begin]\\boxed{{your_final_answer}}[Final Answer to New Question End]"""
|
|
88
|
+
|
|
89
|
+
# Prompt used for instruction-type generation. It has two placeholders,
# {prompt1} and {prompt2}, and asks the model to answer in three labelled
# steps. extract_instruction_output() keys on the literal text
# "Step 3 #Synthetic Prompt#:", so that label must stay in sync with it.
INSTRUCTION_PROMPT_TEMPLATE = """You are a prompt generator assistant. Your goal is to create diverse and creative synthetic prompts.
|
|
90
|
+
Please follow the steps below to create synthetic prompts.
|
|
91
|
+
Step 1: Carefully read #Prompt 1# and #Prompt 2#. Identify and list all the common elements between these two prompts. If no common elements are found, list the main elements from each prompt.
|
|
92
|
+
Step 2: Develop a comprehensive plan based on the #Common Elements List# or #Main Elements List# from Step 1. This plan will guide the generation of new synthetic prompts that are similar to the original prompts.
|
|
93
|
+
Step 3: Execute the plan step by step and provide one #Synthetic Prompt#.
|
|
94
|
+
Please reply strictly in the following format:
|
|
95
|
+
- Step 1 #Common Elements List# or #Main Elements List#:
|
|
96
|
+
- Step 2 #Plan#:
|
|
97
|
+
- Step 3 #Synthetic Prompt#:
|
|
98
|
+
#Prompt 1#:
|
|
99
|
+
{prompt1}
|
|
100
|
+
#Prompt 2#:
|
|
101
|
+
{prompt2}"""
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def check_gpu_availability() -> int:
    """Verify that CUDA is usable and report every visible GPU.

    When CUDA is unavailable, two error messages are logged and the process
    exits with status 1. Otherwise the name and total memory (bytes divided
    by 1024**3) of each device are logged at INFO level.

    Returns:
        The device count reported by ``torch.cuda.device_count()``.
    """
    cuda_ready = torch.cuda.is_available()
    if not cuda_ready:
        for message in (
            "CUDA is not available. This script requires a GPU.",
            "Please run on a machine with NVIDIA GPU or use HF Jobs with GPU flavor.",
        ):
            logger.error(message)
        sys.exit(1)

    device_total = torch.cuda.device_count()
    bytes_per_gib = 1024**3
    for device_idx in range(device_total):
        name = torch.cuda.get_device_name(device_idx)
        properties = torch.cuda.get_device_properties(device_idx)
        memory_gib = properties.total_memory / bytes_per_gib
        logger.info(f"GPU {device_idx}: {name} with {memory_gib:.1f} GB memory")

    return device_total
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def parse_thinking_output(text: str) -> str:
    """Strip ``<think>...</think>`` blocks from model output.

    Each block is matched non-greedily and may span several lines. An opening
    tag without a matching closing tag is left untouched.

    Args:
        text: Raw text produced by the model.

    Returns:
        The text without complete think blocks, trimmed of surrounding
        whitespace.
    """
    think_block = re.compile(r'<think>.*?</think>', re.DOTALL)
    without_thinking = think_block.sub('', text)
    return without_thinking.strip()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def extract_reasoning_output(text: str) -> Tuple[Optional[str], Optional[str]]:
    """Extract the generated question and its final answer from model output.

    Think blocks are removed first via ``parse_thinking_output``. The question
    is the text between ``[New Question Begin]`` and ``[New Question End]``.
    The answer is the text between ``[Final Answer to New Question Begin]``
    and ``[Final Answer to New Question End]``; when that text is a single
    ``\\boxed{...}`` expression, only the boxed content is returned.

    Args:
        text: Raw model output for a reasoning-task prompt.

    Returns:
        ``(question, answer)`` with both values stripped. ``(None, None)``
        when no question markers are found, and ``(question, None)`` when the
        question is present but the answer markers are not.
    """
    text = parse_thinking_output(text)

    # Extract question
    question_match = re.search(
        r'\[New Question Begin\](.*?)\[New Question End\]', text, re.DOTALL
    )
    if not question_match:
        return None, None
    question = question_match.group(1).strip()

    # Extract answer. Whitespace is tolerated between the markers and the
    # \boxed{...} expression: models often put the answer on its own line,
    # and without the \s* the fallback below would return the answer with
    # the "\boxed{...}" wrapper still attached.
    answer_match = re.search(
        r'\[Final Answer to New Question Begin\]\s*\\?boxed\{(.*?)\}\s*'
        r'\[Final Answer to New Question End\]',
        text,
        re.DOTALL,
    )
    if not answer_match:
        # Try without \boxed: take whatever sits between the markers.
        answer_match = re.search(
            r'\[Final Answer to New Question Begin\](.*?)\[Final Answer to New Question End\]',
            text,
            re.DOTALL,
        )

    if not answer_match:
        return question, None

    return question, answer_match.group(1).strip()
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def extract_instruction_output(text: str) -> Optional[str]:
    """Pull the synthetic prompt out of an instruction-task generation.

    Args:
        text: Raw generation produced for an instruction task.

    Returns:
        Everything that follows the "Step 3 #Synthetic Prompt#:" header
        (whitespace-trimmed), or ``None`` when the header is absent.
    """
    cleaned = parse_thinking_output(text)

    # The prompt is whatever comes after the step-3 header, to the end of text.
    found = re.search(r'Step 3 #Synthetic Prompt#:\s*(.+)', cleaned, re.DOTALL)
    if found is None:
        return None
    return found.group(1).strip()
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def categorize_prompts(prompts: List[str], num_categories: int = 8) -> Dict[int, List[int]]:
    """Categorize prompts using clustering for instruction tasks.

    Each prompt is embedded with a small sentence-embedding model
    (mean-pooled last hidden state) and the embeddings are clustered with
    KMeans.

    Args:
        prompts: Prompt texts to cluster.
        num_categories: Requested number of clusters. It is capped at
            ``len(prompts)`` because KMeans needs at least as many samples
            as clusters.

    Returns:
        Mapping from cluster label to the list of indices into ``prompts``
        assigned to that cluster. Empty when ``prompts`` is empty.
    """
    # Nothing to cluster: return before loading any model.
    if not prompts:
        return {}

    from transformers import AutoModel

    n_clusters = min(num_categories, len(prompts))
    logger.info(f"Categorizing {len(prompts)} prompts into {n_clusters} categories...")

    # Use a small model for embeddings
    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

    # Get embeddings, one prompt at a time
    embeddings = []
    for prompt in tqdm(prompts, desc="Computing embeddings"):
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
        # Average over the token dimension to get one vector per prompt.
        embedding = outputs.last_hidden_state.mean(dim=1).numpy()
        embeddings.append(embedding[0])

    # Cluster
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels = kmeans.fit_predict(embeddings)

    # Group prompt indices by category. Labels are cast to plain ints so the
    # keys match the declared return type.
    categories: Dict[int, List[int]] = {}
    for idx, label in enumerate(labels):
        categories.setdefault(int(label), []).append(idx)

    return categories
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def generate_synthetic_data(
    llm: LLM,
    seed_data: List[Dict],
    task_type: str,
    num_samples: int,
    categories: Optional[Dict[int, List[int]]] = None,
    max_attempts: Optional[int] = None,
) -> List[Dict]:
    """Generate synthetic data using CoT-Self-Instruct.

    Repeatedly samples up to two seed examples, fills the task-specific
    prompt template with them, asks the model for one completion and keeps
    the completion when it parses successfully.

    Args:
        llm: Model used for generation.
        seed_data: Seed examples. The seed text is read from the
            "question" key (falling back to "prompt") for reasoning tasks
            and from "prompt" (falling back to "question") otherwise.
        task_type: "reasoning" produces question/answer pairs; any other
            value produces prompts.
        num_samples: Number of successfully parsed examples to collect.
        categories: Optional mapping of category label to seed indices.
            When given (and the task is not "reasoning"), both seeds are
            drawn from one randomly chosen category.
        max_attempts: Upper bound on generation attempts. Defaults to
            ``max(20 * num_samples, 100)`` so that a model whose output
            never parses cannot keep the loop running forever.

    Returns:
        The collected examples. Each one carries "seed_indices", the
        positions in ``seed_data`` of the seeds it was generated from. May
        hold fewer than ``num_samples`` items if ``max_attempts`` is reached.

    Raises:
        ValueError: If ``seed_data`` is empty.
    """
    if not seed_data:
        raise ValueError("seed_data must contain at least one example")

    is_reasoning = task_type == "reasoning"
    if is_reasoning:
        primary_key, fallback_key = "question", "prompt"
    else:
        primary_key, fallback_key = "prompt", "question"

    def seed_text(index: int) -> str:
        """Return the seed text stored at ``index``."""
        seed = seed_data[index]
        return seed.get(primary_key, seed.get(fallback_key, ""))

    if max_attempts is None:
        max_attempts = max(num_samples * 20, 100)

    # Sampling settings depend only on the task type, so build them once.
    sampling_params = SamplingParams(
        temperature=0.7 if is_reasoning else 0.8,
        top_p=0.95 if is_reasoning else 0.9,
        max_tokens=2048,
    )

    synthetic_data = []
    attempts = 0

    # Set up progress bar
    pbar = tqdm(total=num_samples, desc="Generating synthetic data")
    try:
        while len(synthetic_data) < num_samples:
            if attempts >= max_attempts:
                logger.warning(
                    f"Stopping after {attempts} attempts with "
                    f"{len(synthetic_data)}/{num_samples} examples generated"
                )
                break
            attempts += 1

            # Sample seed positions rather than seed dicts, so the recorded
            # indices stay correct even when seed_data contains duplicates.
            if categories and not is_reasoning:
                # Category-aware sampling for instruction tasks
                category = random.choice(list(categories.keys()))
                category_indices = categories[category]
                indices = random.sample(category_indices, min(2, len(category_indices)))
            else:
                indices = random.sample(range(len(seed_data)), min(2, len(seed_data)))

            # With a single available seed, it is used for both slots.
            first_seed = seed_text(indices[0])
            second_seed = seed_text(indices[-1])

            if is_reasoning:
                prompt = REASONING_PROMPT_TEMPLATE.format(seed1=first_seed, seed2=second_seed)
            else:
                prompt = INSTRUCTION_PROMPT_TEMPLATE.format(prompt1=first_seed, prompt2=second_seed)

            # Generate
            outputs = llm.generate([prompt], sampling_params)
            output_text = outputs[0].outputs[0].text

            # Parse output; unparsable generations are simply dropped.
            seed_indices = [int(i) for i in indices]
            if is_reasoning:
                question, answer = extract_reasoning_output(output_text)
                if question and answer:
                    synthetic_data.append({
                        "question": question,
                        "answer": answer,
                        "seed_indices": seed_indices,
                    })
                    pbar.update(1)
            else:
                synthetic_prompt = extract_instruction_output(output_text)
                if synthetic_prompt:
                    synthetic_data.append({
                        "prompt": synthetic_prompt,
                        "seed_indices": seed_indices,
                    })
                    pbar.update(1)
    finally:
        pbar.close()

    return synthetic_data
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _extract_last_boxed(text: str) -> Optional[str]:
    """Return the content of the last LaTeX boxed{...} command in ``text``.

    Braces are matched by depth so nested groups (for example a fraction
    inside the box) are returned whole. Returns ``None`` when there is no
    boxed command or its braces never close.
    """
    marker = "\\boxed{"
    start = text.rfind(marker)
    if start == -1:
        return None

    content_start = start + len(marker)
    depth = 1
    for pos in range(content_start, len(text)):
        char = text[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return text[content_start:pos].strip()
    return None


def answer_consistency_filter(
    llm: LLM,
    synthetic_data: List[Dict],
    k_responses: int = 16,
    threshold: float = 0.5,
) -> List[Dict]:
    """Filter reasoning tasks using Answer-Consistency.

    For every example the model answers the question ``k_responses`` times.
    The example is kept when the most frequent boxed answer equals the
    example's own answer and its share of the parsed answers reaches
    ``threshold``.

    Args:
        llm: Model used to answer the questions.
        synthetic_data: Examples with "question" and "answer" keys.
        k_responses: Number of responses sampled per question.
        threshold: Minimum fraction of parsed answers that must agree with
            the majority answer.

    Returns:
        Copies of the kept examples, each with an added
        "consistency_score" key. The input dictionaries are not modified.
    """
    logger.info(f"Applying Answer-Consistency filter with K={k_responses}")

    # Identical for every question, so build once.
    sampling_params = SamplingParams(
        temperature=0.6,
        top_p=0.95,
        max_tokens=1024,
    )

    filtered_data = []

    for item in tqdm(synthetic_data, desc="Answer-Consistency filtering"):
        question = item["question"]
        original_answer = item["answer"]

        # Generate K responses
        outputs = llm.generate([question] * k_responses, sampling_params)

        # Extract the final boxed answer of each response, skipping responses
        # that contain none.
        candidates = (_extract_last_boxed(output.outputs[0].text) for output in outputs)
        answers = [answer for answer in candidates if answer is not None]

        if not answers:
            continue

        # Get majority answer
        majority_answer, count = Counter(answers).most_common(1)[0]

        # The score is relative to the responses that produced an answer,
        # not to k_responses.
        score = count / len(answers)

        # Check if majority answer matches original and meets threshold
        if majority_answer == original_answer and score >= threshold:
            filtered_data.append({**item, "consistency_score": score})

    logger.info(f"Answer-Consistency: kept {len(filtered_data)}/{len(synthetic_data)} examples")
    return filtered_data
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def rip_filter(
    llm: LLM,
    synthetic_data: List[Dict],
    reward_model_id: str,
    k_responses: int = 32,
    threshold: float = 0.5,
) -> List[Dict]:
    """Filter using Rejecting Instruction Preferences (RIP).

    NOTE: this is a placeholder. No reward model is loaded;
    ``reward_model_id`` is only logged and response length (in characters)
    stands in for the reward score.

    Args:
        llm: Model used to generate responses to each prompt.
        synthetic_data: Examples; the prompt is read from "prompt", falling
            back to "question".
        reward_model_id: Identifier of the reward model (logged only).
        k_responses: Number of responses sampled per prompt.
        threshold: Minimum normalized score required to keep an example.

    Returns:
        Copies of the kept examples, each with an added "rip_score" key in
        the range 0-1. The input dictionaries are not modified.
    """
    logger.info(f"Applying RIP filter with K={k_responses} and reward model {reward_model_id}")

    # Note: In a full implementation, you would load and use the actual reward model
    # For this example, we'll use a placeholder scoring mechanism
    logger.warning("RIP filtering requires a reward model implementation - using placeholder")

    # Identical for every prompt, so build once.
    sampling_params = SamplingParams(
        temperature=1.0,
        top_p=1.0,
        max_tokens=1024,
    )

    filtered_data = []

    for item in tqdm(synthetic_data, desc="RIP filtering"):
        prompt = item.get("prompt", item.get("question", ""))

        # Generate K responses
        outputs = llm.generate([prompt] * k_responses, sampling_params)

        # In real implementation: score each response with reward model
        # For now, use length as a proxy (longer responses often score higher)
        scores = [len(output.outputs[0].text) for output in outputs]

        # Use minimum score as quality indicator
        min_score = min(scores, default=0)
        # Scale by 1000 characters and clamp, so the score really is in 0-1.
        normalized_score = min(min_score / 1000, 1.0)

        if normalized_score >= threshold:
            filtered_data.append({**item, "rip_score": normalized_score})

    logger.info(f"RIP filter: kept {len(filtered_data)}/{len(synthetic_data)} examples")
    return filtered_data
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def create_dataset_card(
    task_type: str,
    source_dataset: str,
    generation_model: str,
    filter_method: str,
    num_generated: int,
    num_filtered: int,
    generation_time: str,
    additional_info: Optional[Dict] = None,
) -> str:
    """Create a comprehensive dataset card.

    Args:
        task_type: Task type, used as a tag and in the description.
        source_dataset: Hub ID of the seed dataset.
        generation_model: Hub ID of the generation model.
        filter_method: Filter method name. "answer-consistency" and "rip"
            each add an explanatory section; other values add none.
        num_generated: Number of examples before filtering.
        num_filtered: Number of examples after filtering.
        generation_time: Preformatted generation timestamp.
        additional_info: Currently unused.

    Returns:
        The dataset card as Markdown with a YAML front-matter header.
    """
    filter_info = ""
    if filter_method == "answer-consistency":
        filter_info = """
### Answer-Consistency Filtering

This dataset was filtered using Answer-Consistency:
- Generated K responses for each synthetic question
- Kept only examples where majority answer matched the generated answer
- Ensures high-quality, correctly solved problems"""
    elif filter_method == "rip":
        filter_info = """
### RIP (Rejecting Instruction Preferences) Filtering

This dataset was filtered using RIP:
- Generated K responses for each synthetic prompt
- Scored responses using a reward model
- Kept only prompts with high minimum scores"""

    # Avoid dividing by zero when nothing was generated.
    acceptance_rate = (num_filtered / num_generated) * 100 if num_generated else 0.0

    return f"""---
tags:
- synthetic-data
- cot-self-instruct
- {task_type}
- uv-script
---

# CoT-Self-Instruct Synthetic Data

This dataset contains synthetic {task_type} data generated using the Chain-of-Thought Self-Instruct methodology.

## Generation Details

- **Source Dataset**: [{source_dataset}](https://huggingface.co/datasets/{source_dataset})
- **Generation Model**: [{generation_model}](https://huggingface.co/{generation_model})
- **Task Type**: {task_type}
- **Filter Method**: {filter_method}
- **Generated Examples**: {num_generated:,}
- **After Filtering**: {num_filtered:,} ({acceptance_rate:.1f}% acceptance rate)
- **Generation Date**: {generation_time}
{filter_info}

## Methodology

Generated using CoT-Self-Instruct, which:
1. Uses Chain-of-Thought reasoning to analyze seed examples
2. Generates new synthetic examples of similar quality and complexity
3. Applies quality filtering to ensure high-quality outputs

Based on the paper: "CoT-Self-Instruct: Building high-quality synthetic prompts for reasoning and non-reasoning tasks" (2025)

## Generation Script

Generated using the CoT-Self-Instruct script from [uv-scripts/synthetic-data](https://huggingface.co/datasets/uv-scripts/synthetic-data).

To reproduce:
```bash
uv run https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\
    --seed-dataset {source_dataset} \\
    --output-dataset <your-dataset> \\
    --task-type {task_type} \\
    --generation-model {generation_model} \\
    --filter-method {filter_method}
```
"""
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def main():
    """Run the CoT-Self-Instruct pipeline from the command line.

    Parses arguments, loads the seed dataset, generates synthetic examples,
    optionally filters them, and pushes the resulting dataset and its card
    to the Hugging Face Hub. Exits with status 1 when no usable task column
    is found or when no example is left to publish.
    """
    import gc
    from datetime import timezone

    parser = argparse.ArgumentParser(
        description="Generate synthetic data using CoT-Self-Instruct",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    # Dataset arguments
    parser.add_argument(
        "--seed-dataset",
        type=str,
        required=True,
        help="HuggingFace dataset ID containing seed examples",
    )
    parser.add_argument(
        "--output-dataset",
        type=str,
        required=True,
        help="HuggingFace dataset ID for output",
    )

    # Task configuration
    parser.add_argument(
        "--task-type",
        type=str,
        choices=["reasoning", "instruction", "auto"],
        default="auto",
        help="Type of task (reasoning generates Q&A, instruction generates prompts)",
    )
    parser.add_argument(
        "--task-column",
        type=str,
        default=None,
        help="Column name containing tasks (auto-detected if not specified)",
    )

    # Model configuration
    parser.add_argument(
        "--generation-model",
        type=str,
        default="Qwen/Qwen3-30B-A3B-Thinking-2507",
        help="Model for synthetic data generation",
    )
    parser.add_argument(
        "--filter-model",
        type=str,
        default=None,
        help="Model for filtering (defaults to generation model)",
    )
    parser.add_argument(
        "--reward-model",
        type=str,
        default="Nexusflow/Athene-RM-8B",
        help="Reward model for RIP filtering",
    )

    # Generation parameters
    parser.add_argument(
        "--num-samples",
        type=int,
        default=5000,
        help="Number of synthetic examples to generate",
    )
    # NOTE: --batch-size is accepted but not referenced anywhere in main().
    parser.add_argument(
        "--batch-size",
        type=int,
        default=1,
        help="Batch size for generation",
    )

    # Filtering parameters
    parser.add_argument(
        "--filter-method",
        type=str,
        choices=["answer-consistency", "rip", "both", "none"],
        default="answer-consistency",
        help="Quality filtering method",
    )
    parser.add_argument(
        "--k-responses",
        type=int,
        default=16,
        help="Number of responses for filtering",
    )
    parser.add_argument(
        "--quality-threshold",
        type=float,
        default=0.5,
        help="Minimum quality threshold for filtering",
    )

    # GPU configuration
    parser.add_argument(
        "--tensor-parallel-size",
        type=int,
        default=None,
        help="Number of GPUs for tensor parallelism (auto-detected if not set)",
    )
    parser.add_argument(
        "--gpu-memory-utilization",
        type=float,
        default=0.9,
        help="GPU memory utilization",
    )

    # Other arguments
    parser.add_argument(
        "--hf-token",
        type=str,
        default=None,
        help="HuggingFace API token",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Random seed",
    )

    args = parser.parse_args()

    # Set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Check GPU
    num_gpus = check_gpu_availability()
    tensor_parallel_size = args.tensor_parallel_size or num_gpus

    # Authentication
    hf_token = args.hf_token or os.environ.get("HF_TOKEN")
    if hf_token:
        login(token=hf_token)

    # Load seed dataset
    logger.info(f"Loading seed dataset: {args.seed_dataset}")
    seed_dataset = load_dataset(args.seed_dataset, split="train")
    columns = seed_dataset.column_names

    # Auto-detect task type and column if needed
    if args.task_type == "auto":
        if "question" in columns and "answer" in columns:
            args.task_type = "reasoning"
            logger.info("Auto-detected task type: reasoning")
        else:
            args.task_type = "instruction"
            logger.info("Auto-detected task type: instruction")

    if not args.task_column:
        # The last candidate of each list mirrors the fallback key that
        # generate_synthetic_data reads for that task type.
        if args.task_type == "reasoning":
            candidates = ["question", "prompt"]
        else:
            candidates = ["prompt", "instruction", "text", "input", "question"]
        args.task_column = next((col for col in candidates if col in columns), None)

    if args.task_column not in columns:
        logger.error(
            f"Task column {args.task_column!r} not found in dataset columns {columns}. "
            "Use --task-column to select the column containing the tasks."
        )
        sys.exit(1)

    logger.info(f"Using task column: {args.task_column}")

    # Convert to list of dicts
    seed_data = seed_dataset.to_list()

    # generate_synthetic_data reads seeds from fixed keys ("question" for
    # reasoning, "prompt" for instruction), so expose the chosen column
    # under that key.
    canonical_key = "question" if args.task_type == "reasoning" else "prompt"
    if args.task_column != canonical_key:
        for item in seed_data:
            item[canonical_key] = item.get(args.task_column, "")

    # Categorize prompts for instruction tasks
    categories = None
    if args.task_type == "instruction" and len(seed_data) > 100:
        prompts = [item.get(args.task_column, "") for item in seed_data]
        categories = categorize_prompts(prompts)

    # Initialize generation model
    logger.info(f"Loading generation model: {args.generation_model}")
    generation_llm = LLM(
        model=args.generation_model,
        tensor_parallel_size=tensor_parallel_size,
        gpu_memory_utilization=args.gpu_memory_utilization,
    )

    # Generate synthetic data. The timestamp is timezone-aware so that the
    # "UTC" label used below is accurate.
    start_time = datetime.now(timezone.utc)
    synthetic_data = generate_synthetic_data(
        generation_llm,
        seed_data,
        args.task_type,
        args.num_samples,
        categories,
    )

    # Decide which filters actually run.
    wants_answer_consistency = args.filter_method in ("answer-consistency", "both")
    use_answer_consistency = wants_answer_consistency and args.task_type == "reasoning"
    use_rip = args.filter_method in ("rip", "both")
    if wants_answer_consistency and not use_answer_consistency:
        logger.warning(
            "Answer-Consistency filtering only applies to reasoning tasks; skipping it"
        )

    # Apply filtering
    filtered_data = synthetic_data
    if use_answer_consistency or use_rip:
        if args.filter_model and args.filter_model != args.generation_model:
            logger.info(f"Loading filter model: {args.filter_model}")
            # Drop the only reference to the generation model before the
            # filter model is created, so it can be garbage-collected.
            del generation_llm
            gc.collect()
            torch.cuda.empty_cache()

            filter_llm = LLM(
                model=args.filter_model,
                tensor_parallel_size=tensor_parallel_size,
                gpu_memory_utilization=args.gpu_memory_utilization,
            )
        else:
            filter_llm = generation_llm

        if use_answer_consistency:
            filtered_data = answer_consistency_filter(
                filter_llm,
                filtered_data,
                args.k_responses,
                args.quality_threshold,
            )
        if use_rip:
            filtered_data = rip_filter(
                filter_llm,
                filtered_data,
                args.reward_model,
                args.k_responses,
                args.quality_threshold,
            )

    if not filtered_data:
        logger.error("No examples left after generation and filtering; nothing to push.")
        sys.exit(1)

    # Create HuggingFace dataset
    logger.info(f"Creating dataset with {len(filtered_data)} examples")
    dataset = Dataset.from_list(filtered_data)

    # Create dataset card
    generation_time = start_time.strftime("%Y-%m-%d %H:%M:%S UTC")
    dataset_card = create_dataset_card(
        args.task_type,
        args.seed_dataset,
        args.generation_model,
        args.filter_method,
        len(synthetic_data),
        len(filtered_data),
        generation_time,
    )

    # Push to hub
    logger.info(f"Pushing dataset to: {args.output_dataset}")
    card = DatasetCard(dataset_card)
    dataset.push_to_hub(args.output_dataset)
    # Push card separately
    card.push_to_hub(args.output_dataset)

    logger.info(f"Done! Dataset available at: https://huggingface.co/datasets/{args.output_dataset}")

    # Print example HF Jobs command if running locally
    if len(sys.argv) > 1:
        print("\nTo run on HF Jobs:")
        print(f"""hf jobs uv run --flavor l4x4 \\
    --image vllm/vllm-openai \\
    -e HF_TOKEN=$(python3 -c "from huggingface_hub import get_token; print(get_token())") \\
    https://huggingface.co/datasets/uv-scripts/synthetic-data/raw/main/cot-self-instruct.py \\
    --seed-dataset {args.seed_dataset} \\
    --output-dataset {args.output_dataset} \\
    --task-type {args.task_type} \\
    --generation-model {args.generation_model} \\
    --filter-method {args.filter_method} \\
    --num-samples {args.num_samples}""")


if __name__ == "__main__":
    main()
|