@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,587 @@
|
|
|
1
|
+
# /// script
|
|
2
|
+
# requires-python = ">=3.10"
|
|
3
|
+
# dependencies = [
|
|
4
|
+
# "datasets",
|
|
5
|
+
# "flashinfer-python",
|
|
6
|
+
# "huggingface-hub[hf_transfer]",
|
|
7
|
+
# "hf-xet>= 1.1.7",
|
|
8
|
+
# "torch",
|
|
9
|
+
# "transformers",
|
|
10
|
+
# "vllm>=0.8.5",
|
|
11
|
+
# ]
|
|
12
|
+
#
|
|
13
|
+
# ///
|
|
14
|
+
"""
|
|
15
|
+
Generate responses for prompts in a dataset using vLLM for efficient GPU inference.
|
|
16
|
+
|
|
17
|
+
This script loads a dataset from Hugging Face Hub containing chat-formatted messages,
|
|
18
|
+
applies the model's chat template, generates responses using vLLM, and saves the
|
|
19
|
+
results back to the Hub with a comprehensive dataset card.
|
|
20
|
+
|
|
21
|
+
Example usage:
|
|
22
|
+
# Local execution with auto GPU detection
|
|
23
|
+
uv run generate-responses.py \\
|
|
24
|
+
username/input-dataset \\
|
|
25
|
+
username/output-dataset \\
|
|
26
|
+
--messages-column messages
|
|
27
|
+
|
|
28
|
+
# With custom model and sampling parameters
|
|
29
|
+
uv run generate-responses.py \\
|
|
30
|
+
username/input-dataset \\
|
|
31
|
+
username/output-dataset \\
|
|
32
|
+
--model-id meta-llama/Llama-3.1-8B-Instruct \\
|
|
33
|
+
--temperature 0.9 \\
|
|
34
|
+
--top-p 0.95 \\
|
|
35
|
+
--max-tokens 2048
|
|
36
|
+
|
|
37
|
+
# HF Jobs execution (see script output for full command)
|
|
38
|
+
hf jobs uv run --flavor a100x4 ...
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
import argparse
|
|
42
|
+
import logging
|
|
43
|
+
import os
|
|
44
|
+
import sys
|
|
45
|
+
from datetime import datetime
|
|
46
|
+
from typing import Optional
|
|
47
|
+
|
|
48
|
+
from datasets import load_dataset
|
|
49
|
+
from huggingface_hub import DatasetCard, get_token, login
|
|
50
|
+
from torch import cuda
|
|
51
|
+
from tqdm.auto import tqdm
|
|
52
|
+
from transformers import AutoTokenizer
|
|
53
|
+
from vllm import LLM, SamplingParams
|
|
54
|
+
|
|
55
|
+
# Enable HF Transfer for faster downloads.
# NOTE(review): presumably read by huggingface_hub when the model/dataset is
# fetched, so it is set at import time, before any Hub traffic — confirm.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Module-wide logging: timestamped INFO-level messages on the root handler.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
# Shared logger used by every function in this script.
logger = logging.getLogger(__name__)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def check_gpu_availability() -> int:
    """Abort unless CUDA is usable; log and return the visible GPU count.

    Logs the name and total memory (GiB) of every detected device.
    Exits the process with status 1 when no CUDA device is available,
    since the generation pipeline cannot run on CPU.
    """
    if not cuda.is_available():
        logger.error("CUDA is not available. This script requires a GPU.")
        logger.error(
            "Please run on a machine with NVIDIA GPU or use HF Jobs with GPU flavor."
        )
        sys.exit(1)

    gpu_count = cuda.device_count()
    for idx in range(gpu_count):
        name = cuda.get_device_name(idx)
        # total_memory is in bytes; convert to GiB for the log line.
        mem_gib = cuda.get_device_properties(idx).total_memory / 1024**3
        logger.info(f"GPU {idx}: {name} with {mem_gib:.1f} GB memory")
    return gpu_count
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def create_dataset_card(
    source_dataset: str,
    model_id: str,
    messages_column: str,
    prompt_column: Optional[str],
    sampling_params: SamplingParams,
    tensor_parallel_size: int,
    num_examples: int,
    generation_time: str,
    num_skipped: int = 0,
    max_model_len_used: Optional[int] = None,
) -> str:
    """Create a comprehensive dataset card documenting the generation process.

    Args:
        source_dataset: Hub ID of the input dataset the prompts came from.
        model_id: Hub ID of the model used for generation.
        messages_column: Column holding chat-formatted messages.
        prompt_column: Column holding plain-text prompts; when set, it is
            reported as the input column instead of ``messages_column``.
        sampling_params: vLLM sampling configuration echoed into the card.
        tensor_parallel_size: Number of GPUs used (tensor parallel degree).
        num_examples: Total number of examples in the dataset.
        generation_time: Human-readable timestamp of the generation run.
        num_skipped: Examples skipped for exceeding the context length; a
            "Filtering Statistics" section is emitted only when > 0.
        max_model_len_used: Context-length cap in tokens, if one was applied;
            when set, a ``--max-model-len`` flag is added to the repro command.

    Returns:
        Markdown text (with YAML front matter) suitable for a Hub dataset card.
    """
    # Optional block describing prompts dropped for length; left empty (and
    # therefore invisible in the card) when nothing was skipped.
    filtering_section = ""
    if num_skipped > 0:
        # NOTE(review): assumes num_examples > 0 whenever num_skipped > 0,
        # otherwise this divides by zero — holds if skips are counted out of
        # num_examples, but confirm against the caller.
        skip_percentage = (num_skipped / num_examples) * 100
        processed = num_examples - num_skipped
        filtering_section = f"""

### Filtering Statistics

- **Total Examples**: {num_examples:,}
- **Processed**: {processed:,} ({100 - skip_percentage:.1f}%)
- **Skipped (too long)**: {num_skipped:,} ({skip_percentage:.1f}%)
- **Max Model Length Used**: {max_model_len_used:,} tokens

Note: Prompts exceeding the maximum model length were skipped and have empty responses."""

    # Card body: YAML front matter (tags), generation/sampling/hardware
    # details, and a copy-pasteable reproduction command. Each "\\" inside
    # the f-string renders as a single backslash (bash line continuation);
    # the nested f-string appends --max-model-len only when a cap was used.
    return f"""---
tags:
- generated
- vllm
- uv-script
---

# Generated Responses Dataset

This dataset contains generated responses for prompts from [{source_dataset}](https://huggingface.co/datasets/{source_dataset}).

## Generation Details

- **Source Dataset**: [{source_dataset}](https://huggingface.co/datasets/{source_dataset})
- **Input Column**: `{prompt_column if prompt_column else messages_column}` ({"plain text prompts" if prompt_column else "chat messages"})
- **Model**: [{model_id}](https://huggingface.co/{model_id})
- **Number of Examples**: {num_examples:,}
- **Generation Date**: {generation_time}{filtering_section}

### Sampling Parameters

- **Temperature**: {sampling_params.temperature}
- **Top P**: {sampling_params.top_p}
- **Top K**: {sampling_params.top_k}
- **Min P**: {sampling_params.min_p}
- **Max Tokens**: {sampling_params.max_tokens}
- **Repetition Penalty**: {sampling_params.repetition_penalty}

### Hardware Configuration

- **Tensor Parallel Size**: {tensor_parallel_size}
- **GPU Configuration**: {tensor_parallel_size} GPU(s)

## Dataset Structure

The dataset contains all columns from the source dataset plus:
- `response`: The generated response from the model

## Generation Script

Generated using the vLLM inference script from [uv-scripts/vllm](https://huggingface.co/datasets/uv-scripts/vllm).

To reproduce this generation:

```bash
uv run https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\
    {source_dataset} \\
    <output-dataset> \\
    --model-id {model_id} \\
    {"--prompt-column " + prompt_column if prompt_column else "--messages-column " + messages_column} \\
    --temperature {sampling_params.temperature} \\
    --top-p {sampling_params.top_p} \\
    --top-k {sampling_params.top_k} \\
    --max-tokens {sampling_params.max_tokens}{f" \\\\\\n    --max-model-len {max_model_len_used}" if max_model_len_used else ""}
```
"""
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def main(
    src_dataset_hub_id: str,
    output_dataset_hub_id: str,
    model_id: str = "Qwen/Qwen3-30B-A3B-Instruct-2507",
    messages_column: str = "messages",
    prompt_column: Optional[str] = None,
    output_column: str = "response",
    temperature: float = 0.7,
    top_p: float = 0.8,
    top_k: int = 20,
    min_p: float = 0.0,
    max_tokens: int = 16384,
    repetition_penalty: float = 1.0,
    gpu_memory_utilization: float = 0.90,
    max_model_len: Optional[int] = None,
    tensor_parallel_size: Optional[int] = None,
    skip_long_prompts: bool = True,
    max_samples: Optional[int] = None,
    hf_token: Optional[str] = None,
):
    """
    Main generation pipeline.

    Loads the source dataset, renders each row into a chat-templated prompt,
    generates responses with vLLM, then pushes the augmented dataset plus a
    generated dataset card to the Hugging Face Hub.

    Args:
        src_dataset_hub_id: Input dataset on Hugging Face Hub
        output_dataset_hub_id: Where to save results on Hugging Face Hub
        model_id: Hugging Face model ID for generation
        messages_column: Column name containing chat messages
        prompt_column: Column name containing plain text prompts (alternative to messages_column)
        output_column: Column name for generated responses
        temperature: Sampling temperature
        top_p: Top-p sampling parameter
        top_k: Top-k sampling parameter
        min_p: Minimum probability threshold
        max_tokens: Maximum tokens to generate
        repetition_penalty: Repetition penalty parameter
        gpu_memory_utilization: GPU memory utilization factor
        max_model_len: Maximum model context length (None uses model default)
        tensor_parallel_size: Number of GPUs to use (auto-detect if None)
        skip_long_prompts: Skip prompts exceeding max_model_len instead of failing
        max_samples: Maximum number of samples to process (None for all)
        hf_token: Hugging Face authentication token
    """
    generation_start_time = datetime.now().isoformat()

    # GPU check and configuration
    num_gpus = check_gpu_availability()
    if tensor_parallel_size is None:
        tensor_parallel_size = num_gpus
        logger.info(
            f"Auto-detected {num_gpus} GPU(s), using tensor_parallel_size={tensor_parallel_size}"
        )
    else:
        logger.info(f"Using specified tensor_parallel_size={tensor_parallel_size}")
        if tensor_parallel_size > num_gpus:
            # Warn but proceed; vLLM itself will raise if the config is impossible.
            logger.warning(
                f"Requested {tensor_parallel_size} GPUs but only {num_gpus} available"
            )

    # Authentication - precedence: explicit argument > env var > cached CLI login
    HF_TOKEN = hf_token or os.environ.get("HF_TOKEN") or get_token()

    if not HF_TOKEN:
        logger.error("No HuggingFace token found. Please provide token via:")
        logger.error(" 1. --hf-token argument")
        logger.error(" 2. HF_TOKEN environment variable")
        logger.error(" 3. Run 'huggingface-cli login' or use login() in Python")
        sys.exit(1)

    logger.info("HuggingFace token found, authenticating...")
    login(token=HF_TOKEN)

    # Initialize vLLM
    logger.info(f"Loading model: {model_id}")
    vllm_kwargs = {
        "model": model_id,
        "tensor_parallel_size": tensor_parallel_size,
        "gpu_memory_utilization": gpu_memory_utilization,
    }
    if max_model_len is not None:
        # Only override the context length when explicitly requested;
        # otherwise let vLLM use the model's own default.
        vllm_kwargs["max_model_len"] = max_model_len
        logger.info(f"Using max_model_len={max_model_len}")

    llm = LLM(**vllm_kwargs)

    # Load tokenizer for chat template
    logger.info("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Create sampling parameters
    sampling_params = SamplingParams(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        min_p=min_p,
        max_tokens=max_tokens,
        repetition_penalty=repetition_penalty,
    )

    # Load dataset
    logger.info(f"Loading dataset: {src_dataset_hub_id}")
    dataset = load_dataset(src_dataset_hub_id, split="train")

    # Apply max_samples if specified
    if max_samples is not None and max_samples < len(dataset):
        logger.info(f"Limiting dataset to {max_samples} samples")
        dataset = dataset.select(range(max_samples))

    total_examples = len(dataset)
    logger.info(f"Dataset loaded with {total_examples:,} examples")

    # Determine which column to use and validate it exists.
    # prompt_column (plain text) takes precedence over messages_column (chat).
    use_messages = not prompt_column
    active_column = messages_column if use_messages else prompt_column
    if active_column not in dataset.column_names:
        logger.error(
            f"Column '{active_column}' not found. Available columns: {dataset.column_names}"
        )
        sys.exit(1)
    if use_messages:
        logger.info(f"Using messages column mode with column: '{messages_column}'")
    else:
        logger.info(f"Using prompt column mode with column: '{prompt_column}'")

    # Get effective max length for filtering
    if max_model_len is not None:
        effective_max_len = max_model_len
    else:
        # Fall back to the model's own context window as reported by vLLM.
        effective_max_len = llm.llm_engine.model_config.max_model_len
    logger.info(f"Using effective max model length: {effective_max_len}")

    # Render each example through the chat template, optionally filtering
    # out prompts that would not fit the model context.
    logger.info("Preparing prompts...")
    valid_prompts = []  # chat-templated prompt strings that fit the context
    valid_indices = []  # original dataset row index for each valid prompt
    skipped_info = []   # (row index, token count) for prompts that were too long

    for i, example in enumerate(tqdm(dataset, desc="Processing prompts")):
        if use_messages:
            # Messages mode: use existing chat messages
            messages = example[messages_column]
        else:
            # Prompt mode: convert plain text to messages format
            messages = [{"role": "user", "content": example[prompt_column]}]
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Count tokens only when filtering is enabled (tokenizing is the
        # expensive part of this loop).
        if skip_long_prompts:
            token_count = len(tokenizer.encode(prompt))
            if token_count > effective_max_len:
                skipped_info.append((i, token_count))
                continue
        valid_prompts.append(prompt)
        valid_indices.append(i)

    # Log filtering results
    if skip_long_prompts and skipped_info:
        logger.warning(
            f"Skipped {len(skipped_info)} prompts that exceed max_model_len ({effective_max_len} tokens)"
        )
        logger.info("Skipped prompt details (first 10):")
        for prompt_idx, token_count in skipped_info[:10]:
            logger.info(
                f" - Example {prompt_idx}: {token_count} tokens (exceeds by {token_count - effective_max_len})"
            )
        if len(skipped_info) > 10:
            logger.info(f" ... and {len(skipped_info) - 10} more")

        skip_percentage = (len(skipped_info) / total_examples) * 100
        if skip_percentage > 10:
            logger.warning(f"WARNING: {skip_percentage:.1f}% of prompts were skipped!")

    if not valid_prompts:
        logger.error("No valid prompts to process after filtering!")
        sys.exit(1)

    # Generate responses - vLLM handles batching internally
    logger.info(f"Starting generation for {len(valid_prompts):,} valid prompts...")
    logger.info("vLLM will handle batching and scheduling automatically")

    outputs = llm.generate(valid_prompts, sampling_params)

    # Map generations back to their original row positions; skipped rows
    # keep an empty string so the column length matches the dataset.
    logger.info("Extracting generated responses...")
    responses = [""] * total_examples

    for original_idx, output in zip(valid_indices, outputs):
        responses[original_idx] = output.outputs[0].text.strip()

    # Add responses to dataset
    logger.info("Adding responses to dataset...")
    dataset = dataset.add_column(output_column, responses)

    # Create dataset card
    logger.info("Creating dataset card...")
    card_content = create_dataset_card(
        source_dataset=src_dataset_hub_id,
        model_id=model_id,
        messages_column=messages_column,
        prompt_column=prompt_column,
        sampling_params=sampling_params,
        tensor_parallel_size=tensor_parallel_size,
        num_examples=total_examples,
        generation_time=generation_start_time,
        num_skipped=len(skipped_info) if skip_long_prompts else 0,
        max_model_len_used=effective_max_len if skip_long_prompts else None,
    )

    # Push dataset to hub
    logger.info(f"Pushing dataset to: {output_dataset_hub_id}")
    dataset.push_to_hub(output_dataset_hub_id, token=HF_TOKEN)

    # Push dataset card
    card = DatasetCard(card_content)
    card.push_to_hub(output_dataset_hub_id, token=HF_TOKEN)

    logger.info("✅ Generation complete!")
    logger.info(
        f"Dataset available at: https://huggingface.co/datasets/{output_dataset_hub_id}"
    )
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Build the CLI. Every option's dest matches a keyword parameter of
        # main() exactly, so parsed options can be forwarded wholesale below.
        cli = argparse.ArgumentParser(
            description="Generate responses for dataset prompts using vLLM",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
Examples:
  # Basic usage with default Qwen model
  uv run generate-responses.py input-dataset output-dataset

  # With custom model and parameters
  uv run generate-responses.py input-dataset output-dataset \\
    --model-id meta-llama/Llama-3.1-8B-Instruct \\
    --temperature 0.9 \\
    --max-tokens 2048

  # Force specific GPU configuration
  uv run generate-responses.py input-dataset output-dataset \\
    --tensor-parallel-size 2 \\
    --gpu-memory-utilization 0.95

  # Using environment variable for token
  HF_TOKEN=hf_xxx uv run generate-responses.py input-dataset output-dataset
""",
        )

        # Positional arguments: source and destination datasets on the Hub.
        cli.add_argument(
            "src_dataset_hub_id",
            help="Input dataset on Hugging Face Hub (e.g., username/dataset-name)",
        )
        cli.add_argument(
            "output_dataset_hub_id", help="Output dataset name on Hugging Face Hub"
        )

        # Model and column selection.
        cli.add_argument(
            "--model-id",
            type=str,
            default="Qwen/Qwen3-30B-A3B-Instruct-2507",
            help="Model to use for generation (default: Qwen3-30B-A3B-Instruct-2507)",
        )
        cli.add_argument(
            "--messages-column",
            type=str,
            default="messages",
            help="Column containing chat messages (default: messages)",
        )
        cli.add_argument(
            "--prompt-column",
            type=str,
            help="Column containing plain text prompts (alternative to --messages-column)",
        )
        cli.add_argument(
            "--output-column",
            type=str,
            default="response",
            help="Column name for generated responses (default: response)",
        )
        cli.add_argument(
            "--max-samples",
            type=int,
            help="Maximum number of samples to process (default: all)",
        )

        # Sampling parameters.
        cli.add_argument(
            "--temperature",
            type=float,
            default=0.7,
            help="Sampling temperature (default: 0.7)",
        )
        cli.add_argument(
            "--top-p",
            type=float,
            default=0.8,
            help="Top-p sampling parameter (default: 0.8)",
        )
        cli.add_argument(
            "--top-k",
            type=int,
            default=20,
            help="Top-k sampling parameter (default: 20)",
        )
        cli.add_argument(
            "--min-p",
            type=float,
            default=0.0,
            help="Minimum probability threshold (default: 0.0)",
        )
        cli.add_argument(
            "--max-tokens",
            type=int,
            default=16384,
            help="Maximum tokens to generate (default: 16384)",
        )
        cli.add_argument(
            "--repetition-penalty",
            type=float,
            default=1.0,
            help="Repetition penalty (default: 1.0)",
        )

        # Hardware / runtime configuration.
        cli.add_argument(
            "--gpu-memory-utilization",
            type=float,
            default=0.90,
            help="GPU memory utilization factor (default: 0.90)",
        )
        cli.add_argument(
            "--max-model-len",
            type=int,
            help="Maximum model context length (default: model's default)",
        )
        cli.add_argument(
            "--tensor-parallel-size",
            type=int,
            help="Number of GPUs to use (default: auto-detect)",
        )
        cli.add_argument(
            "--hf-token",
            type=str,
            help="Hugging Face token (can also use HF_TOKEN env var)",
        )

        # Long-prompt handling: an explicit on/off flag pair sharing one dest.
        cli.add_argument(
            "--skip-long-prompts",
            action="store_true",
            default=True,
            help="Skip prompts that exceed max_model_len instead of failing (default: True)",
        )
        cli.add_argument(
            "--no-skip-long-prompts",
            dest="skip_long_prompts",
            action="store_false",
            help="Fail on prompts that exceed max_model_len",
        )

        opts = cli.parse_args()

        # Dest names line up one-to-one with main()'s keyword parameters.
        main(**vars(opts))
    else:
        # Show HF Jobs example when run without arguments
        print("""
vLLM Response Generation Script
==============================

This script requires arguments. For usage information:
  uv run generate-responses.py --help

Example HF Jobs command with multi-GPU:
  # If you're logged in with huggingface-cli, token will be auto-detected
  hf jobs uv run \\
    --flavor l4x4 \\
    https://huggingface.co/datasets/uv-scripts/vllm/raw/main/generate-responses.py \\
    username/input-dataset \\
    username/output-dataset \\
    --messages-column messages \\
    --model-id Qwen/Qwen3-30B-A3B-Instruct-2507 \\
    --temperature 0.7 \\
    --max-tokens 16384
""")