@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: model-pruning
|
|
3
|
+
description: Reduce LLM size and accelerate inference using pruning techniques like Wanda and SparseGPT. Use when compressing models without retraining, achieving 50% sparsity with minimal accuracy loss, or enabling faster inference on hardware accelerators. Covers unstructured pruning, structured pruning, N:M sparsity, magnitude pruning, and one-shot methods.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Synthetic Sciences
|
|
6
|
+
license: MIT
|
|
7
|
+
tags: [Emerging Techniques, Model Pruning, Wanda, SparseGPT, Sparsity, Model Compression, N:M Sparsity, One-Shot Pruning, Structured Pruning, Unstructured Pruning, Fast Inference]
|
|
8
|
+
dependencies: [transformers, torch]
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Model Pruning: Compressing LLMs
|
|
12
|
+
|
|
13
|
+
## When to Use This Skill
|
|
14
|
+
|
|
15
|
+
Use Model Pruning when you need to:
|
|
16
|
+
- **Reduce model size** by 40-60% with <1% accuracy loss
|
|
17
|
+
- **Accelerate inference** using hardware-friendly sparsity (2-4× speedup)
|
|
18
|
+
- **Deploy on constrained hardware** (mobile, edge devices)
|
|
19
|
+
- **Compress without retraining** using one-shot methods
|
|
20
|
+
- **Enable efficient serving** with reduced memory footprint
|
|
21
|
+
|
|
22
|
+
**Key Techniques**: Wanda (weights × activations), SparseGPT (second-order), structured pruning, N:M sparsity
|
|
23
|
+
|
|
24
|
+
**Papers**: Wanda ICLR 2024 (arXiv 2306.11695), SparseGPT (arXiv 2301.00774)
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# Wanda implementation
|
|
30
|
+
git clone https://github.com/locuslab/wanda
|
|
31
|
+
cd wanda
|
|
32
|
+
pip install -r requirements.txt
|
|
33
|
+
|
|
34
|
+
# Optional: SparseGPT
|
|
35
|
+
git clone https://github.com/IST-DASLab/sparsegpt
|
|
36
|
+
cd sparsegpt
|
|
37
|
+
pip install -e .
|
|
38
|
+
|
|
39
|
+
# Dependencies
|
|
40
|
+
pip install torch transformers accelerate
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
### Wanda Pruning (One-Shot, No Retraining)
|
|
46
|
+
|
|
47
|
+
**Source**: ICLR 2024 (arXiv 2306.11695)
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
import torch
|
|
51
|
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
52
|
+
|
|
53
|
+
# Load model
|
|
54
|
+
model = AutoModelForCausalLM.from_pretrained(
|
|
55
|
+
"meta-llama/Llama-2-7b-hf",
|
|
56
|
+
torch_dtype=torch.float16,
|
|
57
|
+
device_map="cuda"
|
|
58
|
+
)
|
|
59
|
+
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
|
|
60
|
+
|
|
61
|
+
# Calibration data (small dataset for activation statistics)
|
|
62
|
+
calib_data = [
|
|
63
|
+
"The quick brown fox jumps over the lazy dog.",
|
|
64
|
+
"Machine learning is transforming the world.",
|
|
65
|
+
"Artificial intelligence powers modern applications.",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
# Wanda pruning function
|
|
69
|
+
def wanda_prune(model, calib_data, sparsity=0.5):
|
|
70
|
+
"""
|
|
71
|
+
Wanda: Prune by weight magnitude × input activation.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
sparsity: Fraction of weights to prune (0.5 = 50%)
|
|
75
|
+
"""
|
|
76
|
+
# 1. Collect activation statistics
|
|
77
|
+
activations = {}
|
|
78
|
+
|
|
79
|
+
def hook_fn(name):
|
|
80
|
+
def hook(module, input, output):
|
|
81
|
+
# Store input activation norms
|
|
82
|
+
activations[name] = input[0].detach().abs().mean(dim=0)
|
|
83
|
+
return hook
|
|
84
|
+
|
|
85
|
+
# Register hooks for all linear layers
|
|
86
|
+
hooks = []
|
|
87
|
+
for name, module in model.named_modules():
|
|
88
|
+
if isinstance(module, torch.nn.Linear):
|
|
89
|
+
hooks.append(module.register_forward_hook(hook_fn(name)))
|
|
90
|
+
|
|
91
|
+
# Run calibration data
|
|
92
|
+
model.eval()
|
|
93
|
+
with torch.no_grad():
|
|
94
|
+
for text in calib_data:
|
|
95
|
+
inputs = tokenizer(text, return_tensors="pt").to(model.device)
|
|
96
|
+
model(**inputs)
|
|
97
|
+
|
|
98
|
+
# Remove hooks
|
|
99
|
+
for hook in hooks:
|
|
100
|
+
hook.remove()
|
|
101
|
+
|
|
102
|
+
# 2. Prune weights based on |weight| × activation
|
|
103
|
+
for name, module in model.named_modules():
|
|
104
|
+
if isinstance(module, torch.nn.Linear) and name in activations:
|
|
105
|
+
W = module.weight.data
|
|
106
|
+
act = activations[name]
|
|
107
|
+
|
|
108
|
+
# Compute importance: |weight| × activation
|
|
109
|
+
importance = W.abs() * act.unsqueeze(0)
|
|
110
|
+
|
|
111
|
+
# Flatten and find threshold
|
|
112
|
+
threshold = torch.quantile(importance.flatten(), sparsity)
|
|
113
|
+
|
|
114
|
+
# Create mask
|
|
115
|
+
mask = importance >= threshold
|
|
116
|
+
|
|
117
|
+
# Apply mask (prune)
|
|
118
|
+
W *= mask.float()
|
|
119
|
+
|
|
120
|
+
return model
|
|
121
|
+
|
|
122
|
+
# Apply Wanda pruning (50% sparsity, one-shot, no retraining)
|
|
123
|
+
pruned_model = wanda_prune(model, calib_data, sparsity=0.5)
|
|
124
|
+
|
|
125
|
+
# Save
|
|
126
|
+
pruned_model.save_pretrained("./llama-2-7b-wanda-50")
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### SparseGPT (Second-Order Pruning)
|
|
130
|
+
|
|
131
|
+
**Source**: arXiv 2301.00774
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from sparsegpt import SparseGPT
|
|
135
|
+
|
|
136
|
+
# Load model
|
|
137
|
+
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
|
|
138
|
+
|
|
139
|
+
# Initialize SparseGPT
|
|
140
|
+
pruner = SparseGPT(model)
|
|
141
|
+
|
|
142
|
+
# Calibration data
|
|
143
|
+
calib_data = load_calibration_data() # ~128 samples
|
|
144
|
+
|
|
145
|
+
# Prune (one-shot, layer-wise reconstruction)
|
|
146
|
+
pruned_model = pruner.prune(
|
|
147
|
+
calib_data=calib_data,
|
|
148
|
+
sparsity=0.5, # 50% sparsity
|
|
149
|
+
prunen=0, # Unstructured (0) or N:M structured
|
|
150
|
+
prunem=0,
|
|
151
|
+
percdamp=0.01, # Damping for Hessian inverse
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# Results: Near-lossless pruning at 50% sparsity
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### N:M Structured Pruning (Hardware Accelerator)
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
def nm_prune(weight, n=2, m=4):
|
|
161
|
+
"""
|
|
162
|
+
N:M pruning: Keep N weights per M consecutive weights.
|
|
163
|
+
Example: 2:4 = keep 2 out of every 4 weights.
|
|
164
|
+
|
|
165
|
+
Compatible with NVIDIA sparse tensor cores (2:4, 4:8).
|
|
166
|
+
"""
|
|
167
|
+
# Reshape weight into groups of M
|
|
168
|
+
shape = weight.shape
|
|
169
|
+
weight_flat = weight.flatten()
|
|
170
|
+
|
|
171
|
+
# Pad to multiple of M
|
|
172
|
+
pad_size = (m - weight_flat.numel() % m) % m
|
|
173
|
+
weight_padded = F.pad(weight_flat, (0, pad_size))
|
|
174
|
+
|
|
175
|
+
# Reshape into (num_groups, m)
|
|
176
|
+
weight_grouped = weight_padded.reshape(-1, m)
|
|
177
|
+
|
|
178
|
+
# Find top-N in each group
|
|
179
|
+
_, indices = torch.topk(weight_grouped.abs(), n, dim=-1)
|
|
180
|
+
|
|
181
|
+
# Create mask
|
|
182
|
+
mask = torch.zeros_like(weight_grouped)
|
|
183
|
+
mask.scatter_(1, indices, 1.0)
|
|
184
|
+
|
|
185
|
+
# Apply mask
|
|
186
|
+
weight_pruned = weight_grouped * mask
|
|
187
|
+
|
|
188
|
+
# Reshape back
|
|
189
|
+
weight_pruned = weight_pruned.flatten()[:weight_flat.numel()]
|
|
190
|
+
return weight_pruned.reshape(shape)
|
|
191
|
+
|
|
192
|
+
# Apply 2:4 sparsity (NVIDIA hardware)
|
|
193
|
+
for name, module in model.named_modules():
|
|
194
|
+
if isinstance(module, torch.nn.Linear):
|
|
195
|
+
module.weight.data = nm_prune(module.weight.data, n=2, m=4)
|
|
196
|
+
|
|
197
|
+
# 50% sparsity, 2× speedup on A100 with sparse tensor cores
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Core Concepts
|
|
201
|
+
|
|
202
|
+
### 1. Pruning Criteria
|
|
203
|
+
|
|
204
|
+
**Magnitude Pruning** (baseline):
|
|
205
|
+
```python
|
|
206
|
+
# Prune weights with smallest absolute values
|
|
207
|
+
importance = weight.abs()
|
|
208
|
+
threshold = torch.quantile(importance, sparsity)
|
|
209
|
+
mask = importance >= threshold
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
**Wanda** (weights × activations):
|
|
213
|
+
```python
|
|
214
|
+
# Importance = |weight| × input_activation
|
|
215
|
+
importance = weight.abs() * activation
|
|
216
|
+
# Better than magnitude alone (considers usage)
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
**SparseGPT** (second-order):
|
|
220
|
+
```python
|
|
221
|
+
# Uses Hessian (second derivative) for importance
|
|
222
|
+
# More accurate but computationally expensive
|
|
223
|
+
importance = weight**2 / diag(Hessian_inverse)  # w^2 / [H^-1]_ii (OBS saliency)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### 2. Structured vs Unstructured
|
|
227
|
+
|
|
228
|
+
**Unstructured** (fine-grained):
|
|
229
|
+
- Prune individual weights
|
|
230
|
+
- Higher quality (better accuracy)
|
|
231
|
+
- No hardware speedup (irregular sparsity)
|
|
232
|
+
|
|
233
|
+
**Structured** (coarse-grained):
|
|
234
|
+
- Prune entire neurons, heads, or layers
|
|
235
|
+
- Lower quality (more accuracy loss)
|
|
236
|
+
- Hardware speedup (regular sparsity)
|
|
237
|
+
|
|
238
|
+
**Semi-structured (N:M)**:
|
|
239
|
+
- Best of both worlds
|
|
240
|
+
- 50% sparsity (2:4) → 2× speedup on NVIDIA GPUs
|
|
241
|
+
- Minimal accuracy loss
|
|
242
|
+
|
|
243
|
+
### 3. Sparsity Patterns
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
# Unstructured (random)
|
|
247
|
+
# [1, 0, 1, 0, 1, 1, 0, 0]
|
|
248
|
+
# Pros: Flexible, high quality
|
|
249
|
+
# Cons: No speedup
|
|
250
|
+
|
|
251
|
+
# Structured (block)
|
|
252
|
+
# [1, 1, 0, 0, 1, 1, 0, 0]
|
|
253
|
+
# Pros: Hardware friendly
|
|
254
|
+
# Cons: More accuracy loss
|
|
255
|
+
|
|
256
|
+
# N:M (semi-structured)
|
|
257
|
+
# [1, 0, 1, 0] [1, 1, 0, 0] (2:4 pattern)
|
|
258
|
+
# Pros: Hardware speedup + good quality
|
|
259
|
+
# Cons: Requires specific hardware (NVIDIA)
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
## Pruning Strategies
|
|
263
|
+
|
|
264
|
+
### Strategy 1: Gradual Magnitude Pruning
|
|
265
|
+
|
|
266
|
+
```python
|
|
267
|
+
def gradual_prune(model, initial_sparsity=0.0, final_sparsity=0.5, num_steps=100):
|
|
268
|
+
"""Gradually increase sparsity during training."""
|
|
269
|
+
for step in range(num_steps):
|
|
270
|
+
# Current sparsity
|
|
271
|
+
current_sparsity = initial_sparsity + (final_sparsity - initial_sparsity) * (step / num_steps)
|
|
272
|
+
|
|
273
|
+
# Prune at current sparsity
|
|
274
|
+
for module in model.modules():
|
|
275
|
+
if isinstance(module, torch.nn.Linear):
|
|
276
|
+
weight = module.weight.data
|
|
277
|
+
threshold = torch.quantile(weight.abs().flatten(), current_sparsity)
|
|
278
|
+
mask = weight.abs() >= threshold
|
|
279
|
+
weight *= mask.float()
|
|
280
|
+
|
|
281
|
+
# Train one step
|
|
282
|
+
train_step(model)
|
|
283
|
+
|
|
284
|
+
return model
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### Strategy 2: Layer-wise Pruning
|
|
288
|
+
|
|
289
|
+
```python
|
|
290
|
+
def layer_wise_prune(model, sparsity_per_layer):
|
|
291
|
+
"""Different sparsity for different layers."""
|
|
292
|
+
# Early layers: Less pruning (more important)
|
|
293
|
+
# Late layers: More pruning (less critical)
|
|
294
|
+
|
|
295
|
+
sparsity_schedule = {
|
|
296
|
+
"layer.0": 0.3, # 30% sparsity
|
|
297
|
+
"layer.1": 0.4,
|
|
298
|
+
"layer.2": 0.5,
|
|
299
|
+
"layer.3": 0.6, # 60% sparsity
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
for name, module in model.named_modules():
|
|
303
|
+
if isinstance(module, torch.nn.Linear):
|
|
304
|
+
# Find layer index
|
|
305
|
+
for layer_name, sparsity in sparsity_schedule.items():
|
|
306
|
+
if layer_name in name:
|
|
307
|
+
# Prune at layer-specific sparsity
|
|
308
|
+
prune_layer(module, sparsity)
|
|
309
|
+
break
|
|
310
|
+
|
|
311
|
+
return model
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### Strategy 3: Iterative Pruning + Fine-tuning
|
|
315
|
+
|
|
316
|
+
```python
|
|
317
|
+
def iterative_prune_finetune(model, target_sparsity=0.5, iterations=5):
|
|
318
|
+
"""Prune gradually with fine-tuning between iterations."""
|
|
319
|
+
current_sparsity = 0.0
|
|
320
|
+
sparsity_increment = target_sparsity / iterations
|
|
321
|
+
|
|
322
|
+
for i in range(iterations):
|
|
323
|
+
# Increase sparsity
|
|
324
|
+
current_sparsity += sparsity_increment
|
|
325
|
+
|
|
326
|
+
# Prune
|
|
327
|
+
prune_model(model, sparsity=current_sparsity)
|
|
328
|
+
|
|
329
|
+
# Fine-tune (recover accuracy)
|
|
330
|
+
fine_tune(model, epochs=2, lr=1e-5)
|
|
331
|
+
|
|
332
|
+
return model
|
|
333
|
+
|
|
334
|
+
# Results: Better accuracy than one-shot at high sparsity
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
## Production Deployment
|
|
338
|
+
|
|
339
|
+
### Complete Pruning Pipeline
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
from transformers import Trainer, TrainingArguments
|
|
343
|
+
|
|
344
|
+
def production_pruning_pipeline(
|
|
345
|
+
model_name="meta-llama/Llama-2-7b-hf",
|
|
346
|
+
target_sparsity=0.5,
|
|
347
|
+
method="wanda", # or "sparsegpt"
|
|
348
|
+
):
|
|
349
|
+
# 1. Load model
|
|
350
|
+
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
|
|
351
|
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
352
|
+
|
|
353
|
+
# 2. Load calibration data
|
|
354
|
+
calib_dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train[:1000]")
|
|
355
|
+
|
|
356
|
+
# 3. Apply pruning
|
|
357
|
+
if method == "wanda":
|
|
358
|
+
pruned_model = wanda_prune(model, calib_dataset, sparsity=target_sparsity)
|
|
359
|
+
elif method == "sparsegpt":
|
|
360
|
+
pruner = SparseGPT(model)
|
|
361
|
+
pruned_model = pruner.prune(calib_dataset, sparsity=target_sparsity)
|
|
362
|
+
|
|
363
|
+
# 4. (Optional) Fine-tune to recover accuracy
|
|
364
|
+
training_args = TrainingArguments(
|
|
365
|
+
output_dir="./pruned-model",
|
|
366
|
+
num_train_epochs=1,
|
|
367
|
+
per_device_train_batch_size=4,
|
|
368
|
+
learning_rate=1e-5,
|
|
369
|
+
bf16=True,
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
trainer = Trainer(
|
|
373
|
+
model=pruned_model,
|
|
374
|
+
args=training_args,
|
|
375
|
+
train_dataset=finetune_dataset,
|
|
376
|
+
)
|
|
377
|
+
|
|
378
|
+
trainer.train()
|
|
379
|
+
|
|
380
|
+
# 5. Save
|
|
381
|
+
pruned_model.save_pretrained("./pruned-llama-7b-50")
|
|
382
|
+
tokenizer.save_pretrained("./pruned-llama-7b-50")
|
|
383
|
+
|
|
384
|
+
return pruned_model
|
|
385
|
+
|
|
386
|
+
# Usage
|
|
387
|
+
pruned_model = production_pruning_pipeline(
|
|
388
|
+
model_name="meta-llama/Llama-2-7b-hf",
|
|
389
|
+
target_sparsity=0.5,
|
|
390
|
+
method="wanda"
|
|
391
|
+
)
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
### Evaluation
|
|
395
|
+
|
|
396
|
+
```python
|
|
397
|
+
from lm_eval import evaluator
|
|
398
|
+
|
|
399
|
+
# Evaluate pruned vs original model
|
|
400
|
+
original_results = evaluator.simple_evaluate(
|
|
401
|
+
model="hf",
|
|
402
|
+
model_args="pretrained=meta-llama/Llama-2-7b-hf",
|
|
403
|
+
tasks=["arc_easy", "hellaswag", "winogrande"],
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
pruned_results = evaluator.simple_evaluate(
|
|
407
|
+
model="hf",
|
|
408
|
+
model_args="pretrained=./pruned-llama-7b-50",
|
|
409
|
+
tasks=["arc_easy", "hellaswag", "winogrande"],
|
|
410
|
+
)
|
|
411
|
+
|
|
412
|
+
# Compare
|
|
413
|
+
print(f"Original: {original_results['results']['arc_easy']['acc']:.3f}")
|
|
414
|
+
print(f"Pruned: {pruned_results['results']['arc_easy']['acc']:.3f}")
|
|
415
|
+
print(f"Degradation: {original_results['results']['arc_easy']['acc'] - pruned_results['results']['arc_easy']['acc']:.3f}")
|
|
416
|
+
|
|
417
|
+
# Typical results at 50% sparsity:
|
|
418
|
+
# - Wanda: <1% accuracy loss
|
|
419
|
+
# - SparseGPT: <0.5% accuracy loss
|
|
420
|
+
# - Magnitude: 2-3% accuracy loss
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
## Best Practices
|
|
424
|
+
|
|
425
|
+
### 1. Sparsity Selection
|
|
426
|
+
|
|
427
|
+
```python
|
|
428
|
+
# Conservative (safe)
|
|
429
|
+
sparsity = 0.3 # 30%, <0.5% loss
|
|
430
|
+
|
|
431
|
+
# Balanced (recommended)
|
|
432
|
+
sparsity = 0.5 # 50%, ~1% loss
|
|
433
|
+
|
|
434
|
+
# Aggressive (risky)
|
|
435
|
+
sparsity = 0.7 # 70%, 2-5% loss
|
|
436
|
+
|
|
437
|
+
# Extreme (model-dependent)
|
|
438
|
+
sparsity = 0.9 # 90%, significant degradation
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
### 2. Method Selection
|
|
442
|
+
|
|
443
|
+
```python
|
|
444
|
+
# One-shot, no retraining → Wanda or SparseGPT
|
|
445
|
+
if no_retraining_budget:
|
|
446
|
+
use_method = "wanda" # Faster
|
|
447
|
+
|
|
448
|
+
# Best quality → SparseGPT
|
|
449
|
+
if need_best_quality:
|
|
450
|
+
use_method = "sparsegpt" # More accurate
|
|
451
|
+
|
|
452
|
+
# Hardware speedup → N:M structured
|
|
453
|
+
if need_speedup:
|
|
454
|
+
use_method = "nm_prune" # 2:4 or 4:8
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
### 3. Avoid Common Pitfalls
|
|
458
|
+
|
|
459
|
+
```python
|
|
460
|
+
# ❌ Bad: Pruning without calibration data
|
|
461
|
+
prune_random(model) # No activation statistics
|
|
462
|
+
|
|
463
|
+
# ✅ Good: Use calibration data
|
|
464
|
+
prune_wanda(model, calib_data)
|
|
465
|
+
|
|
466
|
+
# ❌ Bad: Too high sparsity in one shot
|
|
467
|
+
prune(model, sparsity=0.9) # Massive accuracy loss
|
|
468
|
+
|
|
469
|
+
# ✅ Good: Gradual or iterative
|
|
470
|
+
iterative_prune(model, target=0.9, steps=10)
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
## Performance Comparison
|
|
474
|
+
|
|
475
|
+
**Pruning methods at 50% sparsity** (LLaMA-7B):
|
|
476
|
+
|
|
477
|
+
| Method | Accuracy Loss | Speed | Memory | Retraining Needed |
|
|
478
|
+
|--------|---------------|-------|---------|-------------------|
|
|
479
|
+
| **Magnitude** | -2.5% | 1.0× | -50% | No |
|
|
480
|
+
| **Wanda** | -0.8% | 1.0× | -50% | No |
|
|
481
|
+
| **SparseGPT** | -0.4% | 1.0× | -50% | No |
|
|
482
|
+
| **N:M (2:4)** | -1.0% | 2.0× | -50% | No |
|
|
483
|
+
| **Structured** | -3.0% | 2.0× | -50% | No |
|
|
484
|
+
|
|
485
|
+
**Source**: Wanda paper (ICLR 2024), SparseGPT paper
|
|
486
|
+
|
|
487
|
+
## Resources
|
|
488
|
+
|
|
489
|
+
- **Wanda Paper (ICLR 2024)**: https://arxiv.org/abs/2306.11695
|
|
490
|
+
- **Wanda GitHub**: https://github.com/locuslab/wanda
|
|
491
|
+
- **SparseGPT Paper**: https://arxiv.org/abs/2301.00774
|
|
492
|
+
- **SparseGPT GitHub**: https://github.com/IST-DASLab/sparsegpt
|
|
493
|
+
- **NVIDIA Sparse Tensor Cores**: https://developer.nvidia.com/blog/accelerating-inference-with-sparsity-using-ampere-and-tensorrt/
|
|
494
|
+
|
|
495
|
+
|