@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,503 @@
|
|
|
1
|
+
# HQQ Troubleshooting Guide
|
|
2
|
+
|
|
3
|
+
## Installation Issues
|
|
4
|
+
|
|
5
|
+
### Package Not Found
|
|
6
|
+
|
|
7
|
+
**Error**: `ModuleNotFoundError: No module named 'hqq'`
|
|
8
|
+
|
|
9
|
+
**Fix**:
|
|
10
|
+
```bash
|
|
11
|
+
pip install hqq
|
|
12
|
+
|
|
13
|
+
# Verify installation
|
|
14
|
+
python -c "import hqq; print(hqq.__version__)"
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
### Backend Dependencies Missing
|
|
18
|
+
|
|
19
|
+
**Error**: `ImportError: Cannot import marlin backend`
|
|
20
|
+
|
|
21
|
+
**Fix**:
|
|
22
|
+
```bash
|
|
23
|
+
# Install specific backend
|
|
24
|
+
pip install hqq[marlin]
|
|
25
|
+
|
|
26
|
+
# Or all backends
|
|
27
|
+
pip install hqq[all]
|
|
28
|
+
|
|
29
|
+
# For BitBlas
|
|
30
|
+
pip install bitblas
|
|
31
|
+
|
|
32
|
+
# For TorchAO
|
|
33
|
+
pip install torchao
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### CUDA Version Mismatch
|
|
37
|
+
|
|
38
|
+
**Error**: `RuntimeError: CUDA error: no kernel image is available`
|
|
39
|
+
|
|
40
|
+
**Fix**:
|
|
41
|
+
```bash
|
|
42
|
+
# Check CUDA version
|
|
43
|
+
nvcc --version
|
|
44
|
+
python -c "import torch; print(torch.version.cuda)"
|
|
45
|
+
|
|
46
|
+
# Reinstall PyTorch with matching CUDA
|
|
47
|
+
pip install torch --index-url https://download.pytorch.org/whl/cu121
|
|
48
|
+
|
|
49
|
+
# Then reinstall hqq
|
|
50
|
+
pip install hqq --force-reinstall
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Quantization Errors
|
|
54
|
+
|
|
55
|
+
### Out of Memory During Quantization
|
|
56
|
+
|
|
57
|
+
**Error**: `torch.cuda.OutOfMemoryError`
|
|
58
|
+
|
|
59
|
+
**Solutions**:
|
|
60
|
+
|
|
61
|
+
1. **Use CPU offloading**:
|
|
62
|
+
```python
|
|
63
|
+
from transformers import AutoModelForCausalLM, HqqConfig
|
|
64
|
+
|
|
65
|
+
model = AutoModelForCausalLM.from_pretrained(
|
|
66
|
+
"meta-llama/Llama-3.1-8B",
|
|
67
|
+
quantization_config=HqqConfig(nbits=4, group_size=64),
|
|
68
|
+
device_map="auto",
|
|
69
|
+
offload_folder="./offload"
|
|
70
|
+
)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
2. **Quantize layer by layer**:
|
|
74
|
+
```python
|
|
75
|
+
from hqq.models.hf.base import AutoHQQHFModel
|
|
76
|
+
|
|
77
|
+
model = AutoHQQHFModel.from_pretrained(
|
|
78
|
+
"meta-llama/Llama-3.1-8B",
|
|
79
|
+
quantization_config=config,
|
|
80
|
+
device_map="sequential"
|
|
81
|
+
)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
3. **Reduce group size**:
|
|
85
|
+
```python
|
|
86
|
+
config = HqqConfig(
|
|
87
|
+
nbits=4,
|
|
88
|
+
group_size=32 # Smaller groups use less memory during quantization
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### NaN Values After Quantization
|
|
93
|
+
|
|
94
|
+
**Error**: `RuntimeWarning: invalid value encountered` or NaN outputs
|
|
95
|
+
|
|
96
|
+
**Solutions**:
|
|
97
|
+
|
|
98
|
+
1. **Check for outliers**:
|
|
99
|
+
```python
|
|
100
|
+
import torch
|
|
101
|
+
|
|
102
|
+
def check_weight_stats(model):
|
|
103
|
+
for name, param in model.named_parameters():
|
|
104
|
+
if param.numel() > 0:
|
|
105
|
+
has_nan = torch.isnan(param).any().item()
|
|
106
|
+
has_inf = torch.isinf(param).any().item()
|
|
107
|
+
if has_nan or has_inf:
|
|
108
|
+
print(f"{name}: NaN={has_nan}, Inf={has_inf}")
|
|
109
|
+
print(f" min={param.min():.4f}, max={param.max():.4f}")
|
|
110
|
+
|
|
111
|
+
check_weight_stats(model)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
2. **Use higher precision for problematic layers**:
|
|
115
|
+
```python
|
|
116
|
+
layer_configs = {
|
|
117
|
+
"problematic_layer": BaseQuantizeConfig(nbits=8, group_size=128),
|
|
118
|
+
"default": BaseQuantizeConfig(nbits=4, group_size=64)
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
3. **Skip embedding/lm_head**:
|
|
123
|
+
```python
|
|
124
|
+
config = HqqConfig(
|
|
125
|
+
nbits=4,
|
|
126
|
+
group_size=64,
|
|
127
|
+
skip_modules=["embed_tokens", "lm_head"]
|
|
128
|
+
)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Wrong Output Shape
|
|
132
|
+
|
|
133
|
+
**Error**: `RuntimeError: shape mismatch`
|
|
134
|
+
|
|
135
|
+
**Fix**:
|
|
136
|
+
```python
|
|
137
|
+
# Ensure axis is correct for your model
|
|
138
|
+
config = BaseQuantizeConfig(
|
|
139
|
+
nbits=4,
|
|
140
|
+
group_size=64,
|
|
141
|
+
axis=1 # Usually 1 for most models, try 0 if issues
|
|
142
|
+
)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Backend Issues
|
|
146
|
+
|
|
147
|
+
### Marlin Backend Not Working
|
|
148
|
+
|
|
149
|
+
**Error**: `RuntimeError: Marlin kernel not available`
|
|
150
|
+
|
|
151
|
+
**Requirements**:
|
|
152
|
+
- Ampere (A100) or newer GPU (compute capability >= 8.0)
|
|
153
|
+
- 4-bit quantization only
|
|
154
|
+
- Group size must be 128
|
|
155
|
+
|
|
156
|
+
**Fix**:
|
|
157
|
+
```python
|
|
158
|
+
# Check GPU compatibility
|
|
159
|
+
import torch
|
|
160
|
+
device = torch.cuda.get_device_properties(0)
|
|
161
|
+
print(f"Compute capability: {device.major}.{device.minor}")
|
|
162
|
+
|
|
163
|
+
# Marlin requires >= 8.0
|
|
164
|
+
if device.major >= 8:
|
|
165
|
+
HQQLinear.set_backend("marlin")
|
|
166
|
+
else:
|
|
167
|
+
HQQLinear.set_backend("aten") # Fallback
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### TorchAO Backend Errors
|
|
171
|
+
|
|
172
|
+
**Error**: `ImportError: torchao not found`
|
|
173
|
+
|
|
174
|
+
**Fix**:
|
|
175
|
+
```bash
|
|
176
|
+
pip install torchao
|
|
177
|
+
|
|
178
|
+
# Verify
|
|
179
|
+
python -c "import torchao; print('TorchAO installed')"
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
**Error**: `RuntimeError: torchao int4 requires specific shapes`
|
|
183
|
+
|
|
184
|
+
**Fix**:
|
|
185
|
+
```python
|
|
186
|
+
# TorchAO int4 has shape requirements
|
|
187
|
+
# Ensure dimensions are divisible by 32
|
|
188
|
+
config = BaseQuantizeConfig(
|
|
189
|
+
nbits=4,
|
|
190
|
+
group_size=64 # Must be power of 2
|
|
191
|
+
)
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### Fallback to PyTorch Backend
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
from hqq.core.quantize import HQQLinear
|
|
198
|
+
|
|
199
|
+
def safe_set_backend(preferred_backend):
|
|
200
|
+
"""Set backend with fallback."""
|
|
201
|
+
try:
|
|
202
|
+
HQQLinear.set_backend(preferred_backend)
|
|
203
|
+
print(f"Using {preferred_backend} backend")
|
|
204
|
+
except Exception as e:
|
|
205
|
+
print(f"Failed to set {preferred_backend}: {e}")
|
|
206
|
+
print("Falling back to pytorch backend")
|
|
207
|
+
HQQLinear.set_backend("pytorch")
|
|
208
|
+
|
|
209
|
+
safe_set_backend("marlin")
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Performance Issues
|
|
213
|
+
|
|
214
|
+
### Slow Inference
|
|
215
|
+
|
|
216
|
+
**Problem**: Inference slower than expected
|
|
217
|
+
|
|
218
|
+
**Solutions**:
|
|
219
|
+
|
|
220
|
+
1. **Use optimized backend**:
|
|
221
|
+
```python
|
|
222
|
+
from hqq.core.quantize import HQQLinear
|
|
223
|
+
|
|
224
|
+
# Try backends in order of speed
|
|
225
|
+
for backend in ["marlin", "torchao_int4", "aten", "pytorch_compile"]:
|
|
226
|
+
try:
|
|
227
|
+
HQQLinear.set_backend(backend)
|
|
228
|
+
print(f"Using {backend}")
|
|
229
|
+
break
|
|
230
|
+
except:
|
|
231
|
+
continue
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
2. **Enable torch.compile**:
|
|
235
|
+
```python
|
|
236
|
+
import torch
|
|
237
|
+
model = torch.compile(model, mode="reduce-overhead")
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
3. **Use CUDA graphs** (for fixed input shapes):
|
|
241
|
+
```python
|
|
242
|
+
# Warmup
|
|
243
|
+
for _ in range(3):
|
|
244
|
+
model.generate(**inputs, max_new_tokens=100)
|
|
245
|
+
|
|
246
|
+
# Enable CUDA graphs
|
|
247
|
+
torch.cuda.synchronize()
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### High Memory Usage During Inference
|
|
251
|
+
|
|
252
|
+
**Problem**: Memory usage higher than expected for quantized model
|
|
253
|
+
|
|
254
|
+
**Solutions**:
|
|
255
|
+
|
|
256
|
+
1. **Clear KV cache**:
|
|
257
|
+
```python
|
|
258
|
+
# Use past_key_values management
|
|
259
|
+
outputs = model.generate(
|
|
260
|
+
**inputs,
|
|
261
|
+
max_new_tokens=100,
|
|
262
|
+
use_cache=True,
|
|
263
|
+
return_dict_in_generate=True
|
|
264
|
+
)
|
|
265
|
+
# Clear after use
|
|
266
|
+
del outputs.past_key_values
|
|
267
|
+
torch.cuda.empty_cache()
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
2. **Reduce batch size**:
|
|
271
|
+
```python
|
|
272
|
+
# Process in smaller batches
|
|
273
|
+
batch_size = 4 # Reduce if OOM
|
|
274
|
+
for i in range(0, len(prompts), batch_size):
|
|
275
|
+
batch = prompts[i:i+batch_size]
|
|
276
|
+
outputs = model.generate(...)
|
|
277
|
+
torch.cuda.empty_cache()
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
3. **Use gradient checkpointing** (for training):
|
|
281
|
+
```python
|
|
282
|
+
model.gradient_checkpointing_enable()
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
## Quality Issues
|
|
286
|
+
|
|
287
|
+
### Poor Generation Quality
|
|
288
|
+
|
|
289
|
+
**Problem**: Quantized model produces gibberish or low-quality output
|
|
290
|
+
|
|
291
|
+
**Solutions**:
|
|
292
|
+
|
|
293
|
+
1. **Increase precision**:
|
|
294
|
+
```python
|
|
295
|
+
# Try higher bit-width
|
|
296
|
+
config = HqqConfig(nbits=8, group_size=128) # Start high
|
|
297
|
+
# Then gradually reduce: 8 -> 4 -> 3 -> 2
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
2. **Use smaller group size**:
|
|
301
|
+
```python
|
|
302
|
+
config = HqqConfig(
|
|
303
|
+
nbits=4,
|
|
304
|
+
group_size=32 # Smaller = more accurate, more memory
|
|
305
|
+
)
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
3. **Skip sensitive layers**:
|
|
309
|
+
```python
|
|
310
|
+
config = HqqConfig(
|
|
311
|
+
nbits=4,
|
|
312
|
+
group_size=64,
|
|
313
|
+
skip_modules=["embed_tokens", "lm_head", "model.layers.0"]
|
|
314
|
+
)
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
4. **Compare outputs**:
|
|
318
|
+
```python
|
|
319
|
+
def compare_outputs(original_model, quantized_model, prompt):
|
|
320
|
+
"""Compare outputs between original and quantized."""
|
|
321
|
+
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
|
|
322
|
+
|
|
323
|
+
with torch.no_grad():
|
|
324
|
+
orig_out = original_model.generate(**inputs, max_new_tokens=50)
|
|
325
|
+
quant_out = quantized_model.generate(**inputs, max_new_tokens=50)
|
|
326
|
+
|
|
327
|
+
print("Original:", tokenizer.decode(orig_out[0]))
|
|
328
|
+
print("Quantized:", tokenizer.decode(quant_out[0]))
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
### Perplexity Degradation
|
|
332
|
+
|
|
333
|
+
**Problem**: Significant perplexity increase after quantization
|
|
334
|
+
|
|
335
|
+
**Diagnosis**:
|
|
336
|
+
```python
|
|
337
|
+
import torch
|
|
338
|
+
from datasets import load_dataset
|
|
339
|
+
|
|
340
|
+
def measure_perplexity(model, tokenizer, dataset_name="wikitext", split="test"):
|
|
341
|
+
"""Measure model perplexity."""
|
|
342
|
+
dataset = load_dataset(dataset_name, "wikitext-2-raw-v1", split=split)
|
|
343
|
+
text = "\n\n".join(dataset["text"])
|
|
344
|
+
|
|
345
|
+
encodings = tokenizer(text, return_tensors="pt")
|
|
346
|
+
max_length = 2048
|
|
347
|
+
stride = 512
|
|
348
|
+
|
|
349
|
+
nlls = []
|
|
350
|
+
for i in range(0, encodings.input_ids.size(1), stride):
|
|
351
|
+
begin = max(i + stride - max_length, 0)
|
|
352
|
+
end = min(i + stride, encodings.input_ids.size(1))
|
|
353
|
+
|
|
354
|
+
input_ids = encodings.input_ids[:, begin:end].to(model.device)
|
|
355
|
+
target_ids = input_ids.clone()
|
|
356
|
+
target_ids[:, :-stride] = -100
|
|
357
|
+
|
|
358
|
+
with torch.no_grad():
|
|
359
|
+
outputs = model(input_ids, labels=target_ids)
|
|
360
|
+
nlls.append(outputs.loss)
|
|
361
|
+
|
|
362
|
+
ppl = torch.exp(torch.stack(nlls).mean())
|
|
363
|
+
return ppl.item()
|
|
364
|
+
|
|
365
|
+
# Compare
|
|
366
|
+
orig_ppl = measure_perplexity(original_model, tokenizer)
|
|
367
|
+
quant_ppl = measure_perplexity(quantized_model, tokenizer)
|
|
368
|
+
print(f"Original PPL: {orig_ppl:.2f}")
|
|
369
|
+
print(f"Quantized PPL: {quant_ppl:.2f}")
|
|
370
|
+
print(f"Degradation: {((quant_ppl - orig_ppl) / orig_ppl * 100):.1f}%")
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
## Integration Issues
|
|
374
|
+
|
|
375
|
+
### HuggingFace Integration Errors
|
|
376
|
+
|
|
377
|
+
**Error**: `ValueError: Unknown quantization method: hqq`
|
|
378
|
+
|
|
379
|
+
**Fix**:
|
|
380
|
+
```bash
|
|
381
|
+
# Update transformers
|
|
382
|
+
pip install -U "transformers>=4.36.0"
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
**Error**: `AttributeError: 'HqqConfig' object has no attribute`
|
|
386
|
+
|
|
387
|
+
**Fix**:
|
|
388
|
+
```python
|
|
389
|
+
from transformers import HqqConfig
|
|
390
|
+
|
|
391
|
+
# Use correct parameter names
|
|
392
|
+
config = HqqConfig(
|
|
393
|
+
nbits=4, # Not 'bits'
|
|
394
|
+
group_size=64, # Not 'groupsize'
|
|
395
|
+
axis=1 # Not 'quant_axis'
|
|
396
|
+
)
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
### vLLM Integration Issues
|
|
400
|
+
|
|
401
|
+
**Error**: `ValueError: HQQ quantization not supported`
|
|
402
|
+
|
|
403
|
+
**Fix**:
|
|
404
|
+
```bash
|
|
405
|
+
# Update vLLM
|
|
406
|
+
pip install -U "vllm>=0.3.0"
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
**Usage**:
|
|
410
|
+
```python
|
|
411
|
+
from vllm import LLM
|
|
412
|
+
|
|
413
|
+
# Load pre-quantized model
|
|
414
|
+
llm = LLM(
|
|
415
|
+
model="mobiuslabsgmbh/Llama-3.1-8B-HQQ-4bit",
|
|
416
|
+
quantization="hqq"
|
|
417
|
+
)
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
### PEFT Integration Issues
|
|
421
|
+
|
|
422
|
+
**Error**: `RuntimeError: Cannot apply LoRA to quantized layer`
|
|
423
|
+
|
|
424
|
+
**Fix**:
|
|
425
|
+
```python
|
|
426
|
+
from peft import prepare_model_for_kbit_training
|
|
427
|
+
|
|
428
|
+
# Prepare model for training
|
|
429
|
+
model = prepare_model_for_kbit_training(model)
|
|
430
|
+
|
|
431
|
+
# Then apply LoRA
|
|
432
|
+
model = get_peft_model(model, lora_config)
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
## Debugging Tips
|
|
436
|
+
|
|
437
|
+
### Enable Verbose Logging
|
|
438
|
+
|
|
439
|
+
```python
|
|
440
|
+
import logging
|
|
441
|
+
|
|
442
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
443
|
+
logging.getLogger("hqq").setLevel(logging.DEBUG)
|
|
444
|
+
```
|
|
445
|
+
|
|
446
|
+
### Verify Quantization Applied
|
|
447
|
+
|
|
448
|
+
```python
|
|
449
|
+
def verify_quantization(model):
|
|
450
|
+
"""Check if model is properly quantized."""
|
|
451
|
+
from hqq.core.quantize import HQQLinear
|
|
452
|
+
|
|
453
|
+
total_linear = 0
|
|
454
|
+
quantized_linear = 0
|
|
455
|
+
|
|
456
|
+
for name, module in model.named_modules():
|
|
457
|
+
if isinstance(module, torch.nn.Linear):
|
|
458
|
+
total_linear += 1
|
|
459
|
+
elif isinstance(module, HQQLinear):
|
|
460
|
+
quantized_linear += 1
|
|
461
|
+
print(f"Quantized: {name} ({module.W_q.dtype}, {module.W_q.shape})")
|
|
462
|
+
|
|
463
|
+
print(f"\nUnquantized Linear: {total_linear}")
|
|
464
|
+
print(f"Quantized: {quantized_linear}")
|
|
465
|
+
print(f"Ratio: {quantized_linear / max(total_linear + quantized_linear, 1) * 100:.1f}%")
|
|
466
|
+
|
|
467
|
+
verify_quantization(model)
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
### Memory Profiling
|
|
471
|
+
|
|
472
|
+
```python
|
|
473
|
+
import torch
|
|
474
|
+
|
|
475
|
+
def profile_memory():
|
|
476
|
+
"""Profile GPU memory usage."""
|
|
477
|
+
print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
|
|
478
|
+
print(f"Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
|
|
479
|
+
print(f"Max Allocated: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")
|
|
480
|
+
|
|
481
|
+
# Before quantization
|
|
482
|
+
profile_memory()
|
|
483
|
+
|
|
484
|
+
# After quantization
|
|
485
|
+
model = load_quantized_model(...)
|
|
486
|
+
profile_memory()
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
## Getting Help
|
|
490
|
+
|
|
491
|
+
1. **GitHub Issues**: https://github.com/mobiusml/hqq/issues
|
|
492
|
+
2. **HuggingFace Forums**: https://discuss.huggingface.co
|
|
493
|
+
3. **Discord**: Check HQQ community channels
|
|
494
|
+
|
|
495
|
+
### Reporting Issues
|
|
496
|
+
|
|
497
|
+
Include:
|
|
498
|
+
- HQQ version: `pip show hqq`
|
|
499
|
+
- PyTorch version: `python -c "import torch; print(torch.__version__)"`
|
|
500
|
+
- CUDA version: `nvcc --version`
|
|
501
|
+
- GPU model: `nvidia-smi --query-gpu=name --format=csv`
|
|
502
|
+
- Full error traceback
|
|
503
|
+
- Minimal reproducible code
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: hugging-face-cli
|
|
3
|
+
description: Execute Hugging Face Hub operations using the `hf` CLI. Use when the user needs to download models/datasets/spaces, upload files to Hub repositories, create repos, manage local cache, or run compute jobs on HF infrastructure. Covers authentication, file transfers, repository creation, cache operations, and cloud compute.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Synthetic Sciences
|
|
6
|
+
license: MIT
|
|
7
|
+
tags: [Hugging Face, CLI, Model Download, Hub]
|
|
8
|
+
dependencies: [huggingface-hub, transformers]
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Hugging Face CLI
|
|
12
|
+
|
|
13
|
+
The `hf` CLI provides direct terminal access to the Hugging Face Hub for downloading, uploading, and managing repositories, cache, and compute resources.
|
|
14
|
+
|
|
15
|
+
## Quick Command Reference
|
|
16
|
+
|
|
17
|
+
| Task | Command |
|
|
18
|
+
|------|---------|
|
|
19
|
+
| Login | `hf auth login` |
|
|
20
|
+
| Download model | `hf download <repo_id>` |
|
|
21
|
+
| Download to folder | `hf download <repo_id> --local-dir ./path` |
|
|
22
|
+
| Upload folder | `hf upload <repo_id> . .` |
|
|
23
|
+
| Create repo | `hf repo create <name>` |
|
|
24
|
+
| Create tag | `hf repo tag create <repo_id> <tag>` |
|
|
25
|
+
| Delete files | `hf repo-files delete <repo_id> <files>` |
|
|
26
|
+
| List cache | `hf cache ls` |
|
|
27
|
+
| Remove from cache | `hf cache rm <repo_or_revision>` |
|
|
28
|
+
| List models | `hf models ls` |
|
|
29
|
+
| Get model info | `hf models info <model_id>` |
|
|
30
|
+
| List datasets | `hf datasets ls` |
|
|
31
|
+
| Get dataset info | `hf datasets info <dataset_id>` |
|
|
32
|
+
| List spaces | `hf spaces ls` |
|
|
33
|
+
| Get space info | `hf spaces info <space_id>` |
|
|
34
|
+
| List endpoints | `hf endpoints ls` |
|
|
35
|
+
| Run GPU job | `hf jobs run --flavor a10g-small <image> <cmd>` |
|
|
36
|
+
| Environment info | `hf env` |
|
|
37
|
+
|
|
38
|
+
## Core Commands
|
|
39
|
+
|
|
40
|
+
### Authentication
|
|
41
|
+
```bash
|
|
42
|
+
hf auth login # Interactive login
|
|
43
|
+
hf auth login --token $HF_TOKEN # Non-interactive
|
|
44
|
+
hf auth whoami # Check current user
|
|
45
|
+
hf auth list # List stored tokens
|
|
46
|
+
hf auth switch # Switch between tokens
|
|
47
|
+
hf auth logout # Log out
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Download
|
|
51
|
+
```bash
|
|
52
|
+
hf download <repo_id> # Full repo to cache
|
|
53
|
+
hf download <repo_id> file.safetensors # Specific file
|
|
54
|
+
hf download <repo_id> --local-dir ./models # To local directory
|
|
55
|
+
hf download <repo_id> --include "*.safetensors" # Filter by pattern
|
|
56
|
+
hf download <repo_id> --repo-type dataset # Dataset
|
|
57
|
+
hf download <repo_id> --revision v1.0 # Specific version
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Upload
|
|
61
|
+
```bash
|
|
62
|
+
hf upload <repo_id> . . # Current dir to root
|
|
63
|
+
hf upload <repo_id> ./models /weights # Folder to path
|
|
64
|
+
hf upload <repo_id> model.safetensors # Single file
|
|
65
|
+
hf upload <repo_id> . . --repo-type dataset # Dataset
|
|
66
|
+
hf upload <repo_id> . . --create-pr # Create PR
|
|
67
|
+
hf upload <repo_id> . . --commit-message="msg" # Custom message
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Repository Management
|
|
71
|
+
```bash
|
|
72
|
+
hf repo create <name> # Create model repo
|
|
73
|
+
hf repo create <name> --repo-type dataset # Create dataset
|
|
74
|
+
hf repo create <name> --private # Private repo
|
|
75
|
+
hf repo create <name> --repo-type space --space_sdk gradio # Gradio space
|
|
76
|
+
hf repo delete <repo_id> # Delete repo
|
|
77
|
+
hf repo move <from_id> <to_id> # Move repo to new namespace
|
|
78
|
+
hf repo settings <repo_id> --private true # Update repo settings
|
|
79
|
+
hf repo list --repo-type model # List repos
|
|
80
|
+
hf repo branch create <repo_id> release-v1 # Create branch
|
|
81
|
+
hf repo branch delete <repo_id> release-v1 # Delete branch
|
|
82
|
+
hf repo tag create <repo_id> v1.0 # Create tag
|
|
83
|
+
hf repo tag list <repo_id> # List tags
|
|
84
|
+
hf repo tag delete <repo_id> v1.0 # Delete tag
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Delete Files from Repo
|
|
88
|
+
```bash
|
|
89
|
+
hf repo-files delete <repo_id> folder/ # Delete folder
|
|
90
|
+
hf repo-files delete <repo_id> "*.txt" # Delete with pattern
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Cache Management
|
|
94
|
+
```bash
|
|
95
|
+
hf cache ls # List cached repos
|
|
96
|
+
hf cache ls --revisions # Include individual revisions
|
|
97
|
+
hf cache rm model/gpt2 # Remove cached repo
|
|
98
|
+
hf cache rm <revision_hash> # Remove cached revision
|
|
99
|
+
hf cache prune # Remove detached revisions
|
|
100
|
+
hf cache verify gpt2 # Verify checksums from cache
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Browse Hub
|
|
104
|
+
```bash
|
|
105
|
+
# Models
|
|
106
|
+
hf models ls # List top trending models
|
|
107
|
+
hf models ls --search "MiniMax" --author MiniMaxAI # Search models
|
|
108
|
+
hf models ls --filter "text-generation" --limit 20 # Filter by task
|
|
109
|
+
hf models info MiniMaxAI/MiniMax-M2.1 # Get model info
|
|
110
|
+
|
|
111
|
+
# Datasets
|
|
112
|
+
hf datasets ls # List top trending datasets
|
|
113
|
+
hf datasets ls --search "finepdfs" --sort downloads # Search datasets
|
|
114
|
+
hf datasets info HuggingFaceFW/finepdfs # Get dataset info
|
|
115
|
+
|
|
116
|
+
# Spaces
|
|
117
|
+
hf spaces ls # List top trending spaces
|
|
118
|
+
hf spaces ls --filter "3d" --limit 10 # Filter by 3D modeling spaces
|
|
119
|
+
hf spaces info enzostvs/deepsite # Get space info
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Jobs (Cloud Compute)
|
|
123
|
+
```bash
|
|
124
|
+
hf jobs run python:3.12 python script.py # Run on CPU
|
|
125
|
+
hf jobs run --flavor a10g-small <image> <cmd> # Run on GPU
|
|
126
|
+
hf jobs run --secrets HF_TOKEN <image> <cmd> # With HF token
|
|
127
|
+
hf jobs ps # List jobs
|
|
128
|
+
hf jobs logs <job_id> # View logs
|
|
129
|
+
hf jobs cancel <job_id> # Cancel job
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### Inference Endpoints
|
|
133
|
+
```bash
|
|
134
|
+
hf endpoints ls # List endpoints
|
|
135
|
+
hf endpoints deploy my-endpoint \
|
|
136
|
+
--repo openai/gpt-oss-120b \
|
|
137
|
+
--framework vllm \
|
|
138
|
+
--accelerator gpu \
|
|
139
|
+
--instance-size x4 \
|
|
140
|
+
--instance-type nvidia-a10g \
|
|
141
|
+
--region us-east-1 \
|
|
142
|
+
--vendor aws
|
|
143
|
+
hf endpoints describe my-endpoint # Show endpoint details
|
|
144
|
+
hf endpoints pause my-endpoint # Pause endpoint
|
|
145
|
+
hf endpoints resume my-endpoint # Resume endpoint
|
|
146
|
+
hf endpoints scale-to-zero my-endpoint # Scale to zero
|
|
147
|
+
hf endpoints delete my-endpoint --yes # Delete endpoint
|
|
148
|
+
```
|
|
149
|
+
**GPU Flavors:** `cpu-basic`, `cpu-upgrade`, `cpu-xl`, `t4-small`, `t4-medium`, `l4x1`, `l4x4`, `l40sx1`, `l40sx4`, `l40sx8`, `a10g-small`, `a10g-large`, `a10g-largex2`, `a10g-largex4`, `a100-large`, `h100`, `h100x8`
|
|
150
|
+
|
|
151
|
+
## Common Patterns
|
|
152
|
+
|
|
153
|
+
### Download and Use Model Locally
|
|
154
|
+
```bash
|
|
155
|
+
# Download to local directory for deployment
|
|
156
|
+
hf download meta-llama/Llama-3.2-1B-Instruct --local-dir ./model
|
|
157
|
+
|
|
158
|
+
# Or use cache and get path
|
|
159
|
+
MODEL_PATH=$(hf download meta-llama/Llama-3.2-1B-Instruct --quiet)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Publish Model/Dataset
|
|
163
|
+
```bash
|
|
164
|
+
hf repo create my-username/my-model --private
|
|
165
|
+
hf upload my-username/my-model ./output . --commit-message="Initial release"
|
|
166
|
+
hf repo tag create my-username/my-model v1.0
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Sync Space with Local
|
|
170
|
+
```bash
|
|
171
|
+
hf upload my-username/my-space . . --repo-type space \
|
|
172
|
+
--exclude="logs/*" --delete="*" --commit-message="Sync"
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Check Cache Usage
|
|
176
|
+
```bash
|
|
177
|
+
hf cache ls # See all cached repos and sizes
|
|
178
|
+
hf cache rm model/gpt2 # Remove a repo from cache
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Key Options
|
|
182
|
+
|
|
183
|
+
- `--repo-type`: `model` (default), `dataset`, `space`
|
|
184
|
+
- `--revision`: Branch, tag, or commit hash
|
|
185
|
+
- `--token`: Override authentication
|
|
186
|
+
- `--quiet`: Output only essential info (paths/URLs)
|
|
187
|
+
|
|
188
|
+
## References
|
|
189
|
+
|
|
190
|
+
- **Complete command reference**: See [references/commands.md](references/commands.md)
|
|
191
|
+
- **Workflow examples**: See [references/examples.md](references/examples.md)
|