@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
# Saving Training Results to Hugging Face Hub
|
|
2
|
+
|
|
3
|
+
**⚠️ CRITICAL:** Training environments are ephemeral. ALL results are lost when a job completes unless pushed to the Hub.
|
|
4
|
+
|
|
5
|
+
## Why Hub Push is Required
|
|
6
|
+
|
|
7
|
+
When running on Hugging Face Jobs:
|
|
8
|
+
- Environment is temporary
|
|
9
|
+
- All files deleted on job completion
|
|
10
|
+
- No local disk persistence
|
|
11
|
+
- Cannot access results after job ends
|
|
12
|
+
|
|
13
|
+
**Without Hub push, training is completely wasted.**
|
|
14
|
+
|
|
15
|
+
## Required Configuration
|
|
16
|
+
|
|
17
|
+
### 1. Training Configuration
|
|
18
|
+
|
|
19
|
+
In your SFTConfig or trainer config:
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
SFTConfig(
|
|
23
|
+
push_to_hub=True, # Enable Hub push
|
|
24
|
+
hub_model_id="username/model-name", # Target repository
|
|
25
|
+
)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### 2. Job Configuration
|
|
29
|
+
|
|
30
|
+
When submitting the job:
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
hf_jobs("uv", {
|
|
34
|
+
"script": "train.py",
|
|
35
|
+
"secrets": {"HF_TOKEN": "$HF_TOKEN"} # Provide authentication
|
|
36
|
+
})
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**The `$HF_TOKEN` placeholder is automatically replaced with your Hugging Face token.**
|
|
40
|
+
|
|
41
|
+
## Complete Example
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
# train.py
|
|
45
|
+
# /// script
|
|
46
|
+
# dependencies = ["trl"]
|
|
47
|
+
# ///
|
|
48
|
+
|
|
49
|
+
from trl import SFTTrainer, SFTConfig
|
|
50
|
+
from datasets import load_dataset
|
|
51
|
+
|
|
52
|
+
dataset = load_dataset("trl-lib/Capybara", split="train")
|
|
53
|
+
|
|
54
|
+
# Configure with Hub push
|
|
55
|
+
config = SFTConfig(
|
|
56
|
+
output_dir="my-model",
|
|
57
|
+
num_train_epochs=3,
|
|
58
|
+
|
|
59
|
+
# ✅ CRITICAL: Hub push configuration
|
|
60
|
+
push_to_hub=True,
|
|
61
|
+
hub_model_id="myusername/my-trained-model",
|
|
62
|
+
|
|
63
|
+
# Optional: legacy aliases (deprecated; prefer hub_model_id / hub_token — use hub_strategy to control when pushes happen)
|
|
64
|
+
push_to_hub_model_id="myusername/my-trained-model",
|
|
65
|
+
push_to_hub_organization=None,
|
|
66
|
+
push_to_hub_token=None, # Uses environment token
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
trainer = SFTTrainer(
|
|
70
|
+
model="Qwen/Qwen2.5-0.5B",
|
|
71
|
+
train_dataset=dataset,
|
|
72
|
+
args=config,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
trainer.train()
|
|
76
|
+
|
|
77
|
+
# ✅ Push final model
|
|
78
|
+
trainer.push_to_hub()
|
|
79
|
+
|
|
80
|
+
print("✅ Model saved to: https://huggingface.co/myusername/my-trained-model")
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**Submit with authentication:**
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
hf_jobs("uv", {
|
|
87
|
+
"script": "train.py",
|
|
88
|
+
"flavor": "a10g-large",
|
|
89
|
+
"timeout": "2h",
|
|
90
|
+
"secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Required!
|
|
91
|
+
})
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## What Gets Saved
|
|
95
|
+
|
|
96
|
+
When `push_to_hub=True`:
|
|
97
|
+
|
|
98
|
+
1. **Model weights** - Final trained parameters
|
|
99
|
+
2. **Tokenizer** - Associated tokenizer
|
|
100
|
+
3. **Configuration** - Model config (config.json)
|
|
101
|
+
4. **Training arguments** - Hyperparameters used
|
|
102
|
+
5. **Model card** - Auto-generated documentation
|
|
103
|
+
6. **Checkpoints** - If `save_strategy="steps"` is enabled
|
|
104
|
+
|
|
105
|
+
## Checkpoint Saving
|
|
106
|
+
|
|
107
|
+
Save intermediate checkpoints during training:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
SFTConfig(
|
|
111
|
+
output_dir="my-model",
|
|
112
|
+
push_to_hub=True,
|
|
113
|
+
hub_model_id="username/my-model",
|
|
114
|
+
|
|
115
|
+
# Checkpoint configuration
|
|
116
|
+
save_strategy="steps",
|
|
117
|
+
save_steps=100, # Save every 100 steps
|
|
118
|
+
save_total_limit=3, # Keep only last 3 checkpoints
|
|
119
|
+
)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**Benefits:**
|
|
123
|
+
- Resume training if job fails
|
|
124
|
+
- Compare checkpoint performance
|
|
125
|
+
- Use intermediate models
|
|
126
|
+
|
|
127
|
+
**Checkpoints are pushed to:** `username/my-model` (same repo)
|
|
128
|
+
|
|
129
|
+
## Authentication Methods
|
|
130
|
+
|
|
131
|
+
### Method 1: Automatic Token (Recommended)
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
"secrets": {"HF_TOKEN": "$HF_TOKEN"}
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Uses your logged-in Hugging Face token automatically.
|
|
138
|
+
|
|
139
|
+
### Method 2: Explicit Token
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
"secrets": {"HF_TOKEN": "hf_abc123..."}
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Provide token explicitly (not recommended for security).
|
|
146
|
+
|
|
147
|
+
### Method 3: Environment Variable
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
"env": {"HF_TOKEN": "hf_abc123..."}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Pass as regular environment variable (less secure than secrets).
|
|
154
|
+
|
|
155
|
+
**Always prefer Method 1** for security and convenience.
|
|
156
|
+
|
|
157
|
+
## Verification Checklist
|
|
158
|
+
|
|
159
|
+
Before submitting any training job, verify:
|
|
160
|
+
|
|
161
|
+
- [ ] `push_to_hub=True` in training config
|
|
162
|
+
- [ ] `hub_model_id` is specified (format: `username/model-name`)
|
|
163
|
+
- [ ] `"secrets": {"HF_TOKEN": "$HF_TOKEN"}` in job config
|
|
164
|
+
- [ ] Repository name doesn't conflict with existing repos
|
|
165
|
+
- [ ] You have write access to the target namespace
|
|
166
|
+
|
|
167
|
+
## Repository Setup
|
|
168
|
+
|
|
169
|
+
### Automatic Creation
|
|
170
|
+
|
|
171
|
+
If the repository doesn't exist, it is created automatically on the first push.
|
|
172
|
+
|
|
173
|
+
### Manual Creation
|
|
174
|
+
|
|
175
|
+
Create repository before training:
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
from huggingface_hub import HfApi
|
|
179
|
+
|
|
180
|
+
api = HfApi()
|
|
181
|
+
api.create_repo(
|
|
182
|
+
repo_id="username/model-name",
|
|
183
|
+
repo_type="model",
|
|
184
|
+
private=False, # or True for private repo
|
|
185
|
+
)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Repository Naming
|
|
189
|
+
|
|
190
|
+
**Valid names:**
|
|
191
|
+
- `username/my-model`
|
|
192
|
+
- `username/model-name`
|
|
193
|
+
- `organization/model-name`
|
|
194
|
+
|
|
195
|
+
**Invalid names:**
|
|
196
|
+
- `model-name` (missing username)
|
|
197
|
+
- `username/model name` (spaces not allowed)
|
|
198
|
+
- `username/MODEL` (uppercase discouraged)
|
|
199
|
+
|
|
200
|
+
## Troubleshooting
|
|
201
|
+
|
|
202
|
+
### Error: 401 Unauthorized
|
|
203
|
+
|
|
204
|
+
**Cause:** HF_TOKEN not provided or invalid
|
|
205
|
+
|
|
206
|
+
**Solutions:**
|
|
207
|
+
1. Verify `"secrets": {"HF_TOKEN": "$HF_TOKEN"}` is in the job config
|
|
208
|
+
2. Check you're logged in: `huggingface-cli whoami`
|
|
209
|
+
3. Re-login: `huggingface-cli login`
|
|
210
|
+
|
|
211
|
+
### Error: 403 Forbidden
|
|
212
|
+
|
|
213
|
+
**Cause:** No write access to repository
|
|
214
|
+
|
|
215
|
+
**Solutions:**
|
|
216
|
+
1. Check repository namespace matches your username
|
|
217
|
+
2. Verify you're a member of organization (if using org namespace)
|
|
218
|
+
3. Check repository isn't private (if accessing org repo)
|
|
219
|
+
|
|
220
|
+
### Error: Repository not found
|
|
221
|
+
|
|
222
|
+
**Cause:** Repository doesn't exist and auto-creation failed
|
|
223
|
+
|
|
224
|
+
**Solutions:**
|
|
225
|
+
1. Manually create repository first
|
|
226
|
+
2. Check repository name format
|
|
227
|
+
3. Verify namespace exists
|
|
228
|
+
|
|
229
|
+
### Error: Push failed during training
|
|
230
|
+
|
|
231
|
+
**Cause:** Network issues or Hub unavailable
|
|
232
|
+
|
|
233
|
+
**Solutions:**
|
|
234
|
+
1. Training continues but final push fails
|
|
235
|
+
2. Checkpoints may be saved
|
|
236
|
+
3. Re-run the push manually while the job's files still exist (they are deleted once the job completes)
|
|
237
|
+
|
|
238
|
+
### Issue: Model saved but not visible
|
|
239
|
+
|
|
240
|
+
**Possible causes:**
|
|
241
|
+
1. Repository is private—check https://huggingface.co/username
|
|
242
|
+
2. Wrong namespace—verify `hub_model_id` matches login
|
|
243
|
+
3. Push still in progress—wait a few minutes
|
|
244
|
+
|
|
245
|
+
## Manual Push After Training
|
|
246
|
+
|
|
247
|
+
If training completes but push fails, push manually:
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from transformers import AutoModel, AutoTokenizer
|
|
251
|
+
|
|
252
|
+
# Load from local checkpoint
|
|
253
|
+
model = AutoModel.from_pretrained("./output_dir")
|
|
254
|
+
tokenizer = AutoTokenizer.from_pretrained("./output_dir")
|
|
255
|
+
|
|
256
|
+
# Push to Hub
|
|
257
|
+
model.push_to_hub("username/model-name", token="hf_abc123...")
|
|
258
|
+
tokenizer.push_to_hub("username/model-name", token="hf_abc123...")
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
**Note:** This is only possible while the job hasn't completed and its local files still exist; once the job has completed, the files are no longer available.
|
|
262
|
+
|
|
263
|
+
## Best Practices
|
|
264
|
+
|
|
265
|
+
1. **Always enable `push_to_hub=True`**
|
|
266
|
+
2. **Use checkpoint saving** for long training runs
|
|
267
|
+
3. **Verify Hub push** in logs before job completes
|
|
268
|
+
4. **Set appropriate `save_total_limit`** to avoid excessive checkpoints
|
|
269
|
+
5. **Use descriptive repo names** (e.g., `qwen-capybara-sft` not `model1`)
|
|
270
|
+
6. **Add a model card** with training details
|
|
271
|
+
7. **Tag models** with relevant tags (e.g., `text-generation`, `fine-tuned`)
|
|
272
|
+
|
|
273
|
+
## Monitoring Push Progress
|
|
274
|
+
|
|
275
|
+
Check logs for push progress:
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
hf_jobs("logs", {"job_id": "your-job-id"})
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
**Look for:**
|
|
282
|
+
```
|
|
283
|
+
Pushing model to username/model-name...
|
|
284
|
+
Upload file pytorch_model.bin: 100%
|
|
285
|
+
✅ Model pushed successfully
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
## Example: Full Production Setup
|
|
289
|
+
|
|
290
|
+
```python
|
|
291
|
+
# production_train.py
|
|
292
|
+
# /// script
|
|
293
|
+
# dependencies = ["trl>=0.12.0", "peft>=0.7.0"]
|
|
294
|
+
# ///
|
|
295
|
+
|
|
296
|
+
from datasets import load_dataset
|
|
297
|
+
from peft import LoraConfig
|
|
298
|
+
from trl import SFTTrainer, SFTConfig
|
|
299
|
+
import os
|
|
300
|
+
|
|
301
|
+
# Verify token is available
|
|
302
|
+
assert "HF_TOKEN" in os.environ, "HF_TOKEN not found in environment!"
|
|
303
|
+
|
|
304
|
+
# Load dataset
|
|
305
|
+
dataset = load_dataset("trl-lib/Capybara", split="train")
|
|
306
|
+
print(f"✅ Dataset loaded: {len(dataset)} examples")
|
|
307
|
+
|
|
308
|
+
# Configure with comprehensive Hub settings
|
|
309
|
+
config = SFTConfig(
|
|
310
|
+
output_dir="qwen-capybara-sft",
|
|
311
|
+
|
|
312
|
+
# Hub configuration
|
|
313
|
+
push_to_hub=True,
|
|
314
|
+
hub_model_id="myusername/qwen-capybara-sft",
|
|
315
|
+
hub_strategy="checkpoint", # Push checkpoints
|
|
316
|
+
|
|
317
|
+
# Checkpoint configuration
|
|
318
|
+
save_strategy="steps",
|
|
319
|
+
save_steps=100,
|
|
320
|
+
save_total_limit=3,
|
|
321
|
+
|
|
322
|
+
# Training settings
|
|
323
|
+
num_train_epochs=3,
|
|
324
|
+
per_device_train_batch_size=4,
|
|
325
|
+
|
|
326
|
+
# Logging
|
|
327
|
+
logging_steps=10,
|
|
328
|
+
logging_first_step=True,
|
|
329
|
+
)
|
|
330
|
+
|
|
331
|
+
# Train with LoRA
|
|
332
|
+
trainer = SFTTrainer(
|
|
333
|
+
model="Qwen/Qwen2.5-0.5B",
|
|
334
|
+
train_dataset=dataset,
|
|
335
|
+
args=config,
|
|
336
|
+
peft_config=LoraConfig(r=16, lora_alpha=32),
|
|
337
|
+
)
|
|
338
|
+
|
|
339
|
+
print("🚀 Starting training...")
|
|
340
|
+
trainer.train()
|
|
341
|
+
|
|
342
|
+
print("💾 Pushing final model to Hub...")
|
|
343
|
+
trainer.push_to_hub()
|
|
344
|
+
|
|
345
|
+
print("✅ Training complete!")
|
|
346
|
+
print(f"Model available at: https://huggingface.co/myusername/qwen-capybara-sft")
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
**Submit:**
|
|
350
|
+
|
|
351
|
+
```python
|
|
352
|
+
hf_jobs("uv", {
|
|
353
|
+
"script": "production_train.py",
|
|
354
|
+
"flavor": "a10g-large",
|
|
355
|
+
"timeout": "6h",
|
|
356
|
+
"secrets": {"HF_TOKEN": "$HF_TOKEN"}
|
|
357
|
+
})
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
## Key Takeaway
|
|
361
|
+
|
|
362
|
+
**Without `push_to_hub=True` and `secrets={"HF_TOKEN": "$HF_TOKEN"}`, all training results are permanently lost.**
|
|
363
|
+
|
|
364
|
+
Always verify both are configured before submitting any training job.
|