@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
# MoE Inference Optimization
|
|
2
|
+
|
|
3
|
+
Complete guide to optimizing MoE inference based on MoE-Inference-Bench research (arXiv 2508.17467, 2024).
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
- Performance Metrics
|
|
7
|
+
- vLLM Optimizations
|
|
8
|
+
- Quantization
|
|
9
|
+
- Expert Parallelism
|
|
10
|
+
- Optimization Techniques
|
|
11
|
+
- Production Deployment
|
|
12
|
+
|
|
13
|
+
## Performance Metrics
|
|
14
|
+
|
|
15
|
+
**Source**: MoE-Inference-Bench (arXiv 2508.17467)
|
|
16
|
+
|
|
17
|
+
### Key Metrics
|
|
18
|
+
|
|
19
|
+
1. **Time to First Token (TTFT)**
|
|
20
|
+
- Latency until first token generated
|
|
21
|
+
- Critical for user experience
|
|
22
|
+
|
|
23
|
+
2. **Inter-Token Latency (ITL)**
|
|
24
|
+
- Time between consecutive tokens
|
|
25
|
+
- Affects streaming experience
|
|
26
|
+
|
|
27
|
+
3. **Throughput**
|
|
28
|
+
- Formula: `(Batch Size × (Input + Output Tokens)) / Total Latency`
|
|
29
|
+
- Higher is better
|
|
30
|
+
|
|
31
|
+
### Benchmark Results (H100 GPU)
|
|
32
|
+
|
|
33
|
+
**LLM Performance**:
|
|
34
|
+
- **OLMoE-1B-7B**: Highest throughput
|
|
35
|
+
- **Mixtral-8x7B**: Highest accuracy, lower throughput
|
|
36
|
+
- **Qwen3-30B**: High accuracy, moderate throughput
|
|
37
|
+
|
|
38
|
+
**VLM Performance**:
|
|
39
|
+
- **DeepSeek-VL2-Tiny**: Fastest, lowest accuracy
|
|
40
|
+
- **DeepSeek-VL2**: Highest accuracy, lowest throughput
|
|
41
|
+
|
|
42
|
+
## vLLM Optimizations
|
|
43
|
+
|
|
44
|
+
**Source**: MoE-Inference-Bench 2024, vLLM documentation
|
|
45
|
+
|
|
46
|
+
### Expert Parallelism
|
|
47
|
+
|
|
48
|
+
Distribute experts across GPUs for parallel execution.
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from vllm import LLM, SamplingParams
|
|
52
|
+
|
|
53
|
+
# Enable expert parallelism
|
|
54
|
+
llm = LLM(
|
|
55
|
+
model="mistralai/Mixtral-8x7B-v0.1",
|
|
56
|
+
tensor_parallel_size=2, # Tensor parallelism
|
|
57
|
+
enable_expert_parallel=True, # Expert parallelism
|
|
58
|
+
gpu_memory_utilization=0.9
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# Generate
|
|
62
|
+
outputs = llm.generate(
|
|
63
|
+
prompts=["What is mixture of experts?"],
|
|
64
|
+
sampling_params=SamplingParams(temperature=0.7, max_tokens=256)
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Parallelism Strategies
|
|
69
|
+
|
|
70
|
+
**From MoE-Inference-Bench**:
|
|
71
|
+
|
|
72
|
+
| Strategy | Throughput Gain | Best For |
|
|
73
|
+
|----------|----------------|----------|
|
|
74
|
+
| **Tensor Parallelism** | High | Large models, multi-GPU |
|
|
75
|
+
| **Expert Parallelism** | Moderate | MoE-specific, many experts |
|
|
76
|
+
| **Pipeline Parallelism** | Low | Very large models |
|
|
77
|
+
|
|
78
|
+
**Recommendation**: Tensor parallelism most effective for MoE models
|
|
79
|
+
|
|
80
|
+
### Fused MoE Kernels
|
|
81
|
+
|
|
82
|
+
**Performance Gain**: 12-18% throughput improvement
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
# vLLM automatically uses fused kernels when available
|
|
86
|
+
llm = LLM(
|
|
87
|
+
model="mistralai/Mixtral-8x7B-v0.1",
|
|
88
|
+
use_v2_block_manager=True # Enable fused MoE kernels
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**What it does**:
|
|
93
|
+
- Reduces kernel launch overhead
|
|
94
|
+
- Combines multiple operations into single kernel
|
|
95
|
+
- Better GPU utilization
|
|
96
|
+
|
|
97
|
+
## Quantization
|
|
98
|
+
|
|
99
|
+
**Source**: MoE-Inference-Bench quantization analysis
|
|
100
|
+
|
|
101
|
+
### FP8 Quantization
|
|
102
|
+
|
|
103
|
+
**Performance**: 20-30% throughput improvement over FP16
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from vllm import LLM
|
|
107
|
+
|
|
108
|
+
# FP8 quantization
|
|
109
|
+
llm = LLM(
|
|
110
|
+
model="mistralai/Mixtral-8x7B-v0.1",
|
|
111
|
+
quantization="fp8" # FP8 quantization
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**Trade-offs**:
|
|
116
|
+
- Throughput: +20-30%
|
|
117
|
+
- Memory: -40-50%
|
|
118
|
+
- Accuracy: Minimal degradation (<1%)
|
|
119
|
+
|
|
120
|
+
### INT8 Quantization
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
# INT8 weight-only quantization
|
|
124
|
+
llm = LLM(
|
|
125
|
+
model="mistralai/Mixtral-8x7B-v0.1",
|
|
126
|
+
quantization="awq" # or "gptq"
|
|
127
|
+
)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**Performance**:
|
|
131
|
+
- Throughput: +15-20%
|
|
132
|
+
- Memory: -50-60%
|
|
133
|
+
- Quality: Slight degradation (1-2%)
|
|
134
|
+
|
|
135
|
+
## Expert Configuration
|
|
136
|
+
|
|
137
|
+
**Source**: MoE-Inference-Bench hyperparameter analysis
|
|
138
|
+
|
|
139
|
+
### Active Experts
|
|
140
|
+
|
|
141
|
+
**Key Finding**: Single-expert activation → 50-80% higher throughput
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
# Top-1 routing (best throughput)
|
|
145
|
+
# Mixtral default is top-2, but top-1 can be enforced at inference
|
|
146
|
+
|
|
147
|
+
# Model architecture determines this
|
|
148
|
+
# Cannot change at runtime, but affects deployment planning
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
**Performance vs Experts**:
|
|
152
|
+
- 1 expert/token: +50-80% throughput vs top-2
|
|
153
|
+
- 2 experts/token: Balanced (Mixtral default)
|
|
154
|
+
- 3+ experts/token: Lower throughput, higher quality
|
|
155
|
+
|
|
156
|
+
### Total Expert Count
|
|
157
|
+
|
|
158
|
+
**Scaling**: Non-linear, diminishing returns at high counts
|
|
159
|
+
|
|
160
|
+
| Total Experts | Throughput | Memory |
|
|
161
|
+
|--------------|------------|--------|
|
|
162
|
+
| 8 | Baseline | Baseline |
|
|
163
|
+
| 16 | +15% | +20% |
|
|
164
|
+
| 32 | +25% | +45% |
|
|
165
|
+
| 64 | +30% | +90% |
|
|
166
|
+
| 128 | +32% | +180% |
|
|
167
|
+
|
|
168
|
+
**Recommendation**: 8-32 experts for optimal throughput/memory
|
|
169
|
+
|
|
170
|
+
### FFN Dimension
|
|
171
|
+
|
|
172
|
+
**Key Finding**: Performance degrades with increasing FFN size
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# Smaller FFN = better throughput
|
|
176
|
+
# Trade-off: model capacity vs inference speed
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
| FFN Dimension | Throughput | Quality |
|
|
180
|
+
|---------------|------------|---------|
|
|
181
|
+
| 2048 | High | Moderate |
|
|
182
|
+
| 4096 | Moderate | High |
|
|
183
|
+
| 8192 | Low | Very High |
|
|
184
|
+
|
|
185
|
+
## Optimization Techniques
|
|
186
|
+
|
|
187
|
+
**Source**: MoE-Inference-Bench optimization experiments
|
|
188
|
+
|
|
189
|
+
### 1. Speculative Decoding
|
|
190
|
+
|
|
191
|
+
**Performance**: 1.5-2.5× speedup
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
from vllm import LLM, SamplingParams
|
|
195
|
+
|
|
196
|
+
# Main model (large MoE)
|
|
197
|
+
main_model = LLM(model="mistralai/Mixtral-8x7B-v0.1")
|
|
198
|
+
|
|
199
|
+
# Draft model (small, fast)
|
|
200
|
+
draft_model = LLM(model="Qwen/Qwen3-1.7B")
|
|
201
|
+
|
|
202
|
+
# Speculative decoding with draft model
|
|
203
|
+
# NOTE: creating two separate LLM objects does NOT enable speculative decoding;
# pass the draft model via the speculative decoding config of a single LLM
# (e.g. speculative_model=... / speculative_config) — see vLLM docs
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
**Best draft models** (from research):
|
|
207
|
+
- Medium-sized (1.7B-3B parameters)
|
|
208
|
+
- Qwen3-1.7B most effective
|
|
209
|
+
- Too small (<1B): low acceptance rate
|
|
210
|
+
- Too large (>7B): overhead dominates
|
|
211
|
+
|
|
212
|
+
### 2. Expert Pruning
|
|
213
|
+
|
|
214
|
+
**Performance**: 50% pruning → significant throughput gain
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
# Prune least-used experts (offline)
|
|
218
|
+
# Example: Keep top-50% experts by usage
|
|
219
|
+
|
|
220
|
+
# Requires profiling on representative data:
|
|
221
|
+
# 1. Track expert utilization
|
|
222
|
+
# 2. Prune unused/rarely-used experts
|
|
223
|
+
# 3. Fine-tune pruned model (optional)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
**Trade-off**:
|
|
227
|
+
- 50% pruning: +40-60% throughput, -2-5% accuracy
|
|
228
|
+
- 75% pruning: +80-120% throughput, -5-15% accuracy
|
|
229
|
+
|
|
230
|
+
### 3. Batch Size Tuning
|
|
231
|
+
|
|
232
|
+
```python
|
|
233
|
+
# Larger batches = better throughput (until OOM)
|
|
234
|
+
llm = LLM(
|
|
235
|
+
model="mistralai/Mixtral-8x7B-v0.1",
|
|
236
|
+
max_num_seqs=256, # Maximum batch size
|
|
237
|
+
max_num_batched_tokens=8192 # Total tokens in batch
|
|
238
|
+
)
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
**Optimal batch sizes** (H100):
|
|
242
|
+
- Mixtral-8x7B: 64-128
|
|
243
|
+
- Smaller MoE (8 experts): 128-256
|
|
244
|
+
- Larger MoE (>16 experts): 32-64
|
|
245
|
+
|
|
246
|
+
## Production Deployment
|
|
247
|
+
|
|
248
|
+
### Single GPU (Consumer Hardware)
|
|
249
|
+
|
|
250
|
+
```python
|
|
251
|
+
from vllm import LLM
|
|
252
|
+
|
|
253
|
+
# Optimize for single GPU
|
|
254
|
+
llm = LLM(
|
|
255
|
+
model="mistralai/Mixtral-8x7B-v0.1",
|
|
256
|
+
gpu_memory_utilization=0.95, # Use 95% of VRAM
|
|
257
|
+
max_num_seqs=32, # Smaller batches
|
|
258
|
+
quantization="awq" # Quantize to fit
|
|
259
|
+
)
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
**Minimum requirements**:
|
|
263
|
+
- Mixtral-8x7B: ~94GB VRAM (FP16), ~47GB (INT8), or ~24GB (INT4)
|
|
264
|
+
- Expert parallelism not needed
|
|
265
|
+
|
|
266
|
+
### Multi-GPU (Data Center)
|
|
267
|
+
|
|
268
|
+
```python
|
|
269
|
+
# Tensor parallelism + Expert parallelism
|
|
270
|
+
llm = LLM(
|
|
271
|
+
model="mistralai/Mixtral-8x7B-v0.1",
|
|
272
|
+
tensor_parallel_size=2, # Split across 2 GPUs
|
|
273
|
+
enable_expert_parallel=True, # Distribute experts
|
|
274
|
+
gpu_memory_utilization=0.9
|
|
275
|
+
)
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
**Scaling strategy**:
|
|
279
|
+
- 2 GPUs: Tensor parallelism
|
|
280
|
+
- 4+ GPUs: Tensor + expert parallelism
|
|
281
|
+
- 8+ GPUs: Consider pipeline parallelism
|
|
282
|
+
|
|
283
|
+
### Production Configuration
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
# Optimized for production
|
|
287
|
+
llm = LLM(
|
|
288
|
+
model="mistralai/Mixtral-8x7B-v0.1",
|
|
289
|
+
|
|
290
|
+
# Parallelism
|
|
291
|
+
tensor_parallel_size=2,
|
|
292
|
+
enable_expert_parallel=True,
|
|
293
|
+
|
|
294
|
+
# Memory
|
|
295
|
+
gpu_memory_utilization=0.9,
|
|
296
|
+
swap_space=4, # 4GB CPU swap
|
|
297
|
+
|
|
298
|
+
# Performance
|
|
299
|
+
    use_v2_block_manager=True, # Block manager v2 (paged KV cache; default in newer vLLM)
|
|
300
|
+
max_num_seqs=64,
|
|
301
|
+
max_num_batched_tokens=4096,
|
|
302
|
+
|
|
303
|
+
# Optional: Quantization
|
|
304
|
+
quantization="fp8"
|
|
305
|
+
)
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
### Monitoring
|
|
309
|
+
|
|
310
|
+
```python
|
|
311
|
+
import time
|
|
312
|
+
|
|
313
|
+
# Track metrics
|
|
314
|
+
def monitor_inference(llm, prompts):
|
|
315
|
+
start = time.time()
|
|
316
|
+
outputs = llm.generate(prompts)
|
|
317
|
+
end = time.time()
|
|
318
|
+
|
|
319
|
+
total_time = end - start
|
|
320
|
+
total_tokens = sum(len(o.outputs[0].token_ids) for o in outputs)
|
|
321
|
+
|
|
322
|
+
print(f"Throughput: {total_tokens / total_time:.2f} tokens/sec")
|
|
323
|
+
print(f"Latency: {total_time / len(prompts):.2f} sec/request")
|
|
324
|
+
|
|
325
|
+
return outputs
|
|
326
|
+
|
|
327
|
+
# Usage
|
|
328
|
+
outputs = monitor_inference(llm, ["Prompt 1", "Prompt 2"])
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
## Optimization Checklist
|
|
332
|
+
|
|
333
|
+
**From MoE-Inference-Bench best practices:**
|
|
334
|
+
|
|
335
|
+
- [ ] Use FP8 quantization (20-30% speedup)
|
|
336
|
+
- [ ] Enable fused MoE kernels (12-18% speedup)
|
|
337
|
+
- [ ] Tune batch size for your hardware
|
|
338
|
+
- [ ] Use tensor parallelism for multi-GPU
|
|
339
|
+
- [ ] Consider speculative decoding (1.5-2.5× speedup)
|
|
340
|
+
- [ ] Profile expert utilization, prune if needed
|
|
341
|
+
- [ ] Optimize active expert count (top-1 vs top-2)
|
|
342
|
+
- [ ] Monitor and tune GPU memory utilization
|
|
343
|
+
|
|
344
|
+
## Resources
|
|
345
|
+
|
|
346
|
+
- **MoE-Inference-Bench**: https://arxiv.org/abs/2508.17467
|
|
347
|
+
- **vLLM Documentation**: https://docs.vllm.ai
|
|
348
|
+
- **PyTorch MoE Optimization**: https://pytorch.org/blog/accelerating-moe-model/
|