@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,666 @@
|
|
|
1
|
+
# AudioCraft Advanced Usage Guide
|
|
2
|
+
|
|
3
|
+
## Fine-tuning MusicGen
|
|
4
|
+
|
|
5
|
+
### Custom dataset preparation
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
import os
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
import torchaudio
|
|
12
|
+
|
|
13
|
+
def prepare_dataset(audio_dir, output_dir, metadata_file):
|
|
14
|
+
"""
|
|
15
|
+
Prepare dataset for MusicGen fine-tuning.
|
|
16
|
+
|
|
17
|
+
Directory structure:
|
|
18
|
+
output_dir/
|
|
19
|
+
├── audio/
|
|
20
|
+
│ ├── 0001.wav
|
|
21
|
+
│ ├── 0002.wav
|
|
22
|
+
│ └── ...
|
|
23
|
+
└── metadata.json
|
|
24
|
+
"""
|
|
25
|
+
output_dir = Path(output_dir)
|
|
26
|
+
audio_output = output_dir / "audio"
|
|
27
|
+
audio_output.mkdir(parents=True, exist_ok=True)
|
|
28
|
+
|
|
29
|
+
# Load metadata (format: {"path": "...", "description": "..."})
|
|
30
|
+
with open(metadata_file) as f:
|
|
31
|
+
metadata = json.load(f)
|
|
32
|
+
|
|
33
|
+
processed = []
|
|
34
|
+
|
|
35
|
+
for idx, item in enumerate(metadata):
|
|
36
|
+
audio_path = Path(audio_dir) / item["path"]
|
|
37
|
+
|
|
38
|
+
# Load and resample to 32kHz
|
|
39
|
+
wav, sr = torchaudio.load(str(audio_path))
|
|
40
|
+
if sr != 32000:
|
|
41
|
+
resampler = torchaudio.transforms.Resample(sr, 32000)
|
|
42
|
+
wav = resampler(wav)
|
|
43
|
+
|
|
44
|
+
# Convert to mono if stereo
|
|
45
|
+
if wav.shape[0] > 1:
|
|
46
|
+
wav = wav.mean(dim=0, keepdim=True)
|
|
47
|
+
|
|
48
|
+
# Save processed audio
|
|
49
|
+
output_path = audio_output / f"{idx:04d}.wav"
|
|
50
|
+
torchaudio.save(str(output_path), wav, sample_rate=32000)
|
|
51
|
+
|
|
52
|
+
processed.append({
|
|
53
|
+
"path": str(output_path.relative_to(output_dir)),
|
|
54
|
+
"description": item["description"],
|
|
55
|
+
"duration": wav.shape[1] / 32000
|
|
56
|
+
})
|
|
57
|
+
|
|
58
|
+
# Save processed metadata
|
|
59
|
+
with open(output_dir / "metadata.json", "w") as f:
|
|
60
|
+
json.dump(processed, f, indent=2)
|
|
61
|
+
|
|
62
|
+
print(f"Processed {len(processed)} samples")
|
|
63
|
+
return processed
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Fine-tuning with dora
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# AudioCraft uses dora for experiment management
|
|
70
|
+
# Install dora
|
|
71
|
+
pip install dora-search
|
|
72
|
+
|
|
73
|
+
# Clone AudioCraft
|
|
74
|
+
git clone https://github.com/facebookresearch/audiocraft.git
|
|
75
|
+
cd audiocraft
|
|
76
|
+
|
|
77
|
+
# Create config for fine-tuning
|
|
78
|
+
cat > config/solver/musicgen/finetune.yaml << 'EOF'
|
|
79
|
+
defaults:
|
|
80
|
+
- musicgen/musicgen_base
|
|
81
|
+
- /model: lm/musicgen_lm
|
|
82
|
+
- /conditioner: cond_base
|
|
83
|
+
|
|
84
|
+
solver: musicgen
|
|
85
|
+
autocast: true
|
|
86
|
+
autocast_dtype: float16
|
|
87
|
+
|
|
88
|
+
optim:
|
|
89
|
+
epochs: 100
|
|
90
|
+
batch_size: 4
|
|
91
|
+
lr: 1e-4
|
|
92
|
+
ema: 0.999
|
|
93
|
+
optimizer: adamw
|
|
94
|
+
|
|
95
|
+
dataset:
|
|
96
|
+
batch_size: 4
|
|
97
|
+
num_workers: 4
|
|
98
|
+
train:
|
|
99
|
+
- dset: your_dataset
|
|
100
|
+
root: /path/to/dataset
|
|
101
|
+
valid:
|
|
102
|
+
- dset: your_dataset
|
|
103
|
+
root: /path/to/dataset
|
|
104
|
+
|
|
105
|
+
checkpoint:
|
|
106
|
+
save_every: 10
|
|
107
|
+
keep_every_states: null
|
|
108
|
+
EOF
|
|
109
|
+
|
|
110
|
+
# Run fine-tuning
|
|
111
|
+
dora run solver=musicgen/finetune
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### LoRA fine-tuning
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from peft import LoraConfig, get_peft_model
|
|
118
|
+
from audiocraft.models import MusicGen
|
|
119
|
+
import torch
|
|
120
|
+
|
|
121
|
+
# Load base model
|
|
122
|
+
model = MusicGen.get_pretrained('facebook/musicgen-small')
|
|
123
|
+
|
|
124
|
+
# Get the language model component
|
|
125
|
+
lm = model.lm
|
|
126
|
+
|
|
127
|
+
# Configure LoRA
|
|
128
|
+
lora_config = LoraConfig(
|
|
129
|
+
r=8,
|
|
130
|
+
lora_alpha=16,
|
|
131
|
+
target_modules=["q_proj", "v_proj", "k_proj", "out_proj"],
|
|
132
|
+
lora_dropout=0.05,
|
|
133
|
+
bias="none"
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# Apply LoRA
|
|
137
|
+
lm = get_peft_model(lm, lora_config)
|
|
138
|
+
lm.print_trainable_parameters()
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Multi-GPU Training
|
|
142
|
+
|
|
143
|
+
### DataParallel
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
import torch
|
|
147
|
+
import torch.nn as nn
|
|
148
|
+
from audiocraft.models import MusicGen
|
|
149
|
+
|
|
150
|
+
model = MusicGen.get_pretrained('facebook/musicgen-small')
|
|
151
|
+
|
|
152
|
+
# Wrap LM with DataParallel
|
|
153
|
+
if torch.cuda.device_count() > 1:
|
|
154
|
+
model.lm = nn.DataParallel(model.lm)
|
|
155
|
+
|
|
156
|
+
model.to("cuda")
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### DistributedDataParallel
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
import torch.distributed as dist
|
|
163
|
+
from torch.nn.parallel import DistributedDataParallel as DDP
|
|
164
|
+
|
|
165
|
+
def setup(rank, world_size):
|
|
166
|
+
dist.init_process_group("nccl", rank=rank, world_size=world_size)
|
|
167
|
+
torch.cuda.set_device(rank)
|
|
168
|
+
|
|
169
|
+
def train(rank, world_size):
|
|
170
|
+
setup(rank, world_size)
|
|
171
|
+
|
|
172
|
+
model = MusicGen.get_pretrained('facebook/musicgen-small')
|
|
173
|
+
model.lm = model.lm.to(rank)
|
|
174
|
+
model.lm = DDP(model.lm, device_ids=[rank])
|
|
175
|
+
|
|
176
|
+
# Training loop
|
|
177
|
+
# ...
|
|
178
|
+
|
|
179
|
+
dist.destroy_process_group()
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Custom Conditioning
|
|
183
|
+
|
|
184
|
+
### Adding new conditioners
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from audiocraft.modules.conditioners import BaseConditioner
|
|
188
|
+
import torch
|
|
189
|
+
|
|
190
|
+
class CustomConditioner(BaseConditioner):
|
|
191
|
+
"""Custom conditioner for additional control signals."""
|
|
192
|
+
|
|
193
|
+
def __init__(self, dim, output_dim):
|
|
194
|
+
super().__init__(dim, output_dim)
|
|
195
|
+
self.embed = torch.nn.Linear(dim, output_dim)
|
|
196
|
+
|
|
197
|
+
def forward(self, x):
|
|
198
|
+
return self.embed(x)
|
|
199
|
+
|
|
200
|
+
def tokenize(self, x):
|
|
201
|
+
# Tokenize input for conditioning
|
|
202
|
+
return x
|
|
203
|
+
|
|
204
|
+
# Use with MusicGen
|
|
205
|
+
from audiocraft.models.builders import get_lm_model
|
|
206
|
+
|
|
207
|
+
# Modify model config to include custom conditioner
|
|
208
|
+
# This requires editing the model configuration
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Melody conditioning internals
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from audiocraft.models import MusicGen
|
|
215
|
+
from audiocraft.modules.codebooks_patterns import DelayedPatternProvider
|
|
216
|
+
import torch
|
|
217
|
+
|
|
218
|
+
model = MusicGen.get_pretrained('facebook/musicgen-melody')
|
|
219
|
+
|
|
220
|
+
# Access chroma extractor
|
|
221
|
+
chroma_extractor = model.lm.condition_provider.conditioners.get('chroma')
|
|
222
|
+
|
|
223
|
+
# Manual chroma extraction
|
|
224
|
+
def extract_chroma(audio, sr):
|
|
225
|
+
"""Extract chroma features from audio."""
|
|
226
|
+
import librosa
|
|
227
|
+
|
|
228
|
+
# Compute chroma
|
|
229
|
+
chroma = librosa.feature.chroma_cqt(y=audio.numpy(), sr=sr)
|
|
230
|
+
|
|
231
|
+
return torch.from_numpy(chroma).float()
|
|
232
|
+
|
|
233
|
+
# Use extracted chroma for conditioning
|
|
234
|
+
chroma = extract_chroma(melody_audio, sample_rate)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## EnCodec Deep Dive
|
|
238
|
+
|
|
239
|
+
### Custom compression settings
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
from audiocraft.models import CompressionModel
import torch

# Load EnCodec
encodec = CompressionModel.get_pretrained('facebook/encodec_32khz')

# Access codec parameters
print(f"Sample rate: {encodec.sample_rate}")
print(f"Channels: {encodec.channels}")
print(f"Cardinality: {encodec.cardinality}")  # Codebook size
print(f"Num codebooks: {encodec.num_codebooks}")
print(f"Frame rate: {encodec.frame_rate}")

# Encode with specific bandwidth
# Lower bandwidth = more compression, lower quality
encodec.set_target_bandwidth(6.0)  # 6 kbps

audio = torch.randn(1, 1, 32000)  # 1 second: (batch, channels, samples) at 32 kHz
encoded = encodec.encode(audio)
# encode(...) returns a tuple; index 0 holds the discrete codes that decode expects.
decoded = encodec.decode(encoded[0])
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
### Streaming encoding
|
|
265
|
+
|
|
266
|
+
```python
|
|
267
|
+
import torch
from audiocraft.models import CompressionModel

encodec = CompressionModel.get_pretrained('facebook/encodec_32khz')

def encode_streaming(audio_stream, chunk_size=32000):
    """Encode audio in streaming fashion.

    `audio_stream` yields waveform tensors; each chunk is encoded
    independently and the code sequences are concatenated at the end.
    NOTE(review): `chunk_size` is accepted but not used — chunking is
    assumed to be done by the caller/stream itself.
    """
    all_codes = []

    for chunk in audio_stream:
        # Ensure chunk is right shape: (batch, channels, samples)
        if chunk.dim() == 1:
            chunk = chunk.unsqueeze(0).unsqueeze(0)

        with torch.no_grad():
            codes = encodec.encode(chunk)[0]
        all_codes.append(codes)

    # Concatenate code sequences along the time axis.
    return torch.cat(all_codes, dim=-1)

def decode_streaming(codes_stream, output_stream):
    """Decode codes in streaming fashion, writing audio to `output_stream`."""
    for codes in codes_stream:
        with torch.no_grad():
            audio = encodec.decode(codes)
        output_stream.write(audio.cpu().numpy())
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
## MultiBand Diffusion
|
|
296
|
+
|
|
297
|
+
### Using MBD for enhanced quality
|
|
298
|
+
|
|
299
|
+
```python
|
|
300
|
+
from audiocraft.models import MusicGen, MultiBandDiffusion
import torch  # FIX: torch was used below (torch.no_grad) but never imported

# Load MusicGen
model = MusicGen.get_pretrained('facebook/musicgen-medium')

# Load MultiBand Diffusion
mbd = MultiBandDiffusion.get_mbd_musicgen()

model.set_generation_params(duration=10)

# Generate with standard decoder
descriptions = ["epic orchestral music"]
wav_standard = model.generate(descriptions)

# Generate tokens and use MBD decoder
with torch.no_grad():
    # FIX: MusicGen has no public `generate_tokens`; request the discrete
    # EnCodec tokens alongside the audio via return_tokens=True, which
    # makes generate() return a (wav, tokens) pair.
    _, gen_tokens = model.generate(descriptions, return_tokens=True)

    # Decode with MBD
    wav_mbd = mbd.tokens_to_wav(gen_tokens)

# Compare quality
print(f"Standard shape: {wav_standard.shape}")
print(f"MBD shape: {wav_mbd.shape}")
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
## API Server Deployment
|
|
328
|
+
|
|
329
|
+
### FastAPI server
|
|
330
|
+
|
|
331
|
+
```python
|
|
332
|
+
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
import torchaudio
from audiocraft.models import MusicGen
import io
import base64

app = FastAPI()

# Load model at startup
model = None

# NOTE(review): @app.on_event is deprecated in recent FastAPI/Starlette in
# favour of lifespan handlers; it still works but may emit warnings.
@app.on_event("startup")
async def load_model():
    global model
    model = MusicGen.get_pretrained('facebook/musicgen-small')
    model.set_generation_params(duration=10)

class GenerateRequest(BaseModel):
    # Text prompt describing the music to generate.
    prompt: str
    duration: float = 10.0
    temperature: float = 1.0
    cfg_coef: float = 3.0

class GenerateResponse(BaseModel):
    # WAV file bytes, base64-encoded.
    audio_base64: str
    sample_rate: int
    duration: float

@app.post("/generate", response_model=GenerateResponse)
async def generate(request: GenerateRequest):
    """Generate music for `request.prompt` and return it as base64 WAV."""
    if model is None:
        raise HTTPException(status_code=500, detail="Model not loaded")

    try:
        # NOTE(review): mutating the shared model's generation params per
        # request is racy if requests run concurrently — acceptable for a demo.
        model.set_generation_params(
            duration=min(request.duration, 30),  # cap requested duration at 30 s
            temperature=request.temperature,
            cfg_coef=request.cfg_coef
        )

        with torch.no_grad():
            wav = model.generate([request.prompt])

        # Convert to bytes
        buffer = io.BytesIO()
        torchaudio.save(buffer, wav[0].cpu(), sample_rate=32000, format="wav")
        buffer.seek(0)

        audio_base64 = base64.b64encode(buffer.read()).decode()

        return GenerateResponse(
            audio_base64=audio_base64,
            sample_rate=32000,
            duration=wav.shape[-1] / 32000
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health():
    # Lightweight liveness/readiness probe.
    return {"status": "ok", "model_loaded": model is not None}

# Run: uvicorn server:app --host 0.0.0.0 --port 8000
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
### Batch processing service
|
|
401
|
+
|
|
402
|
+
```python
|
|
403
|
+
import asyncio
from concurrent.futures import ThreadPoolExecutor
import torch
from audiocraft.models import MusicGen

class MusicGenService:
    """Thin async wrapper around a single shared MusicGen model."""

    def __init__(self, model_name='facebook/musicgen-small', max_workers=2):
        self.model = MusicGen.get_pretrained(model_name)
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
        # Serializes access to the shared model: set_generation_params and
        # generate must not interleave across concurrent requests.
        self.lock = asyncio.Lock()

    async def generate_async(self, prompt, duration=10):
        """Async generation with thread pool."""
        loop = asyncio.get_event_loop()

        def _generate():
            with torch.no_grad():
                self.model.set_generation_params(duration=duration)
                return self.model.generate([prompt])

        # Run in thread pool. FIX: the lock was created but never acquired,
        # so two concurrent calls could interleave param-setting and
        # generation on the same shared model; hold it for the full round trip.
        async with self.lock:
            wav = await loop.run_in_executor(self.executor, _generate)
        return wav[0].cpu()

    async def generate_batch_async(self, prompts, duration=10):
        """Process multiple prompts; actual generation is serialized by the lock."""
        tasks = [self.generate_async(p, duration) for p in prompts]
        return await asyncio.gather(*tasks)

# Usage
service = MusicGenService()

async def main():
    prompts = ["jazz piano", "rock guitar", "electronic beats"]
    results = await service.generate_batch_async(prompts)
    return results
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
## Integration Patterns
|
|
442
|
+
|
|
443
|
+
### LangChain tool
|
|
444
|
+
|
|
445
|
+
```python
|
|
446
|
+
from langchain.tools import BaseTool
import torch
import torchaudio
from audiocraft.models import MusicGen
import tempfile

class MusicGeneratorTool(BaseTool):
    name = "music_generator"
    description = "Generate music from a text description. Input should be a detailed description of the music style, mood, and instruments."

    def __init__(self):
        super().__init__()
        # NOTE(review): BaseTool is a pydantic model; assigning an undeclared
        # attribute like `self.model` can raise depending on the
        # langchain/pydantic version — confirm, or declare it as a field.
        self.model = MusicGen.get_pretrained('facebook/musicgen-small')
        self.model.set_generation_params(duration=15)

    def _run(self, description: str) -> str:
        # Synchronous tool entry point used by LangChain agents.
        with torch.no_grad():
            wav = self.model.generate([description])

        # Save to temp file
        # delete=False so the file survives after the handle closes and the
        # agent can use the returned path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            torchaudio.save(f.name, wav[0].cpu(), sample_rate=32000)
            return f"Generated music saved to: {f.name}"

    async def _arun(self, description: str) -> str:
        # Async variant simply delegates to the blocking implementation.
        return self._run(description)
|
|
472
|
+
```
|
|
473
|
+
|
|
474
|
+
### Gradio with advanced controls
|
|
475
|
+
|
|
476
|
+
```python
|
|
477
|
+
import gradio as gr
import torch
import torchaudio
from audiocraft.models import MusicGen

# Cache of loaded models keyed by size, so switching sizes in the UI
# does not reload from scratch on every call.
models = {}

def load_model(model_size):
    """Return a (cached) MusicGen model for the given size string."""
    if model_size not in models:
        model_name = f"facebook/musicgen-{model_size}"
        models[model_size] = MusicGen.get_pretrained(model_name)
    return models[model_size]

def generate(prompt, duration, temperature, cfg_coef, top_k, model_size):
    """Gradio callback: generate audio for `prompt` and return a file path."""
    model = load_model(model_size)

    model.set_generation_params(
        duration=duration,
        temperature=temperature,
        cfg_coef=cfg_coef,
        top_k=top_k
    )

    with torch.no_grad():
        wav = model.generate([prompt])

    # Save
    # NOTE(review): fixed filename — concurrent UI requests would overwrite
    # each other's output.
    path = "output.wav"
    torchaudio.save(path, wav[0].cpu(), sample_rate=32000)
    return path

demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt", lines=3),
        gr.Slider(1, 30, value=10, label="Duration (s)"),
        gr.Slider(0.1, 2.0, value=1.0, label="Temperature"),
        gr.Slider(0.5, 10.0, value=3.0, label="CFG Coefficient"),
        gr.Slider(50, 500, value=250, step=50, label="Top-K"),
        gr.Dropdown(["small", "medium", "large"], value="small", label="Model Size")
    ],
    outputs=gr.Audio(label="Generated Music"),
    title="MusicGen Advanced",
    allow_flagging="never"
)

demo.launch(share=True)
|
|
524
|
+
```
|
|
525
|
+
|
|
526
|
+
## Audio Processing Pipeline
|
|
527
|
+
|
|
528
|
+
### Post-processing chain
|
|
529
|
+
|
|
530
|
+
```python
|
|
531
|
+
import torch
|
|
532
|
+
import torchaudio
|
|
533
|
+
import torchaudio.transforms as T
|
|
534
|
+
import numpy as np
|
|
535
|
+
|
|
536
|
+
class AudioPostProcessor:
    """Simple post-processing chain for generated audio tensors.

    Operates on tensors shaped (..., samples). All operations return a new
    tensor; the caller's input is never modified in place.
    """

    def __init__(self, sample_rate=32000):
        self.sample_rate = sample_rate

    def normalize(self, audio, target_db=-14.0):
        """Normalize audio to target loudness.

        Uses an RMS gain — a rough proxy for perceived loudness, not true LUFS.
        """
        rms = torch.sqrt(torch.mean(audio ** 2))
        target_rms = 10 ** (target_db / 20)
        # Epsilon guards against division by zero on silent input.
        gain = target_rms / (rms + 1e-8)
        return audio * gain

    def fade_in_out(self, audio, fade_duration=0.1):
        """Apply fade in/out.

        FIX: the fade length is clamped to half the signal length so short
        clips no longer raise a shape-mismatch error, and the fades are
        applied to a copy instead of mutating the input in place.
        """
        fade_samples = int(fade_duration * self.sample_rate)
        fade_samples = min(fade_samples, audio.shape[-1] // 2)
        if fade_samples == 0:
            return audio

        # Create fade curves
        fade_in = torch.linspace(0, 1, fade_samples)
        fade_out = torch.linspace(1, 0, fade_samples)

        # Apply fades on a copy to keep the caller's tensor intact.
        audio = audio.clone()
        audio[..., :fade_samples] *= fade_in
        audio[..., -fade_samples:] *= fade_out

        return audio

    def apply_reverb(self, audio, decay=0.5):
        """Apply simple reverb effect (a few decaying echo taps)."""
        impulse = torch.zeros(int(self.sample_rate * 0.5))
        impulse[0] = 1.0
        impulse[int(self.sample_rate * 0.1)] = decay * 0.5
        impulse[int(self.sample_rate * 0.2)] = decay * 0.25

        # Convolve each channel independently: treat channels as the batch
        # dimension so multichannel audio works too (FIX: the previous shaping
        # only supported mono input).
        audio = torch.nn.functional.conv1d(
            audio.unsqueeze(1),
            impulse.unsqueeze(0).unsqueeze(0),
            padding=len(impulse) // 2
        ).squeeze(1)

        return audio

    def process(self, audio):
        """Full processing pipeline: normalize, then fade in/out (no reverb)."""
        audio = self.normalize(audio)
        audio = self.fade_in_out(audio)
        return audio
|
|
582
|
+
|
|
583
|
+
# Usage with MusicGen
from audiocraft.models import MusicGen

model = MusicGen.get_pretrained('facebook/musicgen-small')
model.set_generation_params(duration=10)

wav = model.generate(["chill ambient music"])
# wav is batched; take the first (only) item and move it to CPU for saving.
processor = AudioPostProcessor()
wav_processed = processor.process(wav[0].cpu())

torchaudio.save("processed.wav", wav_processed, sample_rate=32000)
|
|
594
|
+
```
|
|
595
|
+
|
|
596
|
+
## Evaluation
|
|
597
|
+
|
|
598
|
+
### Audio quality metrics
|
|
599
|
+
|
|
600
|
+
```python
|
|
601
|
+
import torch
from audiocraft.metrics import CLAPTextConsistencyMetric
from audiocraft.data.audio import audio_read

def evaluate_generation(audio_path, text_prompt):
    """Evaluate generated audio quality.

    Returns a dict with the CLAP text-audio consistency score and the
    clip duration in seconds.
    """
    # Load audio
    wav, sr = audio_read(audio_path)

    # CLAP consistency (text-audio alignment)
    clap_metric = CLAPTextConsistencyMetric()
    clap_score = clap_metric.compute(wav, [text_prompt])

    return {
        "clap_score": clap_score,
        "duration": wav.shape[-1] / sr
    }

# Batch evaluation
def evaluate_batch(generations):
    """Evaluate multiple generations.

    `generations` is an iterable of {"path": ..., "prompt": ...} dicts.
    """
    results = []
    for gen in generations:
        result = evaluate_generation(gen["path"], gen["prompt"])
        result["prompt"] = gen["prompt"]
        results.append(result)

    # Aggregate. FIX: guard the empty case so an empty input list no longer
    # raises ZeroDivisionError.
    if not results:
        return {"individual": [], "average_clap": None}
    avg_clap = sum(r["clap_score"] for r in results) / len(results)
    return {
        "individual": results,
        "average_clap": avg_clap
    }
|
|
634
|
+
```
|
|
635
|
+
|
|
636
|
+
## Model Comparison
|
|
637
|
+
|
|
638
|
+
### MusicGen variants benchmark
|
|
639
|
+
|
|
640
|
+
| Model | CLAP Score | Generation Time (10s) | VRAM |
|
|
641
|
+
|-------|------------|----------------------|------|
|
|
642
|
+
| musicgen-small | 0.35 | ~5s | 2GB |
|
|
643
|
+
| musicgen-medium | 0.42 | ~15s | 4GB |
|
|
644
|
+
| musicgen-large | 0.48 | ~30s | 8GB |
|
|
645
|
+
| musicgen-melody | 0.45 | ~15s | 4GB |
|
|
646
|
+
| musicgen-stereo-medium | 0.41 | ~18s | 5GB |
|
|
647
|
+
|
|
648
|
+
### Prompt engineering tips
|
|
649
|
+
|
|
650
|
+
```python
|
|
651
|
+
# Good prompts - specific and descriptive
# (name genre, mood, instrumentation, and tempo where possible).
good_prompts = [
    "upbeat electronic dance music with synthesizer leads and punchy drums at 128 bpm",
    "melancholic piano ballad with strings, slow tempo, emotional and cinematic",
    "funky disco groove with slap bass, brass section, and rhythmic guitar"
]

# Bad prompts - too vague
bad_prompts = [
    "nice music",
    "song",
    "good beat"
]

# Structure: [mood] [genre] with [instruments] at [tempo/style]
|
|
666
|
+
```
|