@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
package/bin/skills/tinker/references/loss-functions.md
@@ -0,0 +1,163 @@
# Loss Functions

## Built-in Loss Functions

Pass the loss name as a string to `forward_backward(data, loss_fn)`.

## Supervised Learning

### cross_entropy

Standard next-token prediction loss.

$$\mathcal{L}(\theta) = -\mathbb{E}_x[\log p_\theta(x)]$$

```python
fwd_bwd = training_client.forward_backward(data, loss_fn="cross_entropy")
```

**Inputs:**
- `target_tokens: array[(N,), int]` - Target token IDs
- `weights: array[(N,), float]` - Loss weights (0 = ignore, 1 = train)

**Outputs:**
- `logprobs: array[(N,), float]` - Log probabilities
- `loss:sum` (scalar) - Total weighted loss

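For illustration, a minimal sketch of constructing a single `cross_entropy` datum, following the same `Datum` construction pattern used in the `rl_loop.py` recipe later in this package; the token IDs are made up, and `training_client` is assumed to have been created with `create_lora_training_client`:

```python
import torch
from tinker import types
from tinker.types.tensor_data import TensorData

# Hypothetical token IDs: 4 prompt tokens followed by 3 completion tokens.
prompt_tokens = [101, 2023, 2003, 1037]
completion_tokens = [7592, 2088, 102]
tokens = prompt_tokens + completion_tokens

ob_len = len(prompt_tokens) - 1  # inputs and targets are shifted by one position
# Train only on the completion: weight 0 over the prompt, 1 over the completion.
weights = [0.0] * ob_len + [1.0] * (len(tokens) - 1 - ob_len)

datum = types.Datum(
    model_input=types.ModelInput.from_ints(tokens=tokens[:-1]),
    loss_fn_inputs={
        "target_tokens": TensorData.from_torch(torch.tensor(tokens[1:])),
        "weights": TensorData.from_torch(torch.tensor(weights)),
    },
)

fwd_bwd = training_client.forward_backward([datum], loss_fn="cross_entropy")
```
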
## Policy Gradient Methods

### importance_sampling

Policy gradient with importance weighting for off-policy correction:

$$\mathcal{L}_{IS}(\theta) = \mathbb{E}_{x\sim q}\left[\frac{p_\theta(x)}{q(x)}A(x)\right]$$

```python
fwd_bwd = training_client.forward_backward(data, loss_fn="importance_sampling")
```

**Inputs:**
- `target_tokens: array[(N,), int]` - Target tokens
- `logprobs: array[(N,), float]` - Sampling logprobs (from q)
- `advantages: array[(N,), float]` - Advantage values

### ppo

Proximal Policy Optimization with clipping:

$$\mathcal{L}_{PPO}(\theta) = -\mathbb{E}_{x \sim q}\left[\min\left(\frac{p_\theta(x)}{q(x)} A(x), \text{clip}\left(\frac{p_\theta(x)}{q(x)}, 1-\epsilon, 1+\epsilon\right) A(x)\right)\right]$$

```python
fwd_bwd = training_client.forward_backward(
    data,
    loss_fn="ppo",
    loss_fn_config={"clip_low_threshold": 0.9, "clip_high_threshold": 1.1}
)
```

**Inputs:** Same as `importance_sampling`

### cispo

Clipped Importance Sampling Policy Optimization:

$$\mathcal{L}_{CISPO}(\theta) = \mathbb{E}_{x \sim q}\left[\text{sg}\left(\text{clip}\left(\frac{p_\theta(x)}{q(x)}, 1-\epsilon, 1+\epsilon\right)\right) \log p_\theta(x) A(x)\right]$$

```python
fwd_bwd = training_client.forward_backward(
    data,
    loss_fn="cispo",
    loss_fn_config={"clip_low_threshold": 0.8, "clip_high_threshold": 1.2}
)
```

### dro

Direct Reward Optimization with quadratic penalty:

$$\mathcal{L}_{DRO}(\theta) = \mathbb{E}_{x \sim q}\left[\log p_\theta(x) A(x) - \frac{1}{2}\beta \left(\log \frac{p_\theta(x)}{q(x)}\right)^2\right]$$

```python
fwd_bwd = training_client.forward_backward(
    data,
    loss_fn="dro",
    loss_fn_config={"beta": 0.05}
)
```

## Custom Loss Functions

For losses not covered above, use `forward_backward_custom`:

```python
def custom_loss(data: list[Datum], logprobs: list[torch.Tensor]) -> tuple[torch.Tensor, dict[str, float]]:
    # Example: penalize squared logprobs, summed over every sequence in the batch.
    loss = torch.cat(logprobs).pow(2).sum()
    return loss, {"custom_loss": loss.item()}

loss, metrics = training_client.forward_backward_custom(data, custom_loss)
```

### Multi-Sequence Loss

```python
def variance_loss(data: list[Datum], logprobs: list[torch.Tensor]) -> tuple[torch.Tensor, dict[str, float]]:
    flat_logprobs = torch.cat(logprobs)
    variance = torch.var(flat_logprobs)
    return variance, {"variance_loss": variance.item()}
```

### How forward_backward_custom Works

1. Forward pass computes logprobs
2. Custom function computes loss from logprobs
3. `loss.backward()` computes grad_outputs
4. Second forward_backward with linear surrogate loss

**Note:** Uses 1.5x FLOPs and up to 3x wall time compared to built-in losses.

## Implementation Details

All losses:
- Applied at token level with shape `(N,)` where N = sequence length
- Use sum reduction (not mean)
- Accept numpy.ndarray or torch.Tensor inputs

### cross_entropy Implementation
```python
elementwise_loss = -target_logprobs * weights
loss = elementwise_loss.sum()
```

### importance_sampling Implementation
```python
prob_ratio = torch.exp(target_logprobs - sampling_logprobs)
loss = -(prob_ratio * advantages).sum()
```

### ppo Implementation
```python
prob_ratio = torch.exp(target_logprobs - sampling_logprobs)
clipped_ratio = torch.clamp(prob_ratio, 1 - eps, 1 + eps)
unclipped = prob_ratio * advantages
clipped = clipped_ratio * advantages
loss = -torch.min(unclipped, clipped).sum()
```

### cispo Implementation
```python
prob_ratio = torch.exp(target_logprobs - sampling_logprobs)
clipped_ratio = torch.clamp(prob_ratio, 1 - eps, 1 + eps)
loss = -(clipped_ratio.detach() * target_logprobs * advantages).sum()
```

### dro Implementation
```python
quadratic_term = (target_logprobs - sampling_logprobs) ** 2
loss = -(target_logprobs * advantages - 0.5 * beta * quadratic_term).sum()
```

## Notes

- KL regularization: Include in reward rather than loss (see `incorporate_kl_penalty` in Cookbook)
- Aggregation: Token-level losses are summed; for different schemes, modify advantages
- Reference: [Schulman et al., 2017](https://arxiv.org/abs/1707.06347) for PPO
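To spell out the first note, a minimal sketch of folding a KL penalty into per-token rewards before advantages are computed. This is a generic illustration of the idea, not the Cookbook's `incorporate_kl_penalty` implementation, and all names here are illustrative:

```python
import torch

def add_kl_penalty_to_rewards(
    rewards: torch.Tensor,          # (N,) per-token rewards (often zero except at the final token)
    policy_logprobs: torch.Tensor,  # (N,) logprobs of the sampled tokens under the current policy
    ref_logprobs: torch.Tensor,     # (N,) logprobs of the same tokens under a frozen reference model
    kl_coef: float = 0.1,           # illustrative coefficient
) -> torch.Tensor:
    # Per-token KL estimate between policy and reference on the sampled tokens.
    kl = policy_logprobs - ref_logprobs
    # Subtract the penalty from the reward; advantages computed from these
    # penalized rewards then feed the importance_sampling / ppo losses above.
    return rewards - kl_coef * kl
```
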
package/bin/skills/tinker/references/models-and-lora.md
@@ -0,0 +1,139 @@
# Available Models & LoRA

## Model Selection Guide

- **Use MoE models** - More cost effective than dense
- **Base models** - Only for research or full post-training
- **Instruction models** - Fast inference, no chain-of-thought
- **Hybrid/Reasoning models** - Long chain-of-thought for quality

## Model Lineup

| Model | Type | Architecture |
|-------|------|--------------|
| **Qwen/Qwen3-VL-235B-A22B-Instruct** | Vision | MoE Large |
| **Qwen/Qwen3-VL-30B-A3B-Instruct** | Vision | MoE Medium |
| Qwen/Qwen3-235B-A22B-Instruct-2507 | Instruction | MoE Large |
| Qwen/Qwen3-30B-A3B-Instruct-2507 | Instruction | MoE Medium |
| **Qwen/Qwen3-30B-A3B** | Hybrid | MoE Medium |
| Qwen/Qwen3-30B-A3B-Base | Base | MoE Medium |
| Qwen/Qwen3-32B | Hybrid | Dense Medium |
| Qwen/Qwen3-8B | Hybrid | Dense Small |
| Qwen/Qwen3-8B-Base | Base | Dense Small |
| Qwen/Qwen3-4B-Instruct-2507 | Instruction | Dense Compact |
| openai/gpt-oss-120b | Reasoning | MoE Medium |
| openai/gpt-oss-20b | Reasoning | MoE Small |
| deepseek-ai/DeepSeek-V3.1 | Hybrid | MoE Large |
| deepseek-ai/DeepSeek-V3.1-Base | Base | MoE Large |
| **meta-llama/Llama-3.1-8B** | Base | Dense Small |
| meta-llama/Llama-3.1-8B-Instruct | Instruction | Dense Small |
| meta-llama/Llama-3.3-70B-Instruct | Instruction | Dense Large |
| meta-llama/Llama-3.1-70B | Base | Dense Large |
| meta-llama/Llama-3.2-3B | Base | Dense Compact |
| meta-llama/Llama-3.2-1B | Base | Dense Compact |
| moonshotai/Kimi-K2-Thinking | Reasoning | MoE Large |

**Sizes:** Compact (1-4B), Small (8B), Medium (30-32B), Large (70B+)

**Types:**
- **Base**: Pretrained, for post-training research
- **Instruction**: Chat-tuned, fast inference
- **Hybrid**: Thinking + non-thinking modes
- **Reasoning**: Always uses chain-of-thought
- **Vision**: VLMs with image processing

## LoRA Primer

LoRA (Low-Rank Adaptation) fine-tunes a small subset of parameters instead of all weights.

### When LoRA Works Well

- SL on small-medium instruction datasets: **Same as full fine-tuning**
- RL: **Equivalent to full fine-tuning even with small ranks**
- Large datasets: May underperform (increase rank)

### LoRA Learning Rate

**Critical:** LoRA needs 20-100x higher LR than full fine-tuning!

```python
from tinker_cookbook.hyperparam_utils import get_lora_lr_over_full_finetune_lr

model_name = "meta-llama/Llama-3.1-8B"
factor = get_lora_lr_over_full_finetune_lr(model_name)
# Returns 10.0 for all models (empirically validated)
```

### Recommended Learning Rate

```python
from tinker_cookbook.hyperparam_utils import get_lr

recommended_lr = get_lr("meta-llama/Llama-3.1-8B")
```

### LoRA Rank

Default rank: 32

```python
from tinker_cookbook.hyperparam_utils import get_lora_param_count

# Check parameter count
param_count = get_lora_param_count("meta-llama/Llama-3.1-8B", lora_rank=32)
```

**Rule of thumb:** LoRA params ≥ completion tokens for good SL results.

For RL: small ranks work fine.

**Optimal LR does NOT depend on rank** - the same LR works across ranks.

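As one way to apply the rule of thumb above, a sketch that scans ranks until the LoRA parameter count covers the dataset; `completion_token_count` is a made-up placeholder for your own dataset's total completion tokens:

```python
from tinker_cookbook.hyperparam_utils import get_lora_param_count

model_name = "meta-llama/Llama-3.1-8B"
completion_token_count = 5_000_000  # hypothetical SL dataset size in completion tokens

# Pick the smallest rank whose LoRA parameter count meets the rule of thumb.
for rank in (8, 16, 32, 64, 128):
    if get_lora_param_count(model_name, lora_rank=rank) >= completion_token_count:
        print(f"Use lora_rank={rank}")
        break
```
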
### LoRA Configuration

```python
training_client = service_client.create_lora_training_client(
    base_model="meta-llama/Llama-3.1-8B",
    rank=32,
    train_attn=True,      # Attention layers (default)
    train_mlp=True,       # MLP layers (default)
    train_unembed=False,  # Output embedding (optional)
    seed=42,              # For reproducibility
)
```

**Best practice:** Train all layers (attention + MLP), not just attention.

### Mathematical Definition

Original weight: W (n×n)
LoRA: W' = W + BA

- B: n×r matrix
- A: r×n matrix
- r: rank (default 32)

Think of LoRA as an efficient random projection of the parameter space.

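To make the savings concrete, a small arithmetic sketch for a single n×n weight matrix (plain Python, independent of the tinker API; the hidden size is illustrative):

```python
n = 4096   # hidden size of one square weight matrix (illustrative)
r = 32     # LoRA rank (default above)

full_params = n * n            # updating W directly
lora_params = n * r + r * n    # B (n x r) plus A (r x n)

print(full_params)                 # 16777216
print(lora_params)                 # 262144
print(full_params // lora_params)  # 64x fewer trainable parameters for this matrix
```
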
## Model Selection Tips

1. **For cost efficiency:** Use MoE models (Qwen3-VL, Qwen3-30B-A3B)
2. **For experimentation:** Start with 8B models
3. **For vision tasks:** Qwen3-VL-30B-A3B-Instruct (cost-effective)
4. **For reasoning:** Hybrid or Reasoning models with CoT
5. **For latency:** Instruction models without CoT

## Creating Training Client

```python
import tinker

# Get available models
service_client = tinker.ServiceClient()
for model in service_client.get_server_capabilities().supported_models:
    print(model.model_name)

# Create training client
training_client = service_client.create_lora_training_client(
    base_model="Qwen/Qwen3-30B-A3B",
    rank=32,
)
```

package/bin/skills/tinker/references/recipes.md
@@ -0,0 +1,280 @@
# Example Recipes

## sl_basic.py - Basic Supervised Learning

```python
import chz
import sys
import asyncio
from tinker_cookbook import cli_utils, model_info
from tinker_cookbook.recipes.chat_sl import chat_datasets
from tinker_cookbook.renderers import TrainOnWhat
from tinker_cookbook.supervised import train
from tinker_cookbook.supervised.data import FromConversationFileBuilder
from tinker_cookbook.supervised.types import ChatDatasetBuilderCommonConfig

def build_config_blueprint() -> chz.Blueprint[train.Config]:
    model_name = "meta-llama/Llama-3.1-8B"
    renderer_name = model_info.get_recommended_renderer_name(model_name)
    common_config = ChatDatasetBuilderCommonConfig(
        model_name_for_tokenizer=model_name,
        renderer_name=renderer_name,
        max_length=32768,
        batch_size=128,
        train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
    )
    dataset = chat_datasets.NoRobotsBuilder(common_config=common_config)

    # For custom dataset:
    # dataset = FromConversationFileBuilder(
    #     common_config=common_config,
    #     file_path="/path/to/dataset.jsonl"
    # )

    return chz.Blueprint(train.Config).apply({
        "log_path": "/tmp/tinker-examples/sl_basic",
        "model_name": model_name,
        "dataset_builder": dataset,
        "learning_rate": 2e-4,
        "lr_schedule": "linear",
        "num_epochs": 1,
        "eval_every": 8,
    })

def main(config: train.Config):
    cli_utils.check_log_dir(config.log_path, behavior_if_exists="ask")
    asyncio.run(train.main(config))

if __name__ == "__main__":
    blueprint = build_config_blueprint()
    blueprint.make_from_argv(sys.argv[1:])
    main(blueprint.make())
```

## sl_loop.py - Manual Training Loop

```python
import chz
import datasets
import tinker
from tinker_cookbook import checkpoint_utils, model_info, renderers
from tinker_cookbook.supervised.common import compute_mean_nll
from tinker_cookbook.supervised.data import conversation_to_datum
from tinker_cookbook.tokenizer_utils import get_tokenizer

@chz.chz
class Config:
    log_path: str = "/tmp/tinker-examples/sl-loop"
    model_name: str = "meta-llama/Llama-3.1-8B"
    batch_size: int = 128
    learning_rate: float = 1e-4
    max_length: int = 32768
    train_on_what: renderers.TrainOnWhat = renderers.TrainOnWhat.ALL_ASSISTANT_MESSAGES
    lora_rank: int = 32

def main(config: Config):
    tokenizer = get_tokenizer(config.model_name)
    renderer_name = model_info.get_recommended_renderer_name(config.model_name)
    renderer = renderers.get_renderer(renderer_name, tokenizer)

    dataset = datasets.load_dataset("HuggingFaceH4/no_robots")
    train_dataset = dataset["train"].shuffle(seed=0)
    n_batches = len(train_dataset) // config.batch_size

    service_client = tinker.ServiceClient()
    training_client = service_client.create_lora_training_client(
        base_model=config.model_name, rank=config.lora_rank
    )

    for batch_idx in range(n_batches):
        # Linear LR decay
        lr_mult = max(0.0, 1.0 - batch_idx / n_batches)
        adam_params = tinker.AdamParams(
            learning_rate=config.learning_rate * lr_mult
        )

        # Get batch
        start = batch_idx * config.batch_size
        end = (batch_idx + 1) * config.batch_size
        batch_rows = train_dataset.select(range(start, end))

        batch = [
            conversation_to_datum(row["messages"], renderer, config.max_length, config.train_on_what)
            for row in batch_rows
        ]

        # Training step
        fwd_bwd = training_client.forward_backward(batch, loss_fn="cross_entropy")
        optim = training_client.optim_step(adam_params)
        fwd_bwd_result = fwd_bwd.result()
        optim.result()

        # Compute metrics
        train_nll = compute_mean_nll(
            [x["logprobs"] for x in fwd_bwd_result.loss_fn_outputs],
            [d.loss_fn_inputs["weights"] for d in batch]
        )
        print(f"Step {batch_idx}, NLL: {train_nll:.4f}")

if __name__ == "__main__":
    chz.nested_entrypoint(main)
```

## rl_basic.py - Basic RL

```python
import asyncio
import chz
import sys
from tinker_cookbook import cli_utils, model_info
from tinker_cookbook.recipes.math_rl.math_env import Gsm8kDatasetBuilder
from tinker_cookbook.rl import train

def build_config_blueprint() -> chz.Blueprint[train.Config]:
    model_name = "meta-llama/Llama-3.1-8B"
    renderer_name = model_info.get_recommended_renderer_name(model_name)
    builder = Gsm8kDatasetBuilder(
        batch_size=128,
        group_size=16,
        renderer_name=renderer_name,
        model_name_for_tokenizer=model_name,
    )
    return chz.Blueprint(train.Config).apply({
        "model_name": model_name,
        "log_path": "/tmp/tinker-examples/rl_basic",
        "dataset_builder": builder,
        "learning_rate": 4e-5,
        "max_tokens": 256,
        "eval_every": 0,
    })

def main(config: train.Config):
    cli_utils.check_log_dir(config.log_path, behavior_if_exists="ask")
    asyncio.run(train.main(config))

if __name__ == "__main__":
    blueprint = build_config_blueprint()
    blueprint.make_from_argv(sys.argv[1:])
    main(blueprint.make())
```

## rl_loop.py - Manual RL Loop

```python
import chz
import datasets
import tinker
from tinker import types
from tinker.types.tensor_data import TensorData
import torch
from tinker_cookbook import model_info, renderers
from tinker_cookbook.recipes.math_rl.math_grading import extract_boxed, grade_answer
from tinker_cookbook.tokenizer_utils import get_tokenizer

@chz.chz
class Config:
    model_name: str = "meta-llama/Llama-3.1-8B"
    batch_size: int = 128
    group_size: int = 16
    learning_rate: float = 4e-5
    max_tokens: int = 256

def get_reward(response: str, answer: str) -> float:
    try:
        given = extract_boxed(response)
        return 1.0 if grade_answer(given, answer) else 0.0
    except ValueError:
        return 0.0

def main(config: Config):
    tokenizer = get_tokenizer(config.model_name)
    renderer = renderers.get_renderer(
        model_info.get_recommended_renderer_name(config.model_name),
        tokenizer
    )

    dataset = datasets.load_dataset("openai/gsm8k", "main")["train"]

    service_client = tinker.ServiceClient()
    training_client = service_client.create_lora_training_client(
        base_model=config.model_name, rank=32
    )

    sampling_params = types.SamplingParams(
        max_tokens=config.max_tokens,
        stop=renderer.get_stop_sequences(),
    )
    adam_params = types.AdamParams(learning_rate=config.learning_rate)

    for batch_idx in range(len(dataset) // config.batch_size):
        # Save weights for sampling
        path = training_client.save_weights_for_sampler(name=f"{batch_idx:06d}").result().path
        sampling_client = service_client.create_sampling_client(model_path=path)

        batch_start = batch_idx * config.batch_size
        batch_rows = dataset.select(range(batch_start, batch_start + config.batch_size))

        datums = []
        for question, answer in zip(batch_rows["question"], batch_rows["answer"]):
            convo = [{"role": "user", "content": question}]
            prompt = renderer.build_generation_prompt(convo)
            prompt_tokens = prompt.to_ints()

            result = sampling_client.sample(
                prompt=prompt,
                num_samples=config.group_size,
                sampling_params=sampling_params,
            ).result()

            rewards = [get_reward(renderers.get_text_content(renderer.parse_response(s.tokens)[0]), answer)
                       for s in result.sequences]
            mean_reward = sum(rewards) / len(rewards)
            advantages = [r - mean_reward for r in rewards]

            if all(a == 0 for a in advantages):
                continue

            for seq, advantage in zip(result.sequences, advantages):
                tokens = prompt_tokens + seq.tokens
                ob_len = len(prompt_tokens) - 1

                datum = types.Datum(
                    model_input=types.ModelInput.from_ints(tokens=tokens[:-1]),
                    loss_fn_inputs={
                        "target_tokens": TensorData.from_torch(torch.tensor(tokens[1:])),
                        "logprobs": TensorData.from_torch(torch.tensor([0.0] * ob_len + list(seq.logprobs))),
                        "advantages": TensorData.from_torch(torch.tensor([0.0] * ob_len + [advantage] * (len(tokens) - 1 - ob_len))),
                    },
                )
                datums.append(datum)

        fwd_bwd = training_client.forward_backward(datums, loss_fn="importance_sampling")
        optim = training_client.optim_step(adam_params)
        fwd_bwd.result()
        optim.result()

if __name__ == "__main__":
    chz.nested_entrypoint(main)
```

## Running Recipes

```bash
# Basic SL
python -m tinker_cookbook.recipes.sl_basic

# Manual SL loop
python -m tinker_cookbook.recipes.sl_loop

# Basic RL
python -m tinker_cookbook.recipes.rl_basic

# Manual RL loop
python -m tinker_cookbook.recipes.rl_loop
```

## CLI Overrides

```bash
python -m tinker_cookbook.recipes.sl_basic --learning_rate 1e-4 --batch_size 64
```