@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# Reinforcement Learning
|
|
2
|
+
|
|
3
|
+
## Quick Start
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
python -m tinker_cookbook.recipes.rl_basic
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
Fine-tunes Llama-3.1-8B on GSM8K with reward:
|
|
10
|
+
```
|
|
11
|
+
1[answer correct] + 0.1 * (1[format correct] - 1)
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Basic RL Config
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
import chz
|
|
18
|
+
import asyncio
|
|
19
|
+
from tinker_cookbook.rl import train
|
|
20
|
+
from tinker_cookbook import model_info
|
|
21
|
+
from tinker_cookbook.recipes.math_rl.math_env import Gsm8kDatasetBuilder
|
|
22
|
+
|
|
23
|
+
def build_config_blueprint() -> chz.Blueprint[train.Config]:
|
|
24
|
+
model_name = "meta-llama/Llama-3.1-8B"
|
|
25
|
+
renderer_name = model_info.get_recommended_renderer_name(model_name)
|
|
26
|
+
|
|
27
|
+
builder = Gsm8kDatasetBuilder(
|
|
28
|
+
batch_size=128,
|
|
29
|
+
group_size=16,
|
|
30
|
+
renderer_name=renderer_name,
|
|
31
|
+
model_name_for_tokenizer=model_name,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
return chz.Blueprint(train.Config).apply({
|
|
35
|
+
"model_name": model_name,
|
|
36
|
+
"log_path": "/tmp/rl_basic",
|
|
37
|
+
"dataset_builder": builder,
|
|
38
|
+
"learning_rate": 4e-5,
|
|
39
|
+
"max_tokens": 256,
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
if __name__ == "__main__":
|
|
43
|
+
blueprint = build_config_blueprint()
|
|
44
|
+
blueprint.make_from_argv(sys.argv[1:])
|
|
45
|
+
asyncio.run(train.main(blueprint.make()))
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Key Metrics
|
|
49
|
+
|
|
50
|
+
- `ac_tokens_per_turn` - Tokens per completion
|
|
51
|
+
- `env/all/correct` - Accuracy
|
|
52
|
+
- `env/all/format` - Format compliance
|
|
53
|
+
- `env/all/reward/total` - Mean total reward
|
|
54
|
+
- `entropy` - Per-token entropy
|
|
55
|
+
- `kl_sample_train_v1`, `kl_sample_train_v2` - KL divergence (sampler vs learner)
|
|
56
|
+
|
|
57
|
+
## Custom RL Loop
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
import tinker
|
|
61
|
+
from tinker import types
|
|
62
|
+
from tinker.types.tensor_data import TensorData
|
|
63
|
+
import torch
|
|
64
|
+
from tinker_cookbook import model_info, renderers
|
|
65
|
+
from tinker_cookbook.tokenizer_utils import get_tokenizer
|
|
66
|
+
|
|
67
|
+
@chz.chz
|
|
68
|
+
class Config:
|
|
69
|
+
model_name: str = "meta-llama/Llama-3.1-8B"
|
|
70
|
+
batch_size: int = 128
|
|
71
|
+
group_size: int = 16
|
|
72
|
+
learning_rate: float = 4e-5
|
|
73
|
+
max_tokens: int = 256
|
|
74
|
+
|
|
75
|
+
def main(config: Config):
|
|
76
|
+
service_client = tinker.ServiceClient()
|
|
77
|
+
training_client = service_client.create_lora_training_client(
|
|
78
|
+
base_model=config.model_name, rank=32
|
|
79
|
+
)
|
|
80
|
+
tokenizer = training_client.get_tokenizer()
|
|
81
|
+
renderer = renderers.get_renderer(
|
|
82
|
+
model_info.get_recommended_renderer_name(config.model_name),
|
|
83
|
+
tokenizer
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
sampling_params = types.SamplingParams(
|
|
87
|
+
max_tokens=config.max_tokens,
|
|
88
|
+
stop=renderer.get_stop_sequences(),
|
|
89
|
+
)
|
|
90
|
+
adam_params = types.AdamParams(learning_rate=config.learning_rate)
|
|
91
|
+
|
|
92
|
+
for batch_idx, batch_rows in enumerate(dataset):
|
|
93
|
+
# Save weights for sampling
|
|
94
|
+
sampling_path = training_client.save_weights_for_sampler(name=f"{batch_idx:06d}").result().path
|
|
95
|
+
sampling_client = service_client.create_sampling_client(model_path=sampling_path)
|
|
96
|
+
|
|
97
|
+
datums = []
|
|
98
|
+
for question, answer in batch_rows:
|
|
99
|
+
convo = [{"role": "user", "content": question}]
|
|
100
|
+
model_input = renderer.build_generation_prompt(convo)
|
|
101
|
+
prompt_tokens = model_input.to_ints()
|
|
102
|
+
|
|
103
|
+
# Sample group_size responses
|
|
104
|
+
result = sampling_client.sample(
|
|
105
|
+
prompt=model_input,
|
|
106
|
+
num_samples=config.group_size,
|
|
107
|
+
sampling_params=sampling_params,
|
|
108
|
+
).result()
|
|
109
|
+
|
|
110
|
+
rewards = []
|
|
111
|
+
for seq in result.sequences:
|
|
112
|
+
parsed, _ = renderer.parse_response(seq.tokens)
|
|
113
|
+
reward = compute_reward(parsed["content"], answer)
|
|
114
|
+
rewards.append(reward)
|
|
115
|
+
|
|
116
|
+
# GRPO-style advantage centering
|
|
117
|
+
mean_reward = sum(rewards) / len(rewards)
|
|
118
|
+
advantages = [r - mean_reward for r in rewards]
|
|
119
|
+
|
|
120
|
+
if all(a == 0 for a in advantages):
|
|
121
|
+
continue
|
|
122
|
+
|
|
123
|
+
for seq, advantage in zip(result.sequences, advantages):
|
|
124
|
+
tokens = prompt_tokens + seq.tokens
|
|
125
|
+
ob_len = len(prompt_tokens) - 1
|
|
126
|
+
|
|
127
|
+
datum = types.Datum(
|
|
128
|
+
model_input=types.ModelInput.from_ints(tokens=tokens[:-1]),
|
|
129
|
+
loss_fn_inputs={
|
|
130
|
+
"target_tokens": TensorData.from_torch(torch.tensor(tokens[1:])),
|
|
131
|
+
"logprobs": TensorData.from_torch(torch.tensor([0.0]*ob_len + list(seq.logprobs))),
|
|
132
|
+
"advantages": TensorData.from_torch(torch.tensor([0.0]*ob_len + [advantage]*(len(tokens)-1-ob_len))),
|
|
133
|
+
},
|
|
134
|
+
)
|
|
135
|
+
datums.append(datum)
|
|
136
|
+
|
|
137
|
+
# Training step
|
|
138
|
+
fwd_bwd = training_client.forward_backward(datums, loss_fn="importance_sampling")
|
|
139
|
+
optim = training_client.optim_step(adam_params)
|
|
140
|
+
fwd_bwd.result()
|
|
141
|
+
optim.result()
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Hyperparameters
|
|
145
|
+
|
|
146
|
+
### Batch and Group Sizes
|
|
147
|
+
|
|
148
|
+
- `batch_size`: Number of unique problems
|
|
149
|
+
- `group_size`: Rollouts per problem (for variance reduction)
|
|
150
|
+
|
|
151
|
+
Scale the learning rate with the square root of the batch size: `LR ∝ √batch_size`
|
|
152
|
+
|
|
153
|
+
### Multiple Updates (num_substeps)
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
# Default: 1 update per batch
|
|
157
|
+
num_substeps = 1
|
|
158
|
+
|
|
159
|
+
# Multiple updates: split batch into mini-batches
|
|
160
|
+
num_substeps = 4 # Batch must be divisible
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Use multiple substeps with the PPO objective. Start with `num_substeps` between 2 and 4.
|
|
164
|
+
|
|
165
|
+
### Streaming Minibatch Training
|
|
166
|
+
|
|
167
|
+
Overlaps sampling and training for throughput:
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
StreamMinibatchConfig(
|
|
171
|
+
groups_per_batch=128,
|
|
172
|
+
num_minibatches=8,
|
|
173
|
+
)
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Async Off-Policy Training
|
|
177
|
+
|
|
178
|
+
For long rollouts, allow training on trajectories that are up to `max_steps_off_policy` steps old:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
AsyncConfig(
|
|
182
|
+
max_steps_off_policy=3, # Max age of trajectories
|
|
183
|
+
groups_per_batch=64,
|
|
184
|
+
)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Monitoring
|
|
188
|
+
|
|
189
|
+
### KL Divergence
|
|
190
|
+
|
|
191
|
+
Monitor `kl_sample_train_v1` and `kl_sample_train_v2`:
|
|
192
|
+
- Should stay below 0.01 for stable training
|
|
193
|
+
- High KL indicates numerical instability
|
|
194
|
+
|
|
195
|
+
### Reward Curves
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
import pandas
|
|
199
|
+
df = pandas.read_json("/tmp/rl-loop/metrics.jsonl", lines=True)
|
|
200
|
+
plt.plot(df["reward/total"])
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Loss Functions for RL
|
|
204
|
+
|
|
205
|
+
| Loss | Description |
|
|
206
|
+
|------|-------------|
|
|
207
|
+
| `importance_sampling` | Policy gradient with importance weighting |
|
|
208
|
+
| `ppo` | Proximal Policy Optimization with clipping |
|
|
209
|
+
| `cispo` | Clipped Importance Sampling PO |
|
|
210
|
+
| `dro` | Direct Reward Optimization |
|
|
211
|
+
|
|
212
|
+
See [Loss Functions](loss-functions.md) for details.
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# Rendering to Tokens
|
|
2
|
+
|
|
3
|
+
Renderers convert messages ↔ tokens for training and inference.
|
|
4
|
+
|
|
5
|
+
## Getting a Renderer
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from tinker_cookbook.model_info import get_recommended_renderer_name
|
|
9
|
+
from tinker_cookbook.renderers import get_renderer
|
|
10
|
+
from tinker_cookbook.tokenizer_utils import get_tokenizer
|
|
11
|
+
|
|
12
|
+
model_name = "meta-llama/Llama-3.1-8B"
|
|
13
|
+
renderer_name = get_recommended_renderer_name(model_name)
|
|
14
|
+
|
|
15
|
+
tokenizer = get_tokenizer(model_name)
|
|
16
|
+
renderer = get_renderer(name=renderer_name, tokenizer=tokenizer)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
**Renderer names:** `qwen3`, `qwen3_disable_thinking`, `qwen3_instruct`, `qwen3_vl`, `qwen3_vl_instruct`, `llama3`, `deepseekv3`, `deepseekv3_thinking`, `kimi_k2`, `gpt_oss_no_sysprompt`, `gpt_oss_low_reasoning`, `gpt_oss_medium_reasoning`, `gpt_oss_high_reasoning`, `role_colon`
|
|
20
|
+
|
|
21
|
+
## HuggingFace Compatibility
|
|
22
|
+
|
|
23
|
+
Default renderers produce **identical tokens** to HuggingFace's `apply_chat_template`:
|
|
24
|
+
|
|
25
|
+
| Renderer | HF Equivalent |
|
|
26
|
+
|----------|---------------|
|
|
27
|
+
| `qwen3` | `apply_chat_template(..., enable_thinking=True)` |
|
|
28
|
+
| `qwen3_disable_thinking` | `apply_chat_template(..., enable_thinking=False)` |
|
|
29
|
+
| `llama3` | `apply_chat_template(...)` * |
|
|
30
|
+
| `deepseekv3` | `apply_chat_template(...)` |
|
|
31
|
+
|
|
32
|
+
\* Llama3 omits "Cutting Knowledge Date..." preamble
|
|
33
|
+
|
|
34
|
+
## Core Methods
|
|
35
|
+
|
|
36
|
+
### build_supervised_example
|
|
37
|
+
|
|
38
|
+
For training with loss weights:
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from tinker_cookbook.renderers import TrainOnWhat
|
|
42
|
+
|
|
43
|
+
messages = [
|
|
44
|
+
{"role": "user", "content": "What is 2+2?"},
|
|
45
|
+
{"role": "assistant", "content": "4"},
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
model_input, weights = renderer.build_supervised_example(
|
|
49
|
+
messages,
|
|
50
|
+
train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
|
|
51
|
+
)
|
|
52
|
+
# model_input: ModelInput (list of chunks)
|
|
53
|
+
# weights: per-token weights (0.0 = prompt, 1.0 = train)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### build_generation_prompt
|
|
57
|
+
|
|
58
|
+
For inference:
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
messages = [
|
|
62
|
+
{"role": "system", "content": "You are helpful."},
|
|
63
|
+
{"role": "user", "content": "What is 2+2?"},
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
prompt = renderer.build_generation_prompt(messages)
|
|
67
|
+
# Returns ModelInput ready for sampling
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### get_stop_sequences
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
stop_sequences = renderer.get_stop_sequences()
|
|
74
|
+
|
|
75
|
+
sampling_params = SamplingParams(
|
|
76
|
+
max_tokens=100,
|
|
77
|
+
stop=stop_sequences,
|
|
78
|
+
)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### parse_response
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
output_tokens = result.sequences[0].tokens
|
|
85
|
+
message, success = renderer.parse_response(output_tokens)
|
|
86
|
+
# {"role": "assistant", "content": "..."}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## TrainOnWhat Enum
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from tinker_cookbook.renderers import TrainOnWhat
|
|
93
|
+
|
|
94
|
+
# Train on ALL assistant messages
|
|
95
|
+
TrainOnWhat.ALL_ASSISTANT_MESSAGES
|
|
96
|
+
|
|
97
|
+
# Train only on LAST assistant message
|
|
98
|
+
TrainOnWhat.LAST_ASSISTANT_MESSAGE
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**ALL_ASSISTANT_MESSAGES:**
|
|
102
|
+
```python
|
|
103
|
+
messages = [
|
|
104
|
+
{"role": "user", "content": "Hello"},
|
|
105
|
+
{"role": "assistant", "content": "Hi!"}, # weight=1
|
|
106
|
+
{"role": "user", "content": "How are you?"},
|
|
107
|
+
{"role": "assistant", "content": "Good!"}, # weight=1
|
|
108
|
+
]
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**LAST_ASSISTANT_MESSAGE:**
|
|
112
|
+
```python
|
|
113
|
+
messages = [
|
|
114
|
+
{"role": "user", "content": "Let me think..."},
|
|
115
|
+
{"role": "assistant", "content": "..."}, # weight=0
|
|
116
|
+
{"role": "user", "content": "Answer?"},
|
|
117
|
+
{"role": "assistant", "content": "42"}, # weight=1
|
|
118
|
+
]
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Use `TrainOnWhat.LAST_ASSISTANT_MESSAGE` for classification, reward modeling, and preference learning.
|
|
122
|
+
|
|
123
|
+
## Message Formats
|
|
124
|
+
|
|
125
|
+
### Text-Only
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
messages = [
|
|
129
|
+
{"role": "system", "content": "You are helpful."},
|
|
130
|
+
{"role": "user", "content": "Hello"},
|
|
131
|
+
{"role": "assistant", "content": "Hi!"},
|
|
132
|
+
]
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Vision (Multi-Modal)
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
messages = [
|
|
139
|
+
{
|
|
140
|
+
"role": "user",
|
|
141
|
+
"content": [
|
|
142
|
+
{"type": "image", "image": image_bytes},
|
|
143
|
+
{"type": "text", "text": "What's in this image?"},
|
|
144
|
+
]
|
|
145
|
+
},
|
|
146
|
+
{"role": "assistant", "content": "A cat."}
|
|
147
|
+
]
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Using with conversation_to_datum
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from tinker_cookbook.supervised.data import conversation_to_datum
|
|
154
|
+
|
|
155
|
+
datum = conversation_to_datum(
|
|
156
|
+
messages=messages,
|
|
157
|
+
renderer=renderer,
|
|
158
|
+
max_length=2048,
|
|
159
|
+
train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
|
|
160
|
+
)
|
|
161
|
+
# Returns Datum ready for training
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Format Examples
|
|
165
|
+
|
|
166
|
+
### ChatML
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
<|im_start|>system
|
|
170
|
+
You are helpful.<|im_end|>
|
|
171
|
+
<|im_start|>user
|
|
172
|
+
What is 2+2?<|im_end|>
|
|
173
|
+
<|im_start|>assistant
|
|
174
|
+
4<|im_end|>
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Llama 3
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
|
181
|
+
|
|
182
|
+
You are helpful.<|eot_id|><|start_header_id|>user<|end_header_id|>
|
|
183
|
+
|
|
184
|
+
What is 2+2?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
|
185
|
+
|
|
186
|
+
4<|eot_id|>
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**Important:** Don't construct formats manually—use renderers!
|
|
190
|
+
|
|
191
|
+
## Vision Renderers
|
|
192
|
+
|
|
193
|
+
For VLMs (Qwen3-VL):
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from tinker_cookbook import renderers
from tinker_cookbook.image_processing_utils import get_image_processor
from tinker_cookbook.tokenizer_utils import get_tokenizer
|
|
197
|
+
|
|
198
|
+
model_name = "Qwen/Qwen3-VL-235B-A22B-Instruct"
|
|
199
|
+
tokenizer = get_tokenizer(model_name)
|
|
200
|
+
image_processor = get_image_processor(model_name)
|
|
201
|
+
|
|
202
|
+
renderer = renderers.Qwen3VLInstructRenderer(tokenizer, image_processor)
|
|
203
|
+
|
|
204
|
+
messages = [
|
|
205
|
+
{
|
|
206
|
+
"role": "user",
|
|
207
|
+
"content": [
|
|
208
|
+
{"type": "image", "image": "https://example.com/image.png"},
|
|
209
|
+
{"type": "text", "text": "What is this?"},
|
|
210
|
+
]
|
|
211
|
+
}
|
|
212
|
+
]
|
|
213
|
+
|
|
214
|
+
prompt = renderer.build_generation_prompt(messages)
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## In Dataset Builders
|
|
218
|
+
|
|
219
|
+
`ChatDatasetBuilder` creates renderer automatically:
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
@chz.chz
|
|
223
|
+
class MyDatasetBuilder(ChatDatasetBuilder):
|
|
224
|
+
common_config: ChatDatasetBuilderCommonConfig
|
|
225
|
+
|
|
226
|
+
def __call__(self):
|
|
227
|
+
def map_fn(row):
|
|
228
|
+
return conversation_to_datum(
|
|
229
|
+
messages=row["messages"],
|
|
230
|
+
renderer=self.renderer, # Auto-created from common_config
|
|
231
|
+
max_length=self.common_config.max_length,
|
|
232
|
+
train_on_what=self.common_config.train_on_what,
|
|
233
|
+
)
|
|
234
|
+
# ...
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Troubleshooting
|
|
238
|
+
|
|
239
|
+
**Wrong format:** Use `get_recommended_renderer_name(model_name)`
|
|
240
|
+
|
|
241
|
+
**High loss:** Check weights (0.0 for prompts, 1.0 for completions)
|
|
242
|
+
|
|
243
|
+
**Generation doesn't stop:** Use `renderer.get_stop_sequences()` in SamplingParams
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
# Supervised Learning
|
|
2
|
+
|
|
3
|
+
## Quick Start with Cookbook
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
python -m tinker_cookbook.recipes.sl_basic
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## Blueprint Pattern (Recommended)
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
import chz
|
|
13
|
+
import sys
|
|
14
|
+
import asyncio
|
|
15
|
+
from tinker_cookbook.supervised import train
|
|
16
|
+
from tinker_cookbook.supervised.types import ChatDatasetBuilderCommonConfig
|
|
17
|
+
from tinker_cookbook.supervised.data import FromConversationFileBuilder
|
|
18
|
+
from tinker_cookbook.renderers import TrainOnWhat
|
|
19
|
+
from tinker_cookbook.model_info import get_recommended_renderer_name
|
|
20
|
+
|
|
21
|
+
def build_config_blueprint() -> chz.Blueprint[train.Config]:
|
|
22
|
+
model_name = "meta-llama/Llama-3.1-8B"
|
|
23
|
+
renderer_name = get_recommended_renderer_name(model_name)
|
|
24
|
+
|
|
25
|
+
common_config = ChatDatasetBuilderCommonConfig(
|
|
26
|
+
model_name_for_tokenizer=model_name,
|
|
27
|
+
renderer_name=renderer_name,
|
|
28
|
+
max_length=2048,
|
|
29
|
+
batch_size=128,
|
|
30
|
+
train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
dataset_builder = FromConversationFileBuilder(
|
|
34
|
+
common_config=common_config,
|
|
35
|
+
file_path="data.jsonl",
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
return chz.Blueprint(train.Config).apply({
|
|
39
|
+
"log_path": "/tmp/training",
|
|
40
|
+
"model_name": model_name,
|
|
41
|
+
"dataset_builder": dataset_builder,
|
|
42
|
+
"learning_rate": 2e-4,
|
|
43
|
+
"lr_schedule": "cosine",
|
|
44
|
+
"num_epochs": 3,
|
|
45
|
+
"lora_rank": 32,
|
|
46
|
+
})
|
|
47
|
+
|
|
48
|
+
if __name__ == "__main__":
|
|
49
|
+
blueprint = build_config_blueprint()
|
|
50
|
+
blueprint.make_from_argv(sys.argv[1:])
|
|
51
|
+
asyncio.run(train.main(blueprint.make()))
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## HuggingFace Dataset Builder
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from tinker_cookbook.supervised.types import ChatDatasetBuilder
|
|
58
|
+
from tinker_cookbook.supervised.data import SupervisedDatasetFromHFDataset, conversation_to_datum
|
|
59
|
+
import datasets
|
|
60
|
+
|
|
61
|
+
@chz.chz
|
|
62
|
+
class MyDatasetBuilder(ChatDatasetBuilder):
|
|
63
|
+
common_config: ChatDatasetBuilderCommonConfig
|
|
64
|
+
|
|
65
|
+
def __call__(self):
|
|
66
|
+
hf_dataset = datasets.load_dataset("HuggingFaceH4/no_robots", split="train")
|
|
67
|
+
split = hf_dataset.train_test_split(test_size=0.1, seed=42)
|
|
68
|
+
|
|
69
|
+
def map_fn(row):
|
|
70
|
+
messages = [
|
|
71
|
+
{"role": "user", "content": row["prompt"]},
|
|
72
|
+
{"role": "assistant", "content": row["completion"]},
|
|
73
|
+
]
|
|
74
|
+
return conversation_to_datum(
|
|
75
|
+
messages=messages,
|
|
76
|
+
renderer=self.renderer,
|
|
77
|
+
max_length=self.common_config.max_length,
|
|
78
|
+
train_on_what=self.common_config.train_on_what,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
train_dataset = SupervisedDatasetFromHFDataset(
|
|
82
|
+
hf_dataset=split["train"],
|
|
83
|
+
batch_size=self.common_config.batch_size,
|
|
84
|
+
map_fn=map_fn,
|
|
85
|
+
)
|
|
86
|
+
test_dataset = SupervisedDatasetFromHFDataset(
|
|
87
|
+
hf_dataset=split["test"],
|
|
88
|
+
batch_size=self.common_config.batch_size,
|
|
89
|
+
map_fn=map_fn,
|
|
90
|
+
)
|
|
91
|
+
return train_dataset, test_dataset
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Streaming Large Datasets
|
|
95
|
+
|
|
96
|
+
For datasets >1M examples:
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from tinker_cookbook.supervised.data import StreamingSupervisedDatasetFromHFDataset
|
|
100
|
+
|
|
101
|
+
@chz.chz
|
|
102
|
+
class StreamingDatasetBuilder(ChatDatasetBuilder):
|
|
103
|
+
common_config: ChatDatasetBuilderCommonConfig
|
|
104
|
+
max_prompts: int = 100000
|
|
105
|
+
|
|
106
|
+
def __call__(self):
|
|
107
|
+
ds = datasets.load_dataset(
|
|
108
|
+
"open-thoughts/OpenThoughts3-1.2M",
|
|
109
|
+
split="train",
|
|
110
|
+
streaming=True, # Important!
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
def map_fn(row):
|
|
114
|
+
messages = [
|
|
115
|
+
{"role": "user", "content": row["question"]},
|
|
116
|
+
{"role": "assistant", "content": row["response"]},
|
|
117
|
+
]
|
|
118
|
+
return conversation_to_datum(
|
|
119
|
+
messages=messages,
|
|
120
|
+
renderer=self.renderer,
|
|
121
|
+
max_length=self.common_config.max_length,
|
|
122
|
+
train_on_what=self.common_config.train_on_what,
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
train_dataset = StreamingSupervisedDatasetFromHFDataset(
|
|
126
|
+
hf_dataset=ds,
|
|
127
|
+
batch_size=self.common_config.batch_size,
|
|
128
|
+
length=self.max_prompts, # Required
|
|
129
|
+
map_fn=map_fn,
|
|
130
|
+
buffer_size=10000,
|
|
131
|
+
)
|
|
132
|
+
return train_dataset, train_dataset.take(1000)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## File-Based Dataset
|
|
136
|
+
|
|
137
|
+
JSONL format:
|
|
138
|
+
```json
|
|
139
|
+
{"messages": [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
from tinker_cookbook.supervised.data import FromConversationFileBuilder
|
|
144
|
+
|
|
145
|
+
dataset_builder = FromConversationFileBuilder(
|
|
146
|
+
common_config=common_config,
|
|
147
|
+
file_path="/path/to/data.jsonl",
|
|
148
|
+
)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## TrainOnWhat Enum
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
from tinker_cookbook.renderers import TrainOnWhat
|
|
155
|
+
|
|
156
|
+
TrainOnWhat.ALL_ASSISTANT_MESSAGES # Train on all assistant turns
|
|
157
|
+
TrainOnWhat.LAST_ASSISTANT_MESSAGE # Train only on final response
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Use `LAST_ASSISTANT_MESSAGE` for:
|
|
161
|
+
- Classification (only final answer matters)
|
|
162
|
+
- Chain-of-thought where intermediate steps shouldn't be trained
|
|
163
|
+
- Preference learning
|
|
164
|
+
|
|
165
|
+
## Custom Dataset
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from tinker_cookbook.supervised.types import SupervisedDataset
|
|
169
|
+
from tinker.types import Datum, ModelInput, TensorData
|
|
170
|
+
import numpy as np
|
|
171
|
+
|
|
172
|
+
class CustomDataset(SupervisedDataset):
|
|
173
|
+
def __init__(self, config):
|
|
174
|
+
self.config = config
|
|
175
|
+
self.tokenizer = get_tokenizer(config.model_name)
|
|
176
|
+
self.renderer = get_renderer(config.renderer_name, self.tokenizer)
|
|
177
|
+
self.data = self._load_data()
|
|
178
|
+
|
|
179
|
+
def __len__(self):
|
|
180
|
+
return len(self.data) // self.config.batch_size
|
|
181
|
+
|
|
182
|
+
def __iter__(self):
|
|
183
|
+
for item in self.data:
|
|
184
|
+
messages = self._preprocess(item)
|
|
185
|
+
example = self.renderer.build_supervised_example(
|
|
186
|
+
messages=messages,
|
|
187
|
+
train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
|
|
188
|
+
)
|
|
189
|
+
yield Datum(
|
|
190
|
+
model_input=ModelInput([example.chunk]),
|
|
191
|
+
loss_fn_inputs={
|
|
192
|
+
"target_tokens": TensorData.from_numpy(np.array(example.target_tokens, dtype=np.int64)),
|
|
193
|
+
"weights": TensorData.from_numpy(np.array(example.weights, dtype=np.float32)),
|
|
194
|
+
},
|
|
195
|
+
)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Hyperparameters
|
|
199
|
+
|
|
200
|
+
### Learning Rate
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
from tinker_cookbook.hyperparam_utils import get_lr
|
|
204
|
+
|
|
205
|
+
model_name = "meta-llama/Llama-3.2-1B"
|
|
206
|
+
recommended_lr = get_lr(model_name)
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
Formula: `LR = lr_base * M_LoRA * (2000/H_m)^P_m`, where `H_m` is the model's hidden size and:
|
|
210
|
+
- `lr_base = 5e-5`
|
|
211
|
+
- `M_LoRA = 10` (multiplier for LoRA)
|
|
212
|
+
- `P_m = 0.0775` (Qwen) or `0.781` (Llama)
|
|
213
|
+
|
|
214
|
+
### Batch Size
|
|
215
|
+
|
|
216
|
+
- Smaller batch sizes (128) generally better for fine-tuning
|
|
217
|
+
- Scale LR with `LR ∝ √batch_size`
|
|
218
|
+
- Aim for at least 100 steps of training
|
|
219
|
+
|
|
220
|
+
## Output Files
|
|
221
|
+
|
|
222
|
+
After training, check `log_path` for:
|
|
223
|
+
- `metrics.jsonl` - Training metrics
|
|
224
|
+
- `checkpoints.jsonl` - Saved checkpoints
|
|
225
|
+
- `config.json` - Training configuration
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
import matplotlib.pyplot as plt
import pandas
|
|
229
|
+
df = pandas.read_json("/tmp/training/metrics.jsonl", lines=True)
|
|
230
|
+
plt.plot(df['train_mean_nll'], label='train_loss')
|
|
231
|
+
plt.plot(df['test/nll'].dropna(), label='test_loss')
|
|
232
|
+
```
|