@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: tinker-fine-tuning
|
|
3
|
+
description: Provides guidance for fine-tuning LLMs using the Tinker cloud training API from Thinking Machines Lab. Use when running supervised fine-tuning, reinforcement learning (GRPO/PPO), or LoRA training on cloud GPUs via Tinker's managed infrastructure instead of local compute.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: Synthetic Sciences
|
|
6
|
+
license: MIT
|
|
7
|
+
tags: [Fine-Tuning, Tinker, LoRA, Reinforcement Learning, Supervised Learning, Cloud Training, Vision-Language Models]
|
|
8
|
+
dependencies: [tinker, tinker-cookbook, chz, transformers>=4.40.0, datasets, numpy]
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Tinker API - Cloud LLM Fine-Tuning
|
|
12
|
+
|
|
13
|
+
Expert guidance for fine-tuning large language models using Tinker's managed cloud training API. Tinker handles GPU allocation, model hosting, and distributed training — you write the training logic, Tinker runs it on cloud infrastructure.
|
|
14
|
+
|
|
15
|
+
## When to Use This Skill
|
|
16
|
+
|
|
17
|
+
**Use Tinker when you need to:**
|
|
18
|
+
- Fine-tune models up to 235B parameters without managing GPU infrastructure
|
|
19
|
+
- Run LoRA training on Qwen, Llama, DeepSeek, or GPT-OSS models
|
|
20
|
+
- Train vision-language models (Qwen3-VL)
|
|
21
|
+
- Implement custom RL loops (GRPO, PPO, importance sampling) on cloud GPUs
|
|
22
|
+
- Iterate quickly with a training API that handles hardware provisioning
|
|
23
|
+
|
|
24
|
+
**Do NOT use Tinker when:**
|
|
25
|
+
- You need full fine-tuning (not LoRA) — Tinker only supports LoRA
|
|
26
|
+
- You need to train custom architectures — Tinker supports specific model families
|
|
27
|
+
- You want to use your own GPUs — use Axolotl, Unsloth, or LLaMA-Factory instead
|
|
28
|
+
- You need offline/air-gapped training
|
|
29
|
+
|
|
30
|
+
**Tinker vs Alternatives:**
|
|
31
|
+
|
|
32
|
+
| Need | Use |
|
|
33
|
+
|------|-----|
|
|
34
|
+
| Managed cloud LoRA training | **Tinker** |
|
|
35
|
+
| Local GPU fine-tuning | Axolotl, Unsloth, LLaMA-Factory |
|
|
36
|
+
| Full parameter fine-tuning | DeepSpeed + Transformers |
|
|
37
|
+
| RLHF with TRL locally | TRL + GRPO skill |
|
|
38
|
+
| Quantized training | Unsloth, bitsandbytes |
|
|
39
|
+
|
|
40
|
+
## Quick Reference
|
|
41
|
+
|
|
42
|
+
| Topic | Reference |
|
|
43
|
+
|-------|-----------|
|
|
44
|
+
| Setup & Core Concepts | [Getting Started](references/getting-started.md) |
|
|
45
|
+
| API Classes & Types | [API Reference](references/api-reference.md) |
|
|
46
|
+
| Supervised Learning | [Supervised Learning](references/supervised-learning.md) |
|
|
47
|
+
| RL Training | [Reinforcement Learning](references/reinforcement-learning.md) |
|
|
48
|
+
| Loss Functions | [Loss Functions](references/loss-functions.md) |
|
|
49
|
+
| Chat Templates | [Rendering](references/rendering.md) |
|
|
50
|
+
| Models & LoRA | [Models & LoRA](references/models-and-lora.md) |
|
|
51
|
+
| Example Scripts | [Recipes](references/recipes.md) |
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install tinker tinker-cookbook
|
|
57
|
+
export TINKER_API_KEY=your_api_key_here
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Workflow 1: Supervised Fine-Tuning (Cookbook)
|
|
63
|
+
|
|
64
|
+
Use this for standard SFT with JSONL or HuggingFace datasets.
|
|
65
|
+
|
|
66
|
+
### Checklist
|
|
67
|
+
- [ ] Prepare data in JSONL chat format (`{"messages": [...]}`)
|
|
68
|
+
- [ ] Choose base model (see model table below)
|
|
69
|
+
- [ ] Set hyperparameters (LR, batch size, epochs)
|
|
70
|
+
- [ ] Run training via Cookbook
|
|
71
|
+
- [ ] Monitor metrics (`train_mean_nll`, `test/nll`)
|
|
72
|
+
- [ ] Save and deploy weights
|
|
73
|
+
|
|
74
|
+
### Implementation
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import chz
|
|
78
|
+
import asyncio
|
|
79
|
+
from tinker_cookbook.supervised import train
|
|
80
|
+
from tinker_cookbook.supervised.types import ChatDatasetBuilderCommonConfig
|
|
81
|
+
from tinker_cookbook.supervised.data import FromConversationFileBuilder
|
|
82
|
+
from tinker_cookbook.renderers import TrainOnWhat
|
|
83
|
+
from tinker_cookbook.model_info import get_recommended_renderer_name
|
|
84
|
+
|
|
85
|
+
model_name = "Qwen/Qwen3-30B-A3B"
|
|
86
|
+
renderer_name = get_recommended_renderer_name(model_name)
|
|
87
|
+
|
|
88
|
+
common_config = ChatDatasetBuilderCommonConfig(
|
|
89
|
+
model_name_for_tokenizer=model_name,
|
|
90
|
+
renderer_name=renderer_name,
|
|
91
|
+
max_length=2048,
|
|
92
|
+
batch_size=128,
|
|
93
|
+
train_on_what=TrainOnWhat.ALL_ASSISTANT_MESSAGES,
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
dataset_builder = FromConversationFileBuilder(
|
|
97
|
+
common_config=common_config,
|
|
98
|
+
file_path="training_data.jsonl",
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
blueprint = chz.Blueprint(train.Config).apply({
|
|
102
|
+
"log_path": "/tmp/sft-run",
|
|
103
|
+
"model_name": model_name,
|
|
104
|
+
"dataset_builder": dataset_builder,
|
|
105
|
+
"learning_rate": 2e-4,
|
|
106
|
+
"lr_schedule": "cosine",
|
|
107
|
+
"num_epochs": 3,
|
|
108
|
+
"lora_rank": 32,
|
|
109
|
+
})
|
|
110
|
+
|
|
111
|
+
config = blueprint.make()
|
|
112
|
+
asyncio.run(train.main(config))
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Data Format
|
|
116
|
+
|
|
117
|
+
JSONL with chat messages (one per line):
|
|
118
|
+
```json
|
|
119
|
+
{"messages": [{"role": "user", "content": "Translate to French: hello"}, {"role": "assistant", "content": "bonjour"}]}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### TrainOnWhat Options
|
|
123
|
+
|
|
124
|
+
| Option | When to Use |
|
|
125
|
+
|--------|-------------|
|
|
126
|
+
| `ALL_ASSISTANT_MESSAGES` | Standard SFT, multi-turn conversations |
|
|
127
|
+
| `LAST_ASSISTANT_MESSAGE` | Classification, chain-of-thought where only final answer matters |
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Workflow 2: Reinforcement Learning (GRPO-style)
|
|
132
|
+
|
|
133
|
+
Use this for training with reward functions — math reasoning, format compliance, verifiable tasks.
|
|
134
|
+
|
|
135
|
+
### Checklist
|
|
136
|
+
- [ ] Define reward function(s) that return float scores
|
|
137
|
+
- [ ] Choose group size (16 recommended)
|
|
138
|
+
- [ ] Set up sampling → reward → training loop
|
|
139
|
+
- [ ] Monitor `correct`, `format`, `reward/total`, KL divergence
|
|
140
|
+
- [ ] Keep KL below 0.01 for stable training
|
|
141
|
+
|
|
142
|
+
### Implementation (Cookbook)
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
import asyncio
|
|
146
|
+
import chz
|
|
147
|
+
from tinker_cookbook.rl import train
|
|
148
|
+
from tinker_cookbook.recipes.math_rl.math_env import Gsm8kDatasetBuilder
|
|
149
|
+
from tinker_cookbook import model_info
|
|
150
|
+
|
|
151
|
+
model_name = "meta-llama/Llama-3.1-8B"
|
|
152
|
+
renderer_name = model_info.get_recommended_renderer_name(model_name)
|
|
153
|
+
|
|
154
|
+
builder = Gsm8kDatasetBuilder(
|
|
155
|
+
batch_size=128,
|
|
156
|
+
group_size=16,
|
|
157
|
+
renderer_name=renderer_name,
|
|
158
|
+
model_name_for_tokenizer=model_name,
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
blueprint = chz.Blueprint(train.Config).apply({
|
|
162
|
+
"model_name": model_name,
|
|
163
|
+
"log_path": "/tmp/rl-run",
|
|
164
|
+
"dataset_builder": builder,
|
|
165
|
+
"learning_rate": 4e-5,
|
|
166
|
+
"max_tokens": 256,
|
|
167
|
+
})
|
|
168
|
+
|
|
169
|
+
config = blueprint.make()
|
|
170
|
+
asyncio.run(train.main(config))
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Custom RL with Low-Level API
|
|
174
|
+
|
|
175
|
+
For full control over sampling, reward computation, and advantage centering:
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
import tinker
|
|
179
|
+
from tinker import types
|
|
180
|
+
from tinker.types.tensor_data import TensorData
|
|
181
|
+
import torch
|
|
182
|
+
|
|
183
|
+
service_client = tinker.ServiceClient()
|
|
184
|
+
training_client = service_client.create_lora_training_client(
|
|
185
|
+
base_model="meta-llama/Llama-3.1-8B", rank=32
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
for batch_idx, batch_rows in enumerate(dataset):
|
|
189
|
+
path = training_client.save_weights_for_sampler(name=f"{batch_idx:06d}").result().path
|
|
190
|
+
sampling_client = service_client.create_sampling_client(model_path=path)
|
|
191
|
+
|
|
192
|
+
datums = []
|
|
193
|
+
for question, answer in batch_rows:
|
|
194
|
+
prompt = renderer.build_generation_prompt([{"role": "user", "content": question}])
|
|
195
|
+
prompt_tokens = prompt.to_ints()
|
|
196
|
+
result = sampling_client.sample(
|
|
197
|
+
prompt=prompt, num_samples=16,
|
|
198
|
+
sampling_params=types.SamplingParams(max_tokens=256, stop=renderer.get_stop_sequences()),
|
|
199
|
+
).result()
|
|
200
|
+
|
|
201
|
+
rewards = [compute_reward(seq, answer) for seq in result.sequences]
|
|
202
|
+
mean_reward = sum(rewards) / len(rewards)
|
|
203
|
+
advantages = [r - mean_reward for r in rewards]
|
|
204
|
+
if all(a == 0 for a in advantages):
|
|
205
|
+
continue
|
|
206
|
+
|
|
207
|
+
for seq, advantage in zip(result.sequences, advantages):
|
|
208
|
+
tokens = prompt_tokens + seq.tokens
|
|
209
|
+
ob_len = len(prompt_tokens) - 1
|
|
210
|
+
datum = types.Datum(
|
|
211
|
+
model_input=types.ModelInput.from_ints(tokens=tokens[:-1]),
|
|
212
|
+
loss_fn_inputs={
|
|
213
|
+
"target_tokens": TensorData.from_torch(torch.tensor(tokens[1:])),
|
|
214
|
+
"logprobs": TensorData.from_torch(torch.tensor([0.0]*ob_len + list(seq.logprobs))),
|
|
215
|
+
"advantages": TensorData.from_torch(torch.tensor([0.0]*ob_len + [advantage]*(len(tokens)-1-ob_len))),
|
|
216
|
+
},
|
|
217
|
+
)
|
|
218
|
+
datums.append(datum)
|
|
219
|
+
|
|
220
|
+
fwd_bwd = training_client.forward_backward(datums, loss_fn="importance_sampling")
|
|
221
|
+
optim = training_client.optim_step(types.AdamParams(learning_rate=4e-5))
|
|
222
|
+
fwd_bwd.result(); optim.result()
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Available RL Loss Functions
|
|
226
|
+
|
|
227
|
+
| Loss | Use Case |
|
|
228
|
+
|------|----------|
|
|
229
|
+
| `importance_sampling` | Standard policy gradient with off-policy correction |
|
|
230
|
+
| `ppo` | Clipped surrogate objective (PPO) |
|
|
231
|
+
| `cispo` | Clipped importance-sampling policy optimization |
|
|
232
|
+
| `dro` | Direct reward optimization with quadratic penalty |
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## Available Models
|
|
237
|
+
|
|
238
|
+
| Model | Type | Architecture | Train $/M tokens |
|
|
239
|
+
|-------|------|-------------|------------------|
|
|
240
|
+
| Qwen3-4B-Instruct-2507 | Instruction | Dense Compact | $0.22 |
|
|
241
|
+
| Qwen3-8B | Hybrid | Dense Small | $0.40 |
|
|
242
|
+
| Qwen3-30B-A3B | Hybrid | MoE Medium | $0.36 |
|
|
243
|
+
| Qwen3-32B | Hybrid | Dense Medium | $1.47 |
|
|
244
|
+
| Qwen3-VL-30B-A3B-Instruct | Vision | MoE Medium | $0.53 |
|
|
245
|
+
| Llama-3.2-1B | Base | Dense Compact | $0.09 |
|
|
246
|
+
| Llama-3.1-8B | Base | Dense Small | $0.40 |
|
|
247
|
+
| Llama-3.1-70B | Base | Dense Large | $3.16 |
|
|
248
|
+
| DeepSeek-V3.1 | Hybrid | MoE Large | $3.38 |
|
|
249
|
+
| GPT-OSS-120B | Reasoning | MoE Medium | $0.52 |
|
|
250
|
+
|
|
251
|
+
**Model Selection Tips:**
|
|
252
|
+
- **Cost efficiency**: MoE models (e.g. Qwen3-30B-A3B at $0.36/M tokens)
|
|
253
|
+
- **Experimentation**: Start with 8B models
|
|
254
|
+
- **Vision tasks**: Qwen3-VL-30B-A3B-Instruct
|
|
255
|
+
- **Reasoning**: Hybrid or Reasoning models with chain-of-thought
|
|
256
|
+
|
|
257
|
+
## LoRA Configuration
|
|
258
|
+
|
|
259
|
+
Tinker exclusively uses LoRA. Default rank: 32.
|
|
260
|
+
|
|
261
|
+
```python
|
|
262
|
+
training_client = service_client.create_lora_training_client(
|
|
263
|
+
base_model="Qwen/Qwen3-30B-A3B",
|
|
264
|
+
rank=32,
|
|
265
|
+
train_attn=True,
|
|
266
|
+
train_mlp=True,
|
|
267
|
+
seed=42,
|
|
268
|
+
)
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
**Critical**: LoRA needs 20-100x higher LR than full fine-tuning. Use `tinker_cookbook.hyperparam_utils.get_lr()` for recommended values.
|
|
272
|
+
|
|
273
|
+
## Hyperparameter Guide
|
|
274
|
+
|
|
275
|
+
| Parameter | SFT Default | RL Default | Notes |
|
|
276
|
+
|-----------|-------------|------------|-------|
|
|
277
|
+
| `learning_rate` | 2e-4 | 4e-5 | Use `get_lr(model_name)` for recommended |
|
|
278
|
+
| `batch_size` | 128 | 128 | Smaller batch sizes generally work better for fine-tuning |
|
|
279
|
+
| `lora_rank` | 32 | 32 | Higher rank = more capacity |
|
|
280
|
+
| `group_size` | N/A | 16 | Rollouts per problem for RL |
|
|
281
|
+
| `max_length` | 2048-32768 | N/A | Sequence length for SFT |
|
|
282
|
+
| `max_tokens` | N/A | 256 | Max generation length for RL |
|
|
283
|
+
| `num_epochs` | 1-3 | N/A | Training passes |
|
|
284
|
+
| `lr_schedule` | cosine | N/A | LR decay schedule |
|
|
285
|
+
|
|
286
|
+
## Cost Estimation & Usage Tracking
|
|
287
|
+
|
|
288
|
+
### Pre-Training Cost Estimation
|
|
289
|
+
|
|
290
|
+
**ALWAYS estimate cost before starting Tinker training.** Load the `tinker-training-cost` skill and use its pricing tables or the bundled calculator:
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
python scripts/calculate_cost.py training_data.jsonl --model Qwen3-8B --epochs 3 --json
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
Present the cost estimate to the user for approval before starting training.
|
|
297
|
+
|
|
298
|
+
### Post-Training Usage Reporting
|
|
299
|
+
|
|
300
|
+
After training completes, report usage for billing:
|
|
301
|
+
|
|
302
|
+
```typescript
|
|
303
|
+
// Called automatically by the CLI after Tinker training
|
|
304
|
+
await SynSci.reportUsage({
|
|
305
|
+
service: "tinker",
|
|
306
|
+
event_type: "training",
|
|
307
|
+
model: "Qwen3-8B",
|
|
308
|
+
tokens_used: 5000000,
|
|
309
|
+
metadata: { run_id: "my-run", epochs: 3, dataset: "training_data.jsonl" },
|
|
310
|
+
})
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
The CLI tracks all Tinker usage and reports it to the Synthetic Sciences dashboard for billing.
|
|
314
|
+
|
|
315
|
+
## Common Issues
|
|
316
|
+
|
|
317
|
+
| Problem | Solution |
|
|
318
|
+
|---------|----------|
|
|
319
|
+
| `TINKER_API_KEY` not set | `export TINKER_API_KEY=your_key` or check SynSci credential sync |
|
|
320
|
+
| KL divergence > 0.01 | Reduce learning rate, check group size |
|
|
321
|
+
| OOM on dataset loading | Use `StreamingSupervisedDatasetFromHFDataset` for large datasets |
|
|
322
|
+
| Reward stuck at 0 | Debug reward function independently, check answer extraction |
|
|
323
|
+
| All advantages = 0 | Increase group size, ensure reward variance across completions |
|
|
324
|
+
| Wrong tokenizer | Use model-specific tokenizer (see Models & LoRA reference) |
|
|
325
|
+
|
|
326
|
+
## Saving and Resuming
|
|
327
|
+
|
|
328
|
+
```python
|
|
329
|
+
sampling_path = training_client.save_weights_for_sampler(name="final").result().path
|
|
330
|
+
sampling_client = service_client.create_sampling_client(model_path=sampling_path)
|
|
331
|
+
|
|
332
|
+
resume_path = training_client.save_state(name="checkpoint").result().path
|
|
333
|
+
training_client.load_state(resume_path)
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
## Common Imports
|
|
337
|
+
|
|
338
|
+
```python
|
|
339
|
+
import tinker
|
|
340
|
+
from tinker import types
|
|
341
|
+
from tinker.types import Datum, ModelInput, TensorData, AdamParams, SamplingParams
|
|
342
|
+
|
|
343
|
+
import chz
|
|
344
|
+
import asyncio
|
|
345
|
+
from tinker_cookbook.supervised import train
|
|
346
|
+
from tinker_cookbook.supervised.types import ChatDatasetBuilder, ChatDatasetBuilderCommonConfig
|
|
347
|
+
from tinker_cookbook.supervised.data import (
|
|
348
|
+
SupervisedDatasetFromHFDataset,
|
|
349
|
+
StreamingSupervisedDatasetFromHFDataset,
|
|
350
|
+
FromConversationFileBuilder,
|
|
351
|
+
conversation_to_datum,
|
|
352
|
+
)
|
|
353
|
+
from tinker_cookbook.renderers import get_renderer, TrainOnWhat
|
|
354
|
+
from tinker_cookbook.model_info import get_recommended_renderer_name
|
|
355
|
+
from tinker_cookbook.tokenizer_utils import get_tokenizer
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
## External Resources
|
|
359
|
+
|
|
360
|
+
- Documentation: https://tinker-docs.thinkingmachines.ai/
|
|
361
|
+
- Cookbook Repo: https://github.com/thinking-machines-lab/tinker-cookbook
|
|
362
|
+
- Console: https://tinker-console.thinkingmachines.ai
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# API Reference
|
|
2
|
+
|
|
3
|
+
## ServiceClient
|
|
4
|
+
|
|
5
|
+
Main entry point for Tinker API.
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
client = tinker.ServiceClient()
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
**Methods:**
|
|
12
|
+
- `get_server_capabilities()` - Query supported models
|
|
13
|
+
- `create_lora_training_client(base_model, rank=32, seed=None, train_mlp=True, train_attn=True, train_unembed=True)` - Create LoRA training client
|
|
14
|
+
- `create_training_client_from_state(path)` - Resume from checkpoint (weights only)
|
|
15
|
+
- `create_training_client_from_state_with_optimizer(path)` - Resume from checkpoint (weights + optimizer)
|
|
16
|
+
- `create_sampling_client(model_path=None, base_model=None)` - Create inference client
|
|
17
|
+
- `create_rest_client()` - Create REST API client
|
|
18
|
+
|
|
19
|
+
## TrainingClient
|
|
20
|
+
|
|
21
|
+
Client for training with forward/backward passes.
|
|
22
|
+
|
|
23
|
+
**Methods:**
|
|
24
|
+
- `forward(data, loss_fn, loss_fn_config=None)` - Forward pass without gradients
|
|
25
|
+
- `forward_backward(data, loss_fn, loss_fn_config=None)` - Compute gradients
|
|
26
|
+
- `forward_backward_custom(data, loss_fn)` - Custom loss function (1.5x FLOPs)
|
|
27
|
+
- `optim_step(adam_params)` - Update parameters
|
|
28
|
+
- `save_state(name)` - Save weights + optimizer state
|
|
29
|
+
- `load_state(path)` - Load weights only
|
|
30
|
+
- `load_state_with_optimizer(path)` - Load weights + optimizer
|
|
31
|
+
- `save_weights_for_sampler(name)` - Save for inference
|
|
32
|
+
- `save_weights_and_get_sampling_client(name)` - Save and get sampler
|
|
33
|
+
- `get_info()` - Get model info
|
|
34
|
+
- `get_tokenizer()` - Get tokenizer
|
|
35
|
+
|
|
36
|
+
## SamplingClient
|
|
37
|
+
|
|
38
|
+
Client for text generation.
|
|
39
|
+
|
|
40
|
+
**Methods:**
|
|
41
|
+
- `sample(prompt, num_samples, sampling_params, include_prompt_logprobs=False, topk_prompt_logprobs=None)` - Generate completions
|
|
42
|
+
- `compute_logprobs(prompt)` - Get prompt logprobs
|
|
43
|
+
|
|
44
|
+
## Core Types
|
|
45
|
+
|
|
46
|
+
### Datum
|
|
47
|
+
```python
|
|
48
|
+
types.Datum(
|
|
49
|
+
model_input=ModelInput,
|
|
50
|
+
loss_fn_inputs={"target_tokens": ..., "weights": ...}
|
|
51
|
+
)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### ModelInput
|
|
55
|
+
```python
|
|
56
|
+
types.ModelInput.from_ints(tokens=[1, 2, 3]) # From token list
|
|
57
|
+
types.ModelInput(chunks=[EncodedTextChunk, ImageChunk, ...]) # Multi-modal
|
|
58
|
+
model_input.to_ints() # Convert to token list
|
|
59
|
+
model_input.length() # Total context length
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### AdamParams
|
|
63
|
+
```python
|
|
64
|
+
types.AdamParams(
|
|
65
|
+
learning_rate=1e-4,
|
|
66
|
+
beta1=0.9,
|
|
67
|
+
beta2=0.95,
|
|
68
|
+
eps=1e-8,
|
|
69
|
+
weight_decay=0.0,
|
|
70
|
+
grad_clip_norm=0.0, # 0 = no clipping
|
|
71
|
+
)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### SamplingParams
|
|
75
|
+
```python
|
|
76
|
+
types.SamplingParams(
|
|
77
|
+
max_tokens=100,
|
|
78
|
+
temperature=0.7,
|
|
79
|
+
top_p=0.9,
|
|
80
|
+
top_k=-1, # -1 = no limit
|
|
81
|
+
stop=["<|endoftext|>"],
|
|
82
|
+
seed=42,
|
|
83
|
+
)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### TensorData
|
|
87
|
+
```python
|
|
88
|
+
types.TensorData.from_numpy(np.array([...]))
|
|
89
|
+
types.TensorData.from_torch(torch.tensor([...]))
|
|
90
|
+
tensor_data.to_numpy()
|
|
91
|
+
tensor_data.to_torch()
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### ImageChunk
|
|
95
|
+
```python
|
|
96
|
+
types.ImageChunk(
|
|
97
|
+
data=image_bytes,
|
|
98
|
+
format="png", # or "jpeg"
|
|
99
|
+
expected_tokens=None, # Optional advisory
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### EncodedTextChunk
|
|
104
|
+
```python
|
|
105
|
+
types.EncodedTextChunk(tokens=[1, 2, 3])
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### LoraConfig
|
|
109
|
+
```python
|
|
110
|
+
types.LoraConfig(
|
|
111
|
+
rank=32,
|
|
112
|
+
seed=42,
|
|
113
|
+
train_unembed=False,
|
|
114
|
+
train_mlp=True,
|
|
115
|
+
train_attn=True,
|
|
116
|
+
)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Response Types
|
|
120
|
+
|
|
121
|
+
### ForwardBackwardOutput
|
|
122
|
+
```python
|
|
123
|
+
result = fwdbwd_future.result()
|
|
124
|
+
result.loss_fn_outputs # List of dicts with "logprobs"
|
|
125
|
+
result.metrics # Training metrics
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### SampleResponse
|
|
129
|
+
```python
|
|
130
|
+
result = sample_future.result()
|
|
131
|
+
result.sequences # List of SampledSequence
|
|
132
|
+
result.prompt_logprobs # If requested
|
|
133
|
+
result.topk_prompt_logprobs # If requested
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### SampledSequence
|
|
137
|
+
```python
|
|
138
|
+
seq = result.sequences[0]
|
|
139
|
+
seq.tokens # Generated token IDs
|
|
140
|
+
seq.logprobs # Per-token logprobs
|
|
141
|
+
seq.stop_reason # Why generation stopped
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### SaveWeightsResponse
|
|
145
|
+
```python
|
|
146
|
+
response = training_client.save_state(name).result()
|
|
147
|
+
response.path # "tinker://<model_id>/<name>"
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Checkpoint Types
|
|
151
|
+
|
|
152
|
+
### Checkpoint
|
|
153
|
+
```python
|
|
154
|
+
checkpoint.checkpoint_id
|
|
155
|
+
checkpoint.checkpoint_type # "training" or "sampler"
|
|
156
|
+
checkpoint.time
|
|
157
|
+
checkpoint.tinker_path
|
|
158
|
+
checkpoint.size_bytes
|
|
159
|
+
checkpoint.public
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### ParsedCheckpointTinkerPath
|
|
163
|
+
```python
|
|
164
|
+
parsed = ParsedCheckpointTinkerPath.from_tinker_path("tinker://...")
|
|
165
|
+
parsed.training_run_id
|
|
166
|
+
parsed.checkpoint_type
|
|
167
|
+
parsed.checkpoint_id
|
|
168
|
+
```
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# Getting Started with Tinker
|
|
2
|
+
|
|
3
|
+
## Creating Clients
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
import tinker
|
|
7
|
+
service_client = tinker.ServiceClient()
|
|
8
|
+
|
|
9
|
+
# Check available models
|
|
10
|
+
for item in service_client.get_server_capabilities().supported_models:
|
|
11
|
+
print("- " + item.model_name)
|
|
12
|
+
|
|
13
|
+
# Create training client
|
|
14
|
+
training_client = service_client.create_lora_training_client(
|
|
15
|
+
base_model="Qwen/Qwen3-VL-30B-A3B-Instruct",
|
|
16
|
+
rank=32,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Get tokenizer
|
|
20
|
+
tokenizer = training_client.get_tokenizer()
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Preparing Training Data
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
import numpy as np
|
|
27
|
+
from tinker import types
|
|
28
|
+
|
|
29
|
+
def process_example(example: dict, tokenizer) -> types.Datum:
|
|
30
|
+
prompt = f"English: {example['input']}\nPig Latin:"
|
|
31
|
+
prompt_tokens = tokenizer.encode(prompt, add_special_tokens=True)
|
|
32
|
+
|
|
33
|
+
completion_tokens = tokenizer.encode(f" {example['output']}\n\n", add_special_tokens=False)
|
|
34
|
+
tokens = prompt_tokens + completion_tokens
|
|
35
|
+
weights = np.array(([0] * len(prompt_tokens)) + ([1] * len(completion_tokens)), dtype=np.float32)
|
|
36
|
+
target_tokens = np.array(tokens[1:], dtype=np.int64)
|
|
37
|
+
|
|
38
|
+
return types.Datum(
|
|
39
|
+
model_input=types.ModelInput.from_ints(tokens=tokens[:-1]),
|
|
40
|
+
loss_fn_inputs=dict(weights=weights[1:], target_tokens=target_tokens)
|
|
41
|
+
)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Vision Inputs
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import requests
|
|
48
|
+
|
|
49
|
+
image_data = requests.get("https://example.com/image.png").content
|
|
50
|
+
model_input = tinker.ModelInput(chunks=[
|
|
51
|
+
types.EncodedTextChunk(tokens=tokenizer.encode("<|im_start|>user\n<|vision_start|>")),
|
|
52
|
+
types.ImageChunk(data=image_data, format="png"),
|
|
53
|
+
types.EncodedTextChunk(tokens=tokenizer.encode("<|vision_end|>What is this?<|im_end|>\n<|im_start|>assistant\n")),
|
|
54
|
+
])
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Training Loop
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
import numpy as np
|
|
61
|
+
|
|
62
|
+
for _ in range(6):
|
|
63
|
+
fwdbwd_future = training_client.forward_backward(processed_examples, "cross_entropy")
|
|
64
|
+
optim_future = training_client.optim_step(types.AdamParams(learning_rate=1e-4))
|
|
65
|
+
|
|
66
|
+
fwdbwd_result = fwdbwd_future.result()
|
|
67
|
+
optim_result = optim_future.result()
|
|
68
|
+
|
|
69
|
+
# Compute loss
|
|
70
|
+
logprobs = np.concatenate([out['logprobs'].tolist() for out in fwdbwd_result.loss_fn_outputs])
|
|
71
|
+
weights = np.concatenate([ex.loss_fn_inputs['weights'].tolist() for ex in processed_examples])
|
|
72
|
+
print(f"Loss per token: {-np.dot(logprobs, weights) / weights.sum():.4f}")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Sampling
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
# Create sampling client
|
|
79
|
+
sampling_client = training_client.save_weights_and_get_sampling_client(name='my-model')
|
|
80
|
+
|
|
81
|
+
# Sample
|
|
82
|
+
prompt = types.ModelInput.from_ints(tokens=tokenizer.encode("English: coffee break\nPig Latin:", add_special_tokens=True))
|
|
83
|
+
params = types.SamplingParams(max_tokens=20, temperature=0.0, stop=["\n"])
|
|
84
|
+
future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8)
|
|
85
|
+
result = future.result()
|
|
86
|
+
|
|
87
|
+
for i, seq in enumerate(result.sequences):
|
|
88
|
+
print(f"{i}: {repr(tokenizer.decode(seq.tokens))}")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Computing Logprobs
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
# Get prompt logprobs
|
|
95
|
+
prompt = types.ModelInput.from_ints(tokens=tokenizer.encode("How many r's are in strawberry?", add_special_tokens=True))
|
|
96
|
+
sample_response = sampling_client.sample(
|
|
97
|
+
prompt=prompt,
|
|
98
|
+
num_samples=1,
|
|
99
|
+
sampling_params=tinker.SamplingParams(max_tokens=1),
|
|
100
|
+
include_prompt_logprobs=True,
|
|
101
|
+
).result()
|
|
102
|
+
print(sample_response.prompt_logprobs) # [None, -9.5, -1.6, ...]
|
|
103
|
+
|
|
104
|
+
# Top-k logprobs
|
|
105
|
+
sample_response = sampling_client.sample(
|
|
106
|
+
prompt=prompt,
|
|
107
|
+
num_samples=1,
|
|
108
|
+
sampling_params=tinker.SamplingParams(max_tokens=1),
|
|
109
|
+
include_prompt_logprobs=True,
|
|
110
|
+
topk_prompt_logprobs=5,
|
|
111
|
+
).result()
|
|
112
|
+
print(sample_response.topk_prompt_logprobs) # [None, [(token_id, logprob), ...], ...]
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Async and Futures
|
|
116
|
+
|
|
117
|
+
Every method has sync and async versions:
|
|
118
|
+
|
|
119
|
+
| Sync | Async |
|
|
120
|
+
|------|-------|
|
|
121
|
+
| `create_lora_training_client()` | `create_lora_training_client_async()` |
|
|
122
|
+
| `forward()` | `forward_async()` |
|
|
123
|
+
| `sample()` | `sample_async()` |
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
# Sync
|
|
127
|
+
future = client.forward_backward(data, loss_fn)
|
|
128
|
+
result = future.result() # Blocks
|
|
129
|
+
|
|
130
|
+
# Async (double await)
|
|
131
|
+
future = await client.forward_backward_async(data, loss_fn)
|
|
132
|
+
result = await future
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Overlap Requests for Performance
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
# Submit both before waiting - runs in same clock cycle
|
|
139
|
+
fwd_bwd_future = await client.forward_backward_async(batch, loss_fn)
|
|
140
|
+
optim_future = await client.optim_step_async(adam_params)
|
|
141
|
+
|
|
142
|
+
# Now retrieve results
|
|
143
|
+
fwd_bwd_result = await fwd_bwd_future
|
|
144
|
+
optim_result = await optim_future
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Saving and Loading
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# Save weights for sampling (fast, smaller)
|
|
151
|
+
sampling_path = training_client.save_weights_for_sampler(name="0000").result().path
|
|
152
|
+
sampling_client = service_client.create_sampling_client(model_path=sampling_path)
|
|
153
|
+
|
|
154
|
+
# Save full state for resuming training
|
|
155
|
+
resume_path = training_client.save_state(name="0010").result().path
|
|
156
|
+
training_client.load_state(resume_path)
|
|
157
|
+
```
|