@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
# TransformerLens API Reference
|
|
2
|
+
|
|
3
|
+
## HookedTransformer
|
|
4
|
+
|
|
5
|
+
The core class for mechanistic interpretability, wrapping transformer models with hooks on every activation.
|
|
6
|
+
|
|
7
|
+
### Loading Models
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from transformer_lens import HookedTransformer
|
|
11
|
+
|
|
12
|
+
# Basic loading
|
|
13
|
+
model = HookedTransformer.from_pretrained("gpt2-small")
|
|
14
|
+
|
|
15
|
+
# With specific device/dtype
|
|
16
|
+
model = HookedTransformer.from_pretrained(
|
|
17
|
+
"gpt2-medium",
|
|
18
|
+
device="cuda",
|
|
19
|
+
dtype=torch.float16
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Gated models (LLaMA, Mistral)
|
|
23
|
+
import os
|
|
24
|
+
os.environ["HF_TOKEN"] = "your_token"
|
|
25
|
+
model = HookedTransformer.from_pretrained("meta-llama/Llama-2-7b-hf")
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### from_pretrained() Parameters
|
|
29
|
+
|
|
30
|
+
| Parameter | Type | Default | Description |
|
|
31
|
+
|-----------|------|---------|-------------|
|
|
32
|
+
| `model_name` | str | required | Model name from OFFICIAL_MODEL_NAMES |
|
|
33
|
+
| `fold_ln` | bool | True | Fold LayerNorm weights into subsequent layers |
|
|
34
|
+
| `center_writing_weights` | bool | True | Center residual stream writer means |
|
|
35
|
+
| `center_unembed` | bool | True | Center unembedding weights |
|
|
36
|
+
| `dtype` | torch.dtype | None | Model precision |
|
|
37
|
+
| `device` | str | None | Target device |
|
|
38
|
+
| `n_devices` | int | 1 | Number of devices for model parallelism |
|
|
39
|
+
|
|
40
|
+
### Weight Matrices
|
|
41
|
+
|
|
42
|
+
| Property | Shape | Description |
|
|
43
|
+
|----------|-------|-------------|
|
|
44
|
+
| `W_E` | [d_vocab, d_model] | Token embedding matrix |
|
|
45
|
+
| `W_U` | [d_model, d_vocab] | Unembedding matrix |
|
|
46
|
+
| `W_pos` | [n_ctx, d_model] | Positional embedding |
|
|
47
|
+
| `W_Q` | [n_layers, n_heads, d_model, d_head] | Query weights |
|
|
48
|
+
| `W_K` | [n_layers, n_heads, d_model, d_head] | Key weights |
|
|
49
|
+
| `W_V` | [n_layers, n_heads, d_model, d_head] | Value weights |
|
|
50
|
+
| `W_O` | [n_layers, n_heads, d_head, d_model] | Output weights |
|
|
51
|
+
| `W_in` | [n_layers, d_model, d_mlp] | MLP input weights |
|
|
52
|
+
| `W_out` | [n_layers, d_mlp, d_model] | MLP output weights |
|
|
53
|
+
|
|
54
|
+
### Core Methods
|
|
55
|
+
|
|
56
|
+
#### forward()
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
logits = model(tokens)
|
|
60
|
+
logits = model(tokens, return_type="logits")
|
|
61
|
+
loss = model(tokens, return_type="loss")
|
|
62
|
+
logits, loss = model(tokens, return_type="both")
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Parameters:
|
|
66
|
+
- `input`: Token tensor or string
|
|
67
|
+
- `return_type`: "logits", "loss", "both", or None
|
|
68
|
+
- `prepend_bos`: Whether to prepend BOS token
|
|
69
|
+
- `start_at_layer`: Start execution from specific layer
|
|
70
|
+
- `stop_at_layer`: Stop execution at specific layer
|
|
71
|
+
|
|
72
|
+
#### run_with_cache()
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
logits, cache = model.run_with_cache(tokens)
|
|
76
|
+
|
|
77
|
+
# Selective caching (saves memory)
|
|
78
|
+
logits, cache = model.run_with_cache(
|
|
79
|
+
tokens,
|
|
80
|
+
names_filter=lambda name: "resid_post" in name
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# Cache on CPU
|
|
84
|
+
logits, cache = model.run_with_cache(tokens, device="cpu")
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
#### run_with_hooks()
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
def my_hook(activation, hook):
|
|
91
|
+
# Modify activation
|
|
92
|
+
activation[:, :, 0] = 0
|
|
93
|
+
return activation
|
|
94
|
+
|
|
95
|
+
logits = model.run_with_hooks(
|
|
96
|
+
tokens,
|
|
97
|
+
fwd_hooks=[("blocks.5.hook_resid_post", my_hook)]
|
|
98
|
+
)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
#### generate()
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
output = model.generate(
|
|
105
|
+
tokens,
|
|
106
|
+
max_new_tokens=50,
|
|
107
|
+
temperature=0.7,
|
|
108
|
+
top_k=40,
|
|
109
|
+
top_p=0.9,
|
|
110
|
+
freq_penalty=1.0,
|
|
111
|
+
use_past_kv_cache=True
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Tokenization Methods
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
# String to tokens
|
|
119
|
+
tokens = model.to_tokens("Hello world") # [1, seq_len]
|
|
120
|
+
tokens = model.to_tokens("Hello", prepend_bos=False)
|
|
121
|
+
|
|
122
|
+
# Tokens to string
|
|
123
|
+
text = model.to_string(tokens)
|
|
124
|
+
|
|
125
|
+
# Get string tokens (for debugging)
|
|
126
|
+
str_tokens = model.to_str_tokens("Hello world")
|
|
127
|
+
# ['<|endoftext|>', 'Hello', ' world']
|
|
128
|
+
|
|
129
|
+
# Single token validation
|
|
130
|
+
token_id = model.to_single_token(" Paris") # Returns int or raises error
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Hook Management
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
# Clear all hooks
|
|
137
|
+
model.reset_hooks()
|
|
138
|
+
|
|
139
|
+
# Add permanent hook
|
|
140
|
+
model.add_hook("blocks.0.hook_resid_post", my_hook)
|
|
141
|
+
|
|
142
|
+
# Remove specific hook
|
|
143
|
+
model.remove_hook("blocks.0.hook_resid_post")
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## ActivationCache
|
|
149
|
+
|
|
150
|
+
Stores and provides access to all activations from a forward pass.
|
|
151
|
+
|
|
152
|
+
### Accessing Activations
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
logits, cache = model.run_with_cache(tokens)
|
|
156
|
+
|
|
157
|
+
# By name and layer
|
|
158
|
+
residual = cache["resid_post", 5]
|
|
159
|
+
attention = cache["pattern", 3]
|
|
160
|
+
mlp_out = cache["mlp_out", 7]
|
|
161
|
+
|
|
162
|
+
# Full name string
|
|
163
|
+
residual = cache["blocks.5.hook_resid_post"]
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Cache Keys
|
|
167
|
+
|
|
168
|
+
| Key Pattern | Shape | Description |
|
|
169
|
+
|-------------|-------|-------------|
|
|
170
|
+
| `hook_embed` | [batch, pos, d_model] | Token embeddings |
|
|
171
|
+
| `hook_pos_embed` | [batch, pos, d_model] | Positional embeddings |
|
|
172
|
+
| `resid_pre, layer` | [batch, pos, d_model] | Residual before attention |
|
|
173
|
+
| `resid_mid, layer` | [batch, pos, d_model] | Residual after attention |
|
|
174
|
+
| `resid_post, layer` | [batch, pos, d_model] | Residual after MLP |
|
|
175
|
+
| `attn_out, layer` | [batch, pos, d_model] | Attention output |
|
|
176
|
+
| `mlp_out, layer` | [batch, pos, d_model] | MLP output |
|
|
177
|
+
| `pattern, layer` | [batch, head, q_pos, k_pos] | Attention pattern (post-softmax) |
|
|
178
|
+
| `attn_scores, layer` | [batch, head, q_pos, k_pos] | Attention scores (pre-softmax) |
|
|
179
|
+
| `q, layer` | [batch, pos, head, d_head] | Query vectors |
|
|
180
|
+
| `k, layer` | [batch, pos, head, d_head] | Key vectors |
|
|
181
|
+
| `v, layer` | [batch, pos, head, d_head] | Value vectors |
|
|
182
|
+
| `z, layer` | [batch, pos, head, d_head] | Attention output per head |
|
|
183
|
+
|
|
184
|
+
### Analysis Methods
|
|
185
|
+
|
|
186
|
+
#### decompose_resid()
|
|
187
|
+
|
|
188
|
+
Decomposes residual stream into component contributions:
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
components, labels = cache.decompose_resid(
|
|
192
|
+
layer=5,
|
|
193
|
+
return_labels=True,
|
|
194
|
+
mode="attn" # or "mlp" or "full"
|
|
195
|
+
)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
#### accumulated_resid()
|
|
199
|
+
|
|
200
|
+
Get accumulated residual at each layer (for Logit Lens):
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
accumulated = cache.accumulated_resid(
|
|
204
|
+
layer=None, # All layers
|
|
205
|
+
incl_mid=False,
|
|
206
|
+
apply_ln=True # Apply final LayerNorm
|
|
207
|
+
)
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
#### logit_attrs()
|
|
211
|
+
|
|
212
|
+
Calculate logit attribution for components:
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
attrs = cache.logit_attrs(
|
|
216
|
+
residual_stack,
|
|
217
|
+
tokens=target_tokens,
|
|
218
|
+
incorrect_tokens=incorrect_tokens
|
|
219
|
+
)
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
#### stack_head_results()
|
|
223
|
+
|
|
224
|
+
Stack attention head outputs:
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
head_results = cache.stack_head_results(
|
|
228
|
+
layer=-1, # All layers
|
|
229
|
+
pos_slice=None # All positions
|
|
230
|
+
)
|
|
231
|
+
# Shape: [n_layers, n_heads, batch, pos, d_model]
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### Utility Methods
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
# Move cache to device
|
|
238
|
+
cache = cache.to("cpu")
|
|
239
|
+
|
|
240
|
+
# Remove batch dimension (for batch_size=1)
|
|
241
|
+
cache = cache.remove_batch_dim()
|
|
242
|
+
|
|
243
|
+
# Get all keys
|
|
244
|
+
keys = cache.keys()
|
|
245
|
+
|
|
246
|
+
# Iterate
|
|
247
|
+
for name, activation in cache.items():
|
|
248
|
+
print(name, activation.shape)
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## HookPoint
|
|
254
|
+
|
|
255
|
+
The fundamental hook mechanism wrapping every activation.
|
|
256
|
+
|
|
257
|
+
### Hook Function Signature
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
def hook_fn(activation: torch.Tensor, hook: HookPoint) -> torch.Tensor:
|
|
261
|
+
"""
|
|
262
|
+
Args:
|
|
263
|
+
activation: Current activation value
|
|
264
|
+
hook: The HookPoint object (has .name attribute)
|
|
265
|
+
|
|
266
|
+
Returns:
|
|
267
|
+
Modified activation (or None to keep original)
|
|
268
|
+
"""
|
|
269
|
+
# Modify activation
|
|
270
|
+
return activation
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
### Common Hook Patterns
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
# Zero ablation
|
|
277
|
+
def zero_hook(act, hook):
|
|
278
|
+
act[:, :, :] = 0
|
|
279
|
+
return act
|
|
280
|
+
|
|
281
|
+
# Mean ablation
|
|
282
|
+
def mean_hook(act, hook):
|
|
283
|
+
act[:, :, :] = act.mean(dim=0, keepdim=True)
|
|
284
|
+
return act
|
|
285
|
+
|
|
286
|
+
# Patch from cache
|
|
287
|
+
def patch_hook(act, hook):
|
|
288
|
+
act[:, 5, :] = clean_cache[hook.name][:, 5, :]
|
|
289
|
+
return act
|
|
290
|
+
|
|
291
|
+
# Add steering vector
|
|
292
|
+
def steer_hook(act, hook):
|
|
293
|
+
act += 0.5 * steering_vector
|
|
294
|
+
return act
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## Utility Functions
|
|
300
|
+
|
|
301
|
+
### patching module
|
|
302
|
+
|
|
303
|
+
```python
|
|
304
|
+
from transformer_lens import patching
|
|
305
|
+
|
|
306
|
+
# Generic activation patching
|
|
307
|
+
results = patching.generic_activation_patch(
|
|
308
|
+
model=model,
|
|
309
|
+
corrupted_tokens=corrupted,
|
|
310
|
+
clean_cache=clean_cache,
|
|
311
|
+
patching_metric=metric_fn,
|
|
312
|
+
patch_setter=patch_fn,
|
|
313
|
+
activation_name="resid_post",
|
|
314
|
+
index_axis_names=("layer", "pos")
|
|
315
|
+
)
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
### FactoredMatrix
|
|
319
|
+
|
|
320
|
+
Efficient operations on factored weight matrices:
|
|
321
|
+
|
|
322
|
+
```python
|
|
323
|
+
from transformer_lens import FactoredMatrix
|
|
324
|
+
|
|
325
|
+
# QK circuit
|
|
326
|
+
QK = FactoredMatrix(model.W_Q[layer], model.W_K[layer].transpose(-2, -1))
|
|
327
|
+
|
|
328
|
+
# OV circuit
|
|
329
|
+
OV = FactoredMatrix(model.W_V[layer], model.W_O[layer])
|
|
330
|
+
|
|
331
|
+
# Get full matrix
|
|
332
|
+
full = QK.AB
|
|
333
|
+
|
|
334
|
+
# SVD decomposition
|
|
335
|
+
U, S, V = QK.svd()
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
---
|
|
339
|
+
|
|
340
|
+
## Configuration
|
|
341
|
+
|
|
342
|
+
### HookedTransformerConfig
|
|
343
|
+
|
|
344
|
+
Key configuration attributes:
|
|
345
|
+
|
|
346
|
+
| Attribute | Description |
|
|
347
|
+
|-----------|-------------|
|
|
348
|
+
| `n_layers` | Number of transformer layers |
|
|
349
|
+
| `n_heads` | Number of attention heads |
|
|
350
|
+
| `d_model` | Model dimension |
|
|
351
|
+
| `d_head` | Head dimension |
|
|
352
|
+
| `d_mlp` | MLP hidden dimension |
|
|
353
|
+
| `d_vocab` | Vocabulary size |
|
|
354
|
+
| `n_ctx` | Maximum context length |
|
|
355
|
+
| `act_fn` | Activation function name |
|
|
356
|
+
| `normalization_type` | "LN" or "LNPre" |
|
|
357
|
+
|
|
358
|
+
Access via:
|
|
359
|
+
```python
|
|
360
|
+
model.cfg.n_layers
|
|
361
|
+
model.cfg.d_model
|
|
362
|
+
```
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
# TransformerLens Tutorials
|
|
2
|
+
|
|
3
|
+
## Tutorial 1: Basic Activation Analysis
|
|
4
|
+
|
|
5
|
+
### Goal
|
|
6
|
+
Understand how to load models, cache activations, and inspect model internals.
|
|
7
|
+
|
|
8
|
+
### Step-by-Step
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
from transformer_lens import HookedTransformer
|
|
12
|
+
import torch
|
|
13
|
+
|
|
14
|
+
# 1. Load model
|
|
15
|
+
model = HookedTransformer.from_pretrained("gpt2-small")
|
|
16
|
+
print(f"Model has {model.cfg.n_layers} layers, {model.cfg.n_heads} heads")
|
|
17
|
+
|
|
18
|
+
# 2. Tokenize input
|
|
19
|
+
prompt = "The capital of France is"
|
|
20
|
+
tokens = model.to_tokens(prompt)
|
|
21
|
+
print(f"Tokens shape: {tokens.shape}")
|
|
22
|
+
print(f"String tokens: {model.to_str_tokens(prompt)}")
|
|
23
|
+
|
|
24
|
+
# 3. Run with cache
|
|
25
|
+
logits, cache = model.run_with_cache(tokens)
|
|
26
|
+
print(f"Logits shape: {logits.shape}")
|
|
27
|
+
print(f"Cache keys: {len(cache.keys())}")
|
|
28
|
+
|
|
29
|
+
# 4. Inspect activations
|
|
30
|
+
for layer in range(model.cfg.n_layers):
|
|
31
|
+
resid = cache["resid_post", layer]
|
|
32
|
+
print(f"Layer {layer} residual norm: {resid.norm().item():.2f}")
|
|
33
|
+
|
|
34
|
+
# 5. Look at attention patterns
|
|
35
|
+
attn = cache["pattern", 0] # Layer 0
|
|
36
|
+
print(f"Attention shape: {attn.shape}") # [batch, heads, q_pos, k_pos]
|
|
37
|
+
|
|
38
|
+
# 6. Get top predictions
|
|
39
|
+
probs = torch.softmax(logits[0, -1], dim=-1)
|
|
40
|
+
top_tokens = probs.topk(5)
|
|
41
|
+
for token_id, prob in zip(top_tokens.indices, top_tokens.values):
|
|
42
|
+
print(f"{model.to_string(token_id.unsqueeze(0))}: {prob.item():.3f}")
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Tutorial 2: Activation Patching
|
|
48
|
+
|
|
49
|
+
### Goal
|
|
50
|
+
Identify which activations causally affect model output.
|
|
51
|
+
|
|
52
|
+
### Concept
|
|
53
|
+
1. Run model on "clean" input, cache activations
|
|
54
|
+
2. Run model on "corrupted" input
|
|
55
|
+
3. Patch clean activations into corrupted run
|
|
56
|
+
4. Measure effect on output
|
|
57
|
+
|
|
58
|
+
### Step-by-Step
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from transformer_lens import HookedTransformer
|
|
62
|
+
import torch
|
|
63
|
+
|
|
64
|
+
model = HookedTransformer.from_pretrained("gpt2-small")
|
|
65
|
+
|
|
66
|
+
# Define clean and corrupted prompts
|
|
67
|
+
clean_prompt = "The Eiffel Tower is in the city of"
|
|
68
|
+
corrupted_prompt = "The Colosseum is in the city of"
|
|
69
|
+
|
|
70
|
+
clean_tokens = model.to_tokens(clean_prompt)
|
|
71
|
+
corrupted_tokens = model.to_tokens(corrupted_prompt)
|
|
72
|
+
|
|
73
|
+
# Get clean activations
|
|
74
|
+
_, clean_cache = model.run_with_cache(clean_tokens)
|
|
75
|
+
|
|
76
|
+
# Define metric
|
|
77
|
+
paris_token = model.to_single_token(" Paris")
|
|
78
|
+
rome_token = model.to_single_token(" Rome")
|
|
79
|
+
|
|
80
|
+
def logit_diff(logits):
|
|
81
|
+
"""Positive = model prefers Paris over Rome"""
|
|
82
|
+
return (logits[0, -1, paris_token] - logits[0, -1, rome_token]).item()
|
|
83
|
+
|
|
84
|
+
# Baseline measurements
|
|
85
|
+
clean_logits = model(clean_tokens)
|
|
86
|
+
corrupted_logits = model(corrupted_tokens)
|
|
87
|
+
print(f"Clean logit diff: {logit_diff(clean_logits):.3f}")
|
|
88
|
+
print(f"Corrupted logit diff: {logit_diff(corrupted_logits):.3f}")
|
|
89
|
+
|
|
90
|
+
# Patch each layer
|
|
91
|
+
results = []
|
|
92
|
+
for layer in range(model.cfg.n_layers):
|
|
93
|
+
def patch_hook(activation, hook, layer=layer):
|
|
94
|
+
activation[:] = clean_cache["resid_post", layer]
|
|
95
|
+
return activation
|
|
96
|
+
|
|
97
|
+
patched_logits = model.run_with_hooks(
|
|
98
|
+
corrupted_tokens,
|
|
99
|
+
fwd_hooks=[(f"blocks.{layer}.hook_resid_post", patch_hook)]
|
|
100
|
+
)
|
|
101
|
+
results.append(logit_diff(patched_logits))
|
|
102
|
+
print(f"Layer {layer}: {results[-1]:.3f}")
|
|
103
|
+
|
|
104
|
+
# Find most important layer
|
|
105
|
+
best_layer = max(range(len(results)), key=lambda i: results[i])
|
|
106
|
+
print(f"\nMost important layer: {best_layer}")
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Position-Specific Patching
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
import torch
|
|
113
|
+
|
|
114
|
+
seq_len = clean_tokens.shape[1]
|
|
115
|
+
results = torch.zeros(model.cfg.n_layers, seq_len)
|
|
116
|
+
|
|
117
|
+
for layer in range(model.cfg.n_layers):
|
|
118
|
+
for pos in range(seq_len):
|
|
119
|
+
def patch_hook(activation, hook, layer=layer, pos=pos):
|
|
120
|
+
activation[:, pos, :] = clean_cache["resid_post", layer][:, pos, :]
|
|
121
|
+
return activation
|
|
122
|
+
|
|
123
|
+
patched_logits = model.run_with_hooks(
|
|
124
|
+
corrupted_tokens,
|
|
125
|
+
fwd_hooks=[(f"blocks.{layer}.hook_resid_post", patch_hook)]
|
|
126
|
+
)
|
|
127
|
+
results[layer, pos] = logit_diff(patched_logits)
|
|
128
|
+
|
|
129
|
+
# Visualize as heatmap
|
|
130
|
+
import matplotlib.pyplot as plt
|
|
131
|
+
plt.figure(figsize=(12, 8))
|
|
132
|
+
plt.imshow(results.numpy(), aspect='auto', cmap='RdBu')
|
|
133
|
+
plt.xlabel('Position')
|
|
134
|
+
plt.ylabel('Layer')
|
|
135
|
+
plt.colorbar(label='Logit Difference')
|
|
136
|
+
plt.title('Activation Patching Results')
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## Tutorial 3: Direct Logit Attribution
|
|
142
|
+
|
|
143
|
+
### Goal
|
|
144
|
+
Identify which components (heads, neurons) contribute to specific predictions.
|
|
145
|
+
|
|
146
|
+
### Step-by-Step
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from transformer_lens import HookedTransformer
|
|
150
|
+
import torch
|
|
151
|
+
|
|
152
|
+
model = HookedTransformer.from_pretrained("gpt2-small")
|
|
153
|
+
|
|
154
|
+
prompt = "The capital of France is"
|
|
155
|
+
tokens = model.to_tokens(prompt)
|
|
156
|
+
logits, cache = model.run_with_cache(tokens)
|
|
157
|
+
|
|
158
|
+
# Target token
|
|
159
|
+
target_token = model.to_single_token(" Paris")
|
|
160
|
+
|
|
161
|
+
# Get unembedding direction for target
|
|
162
|
+
target_direction = model.W_U[:, target_token] # [d_model]
|
|
163
|
+
|
|
164
|
+
# Attribution per attention head
|
|
165
|
+
head_contributions = torch.zeros(model.cfg.n_layers, model.cfg.n_heads)
|
|
166
|
+
|
|
167
|
+
for layer in range(model.cfg.n_layers):
|
|
168
|
+
# Get per-head output at final position
|
|
169
|
+
z = cache["z", layer][0, -1] # [n_heads, d_head]
|
|
170
|
+
|
|
171
|
+
for head in range(model.cfg.n_heads):
|
|
172
|
+
# Project through W_O to get contribution to residual
|
|
173
|
+
head_out = z[head] @ model.W_O[layer, head] # [d_model]
|
|
174
|
+
|
|
175
|
+
# Dot with target direction
|
|
176
|
+
contribution = (head_out @ target_direction).item()
|
|
177
|
+
head_contributions[layer, head] = contribution
|
|
178
|
+
|
|
179
|
+
# Find top contributing heads
|
|
180
|
+
flat_idx = head_contributions.flatten().topk(10)
|
|
181
|
+
print("Top 10 heads for predicting 'Paris':")
|
|
182
|
+
for idx, val in zip(flat_idx.indices, flat_idx.values):
|
|
183
|
+
layer = idx.item() // model.cfg.n_heads
|
|
184
|
+
head = idx.item() % model.cfg.n_heads
|
|
185
|
+
print(f" L{layer}H{head}: {val.item():.3f}")
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Tutorial 4: Induction Head Detection
|
|
191
|
+
|
|
192
|
+
### Goal
|
|
193
|
+
Find attention heads that implement the [A][B]...[A] → [B] pattern.
|
|
194
|
+
|
|
195
|
+
### Step-by-Step
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
from transformer_lens import HookedTransformer
|
|
199
|
+
import torch
|
|
200
|
+
|
|
201
|
+
model = HookedTransformer.from_pretrained("gpt2-small")
|
|
202
|
+
|
|
203
|
+
# Create repeated sequence pattern
|
|
204
|
+
# Pattern: [A][B][C][A] - model should attend from last A to B
|
|
205
|
+
seq = torch.randint(1000, 5000, (1, 20))
|
|
206
|
+
# Repeat first half
|
|
207
|
+
seq[0, 10:] = seq[0, :10]
|
|
208
|
+
|
|
209
|
+
_, cache = model.run_with_cache(seq)
|
|
210
|
+
|
|
211
|
+
# For induction heads: position i should attend to position (i - seq_len/2 + 1)
|
|
212
|
+
# At position 10 (second A), should attend to position 1 (first B)
|
|
213
|
+
|
|
214
|
+
induction_scores = torch.zeros(model.cfg.n_layers, model.cfg.n_heads)
|
|
215
|
+
|
|
216
|
+
for layer in range(model.cfg.n_layers):
|
|
217
|
+
pattern = cache["pattern", layer][0] # [heads, q_pos, k_pos]
|
|
218
|
+
|
|
219
|
+
# Check attention from repeated positions to position after first occurrence
|
|
220
|
+
for offset in range(1, 10):
|
|
221
|
+
q_pos = 10 + offset # Position in second half
|
|
222
|
+
k_pos = offset + 1 # Induction target: the token *after* the matching first-half position
|
|
223
|
+
|
|
224
|
+
# Average attention to the "correct" position
|
|
225
|
+
induction_scores[layer] += pattern[:, q_pos, k_pos]
|
|
226
|
+
|
|
227
|
+
induction_scores[layer] /= 9 # Average over offsets
|
|
228
|
+
|
|
229
|
+
# Find top induction heads
|
|
230
|
+
print("Top induction heads:")
|
|
231
|
+
for layer in range(model.cfg.n_layers):
|
|
232
|
+
for head in range(model.cfg.n_heads):
|
|
233
|
+
score = induction_scores[layer, head].item()
|
|
234
|
+
if score > 0.3:
|
|
235
|
+
print(f" L{layer}H{head}: {score:.3f}")
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## Tutorial 5: Logit Lens
|
|
241
|
+
|
|
242
|
+
### Goal
|
|
243
|
+
See what the model "believes" at each layer before final unembedding.
|
|
244
|
+
|
|
245
|
+
### Step-by-Step
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
from transformer_lens import HookedTransformer
|
|
249
|
+
import torch
|
|
250
|
+
|
|
251
|
+
model = HookedTransformer.from_pretrained("gpt2-small")
|
|
252
|
+
|
|
253
|
+
prompt = "The quick brown fox jumps over the lazy"
|
|
254
|
+
tokens = model.to_tokens(prompt)
|
|
255
|
+
logits, cache = model.run_with_cache(tokens)
|
|
256
|
+
|
|
257
|
+
# Get accumulated residual at each layer
|
|
258
|
+
# Apply LayerNorm to match what unembedding sees
|
|
259
|
+
accumulated = cache.accumulated_resid(layer=None, incl_mid=False, apply_ln=True)
|
|
260
|
+
# Shape: [n_layers + 1, batch, pos, d_model]
|
|
261
|
+
|
|
262
|
+
# Project to vocabulary
|
|
263
|
+
layer_logits = accumulated @ model.W_U # [n_layers + 1, batch, pos, d_vocab]
|
|
264
|
+
|
|
265
|
+
# Look at predictions for final position
|
|
266
|
+
print("Layer-by-layer predictions for final token:")
|
|
267
|
+
for layer in range(model.cfg.n_layers + 1):
|
|
268
|
+
probs = torch.softmax(layer_logits[layer, 0, -1], dim=-1)
|
|
269
|
+
top_token = probs.argmax()
|
|
270
|
+
top_prob = probs[top_token].item()
|
|
271
|
+
print(f"Layer {layer}: {model.to_string(top_token.unsqueeze(0))!r} ({top_prob:.3f})")
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
---
|
|
275
|
+
|
|
276
|
+
## Tutorial 6: Steering with Activation Addition
|
|
277
|
+
|
|
278
|
+
### Goal
|
|
279
|
+
Add a steering vector to change model behavior.
|
|
280
|
+
|
|
281
|
+
### Step-by-Step
|
|
282
|
+
|
|
283
|
+
```python
|
|
284
|
+
from transformer_lens import HookedTransformer
|
|
285
|
+
import torch
|
|
286
|
+
|
|
287
|
+
model = HookedTransformer.from_pretrained("gpt2-small")
|
|
288
|
+
|
|
289
|
+
# Get activations for contrasting prompts
|
|
290
|
+
positive_prompt = "I love this! It's absolutely wonderful and"
|
|
291
|
+
negative_prompt = "I hate this! It's absolutely terrible and"
|
|
292
|
+
|
|
293
|
+
_, pos_cache = model.run_with_cache(model.to_tokens(positive_prompt))
|
|
294
|
+
_, neg_cache = model.run_with_cache(model.to_tokens(negative_prompt))
|
|
295
|
+
|
|
296
|
+
# Compute steering vector (positive - negative direction)
|
|
297
|
+
layer = 6
|
|
298
|
+
steering_vector = (
|
|
299
|
+
pos_cache["resid_post", layer].mean(dim=1) -
|
|
300
|
+
neg_cache["resid_post", layer].mean(dim=1)
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
# Generate with steering
|
|
304
|
+
test_prompt = "The movie was"
|
|
305
|
+
test_tokens = model.to_tokens(test_prompt)
|
|
306
|
+
|
|
307
|
+
def steer_hook(activation, hook):
|
|
308
|
+
activation += 2.0 * steering_vector
|
|
309
|
+
return activation
|
|
310
|
+
|
|
311
|
+
# Without steering
|
|
312
|
+
normal_output = model.generate(test_tokens, max_new_tokens=20)
|
|
313
|
+
print(f"Normal: {model.to_string(normal_output[0])}")
|
|
314
|
+
|
|
315
|
+
# With positive steering
|
|
316
|
+
# HookedTransformer.generate() does not accept fwd_hooks directly;
# attach the hook with the model.hooks() context manager instead.
with model.hooks(fwd_hooks=[(f"blocks.{layer}.hook_resid_post", steer_hook)]):
    steered_output = model.generate(test_tokens, max_new_tokens=20)
|
|
321
|
+
print(f"Steered: {model.to_string(steered_output[0])}")
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
## External Resources
|
|
327
|
+
|
|
328
|
+
### Official Tutorials
|
|
329
|
+
- [Main Demo](https://transformerlensorg.github.io/TransformerLens/generated/demos/Main_Demo.html)
|
|
330
|
+
- [Exploratory Analysis](https://transformerlensorg.github.io/TransformerLens/generated/demos/Exploratory_Analysis_Demo.html)
|
|
331
|
+
- [Activation Patching Demo](https://colab.research.google.com/github/TransformerLensOrg/TransformerLens/blob/main/demos/Activation_Patching_in_TL_Demo.ipynb)
|
|
332
|
+
|
|
333
|
+
### ARENA Course
|
|
334
|
+
Comprehensive 200+ hour curriculum: https://arena-foundation.github.io/ARENA/
|
|
335
|
+
|
|
336
|
+
### Neel Nanda's Resources
|
|
337
|
+
- [Getting Started in Mech Interp](https://www.neelnanda.io/mechanistic-interpretability/getting-started)
|
|
338
|
+
- [Mech Interp Glossary](https://www.neelnanda.io/mechanistic-interpretability/glossary)
|
|
339
|
+
- [YouTube Channel](https://www.youtube.com/@neelnanda)
|