@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
# nnsight API Reference
|
|
2
|
+
|
|
3
|
+
## LanguageModel
|
|
4
|
+
|
|
5
|
+
Main class for wrapping language models with intervention capabilities.
|
|
6
|
+
|
|
7
|
+
### Loading Models
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from nnsight import LanguageModel
|
|
11
|
+
|
|
12
|
+
# Basic loading
|
|
13
|
+
model = LanguageModel("openai-community/gpt2", device_map="auto")
|
|
14
|
+
|
|
15
|
+
# Larger models
|
|
16
|
+
model = LanguageModel("meta-llama/Llama-3.1-8B", device_map="auto")
|
|
17
|
+
|
|
18
|
+
# With custom tokenizer settings
|
|
19
|
+
model = LanguageModel(
|
|
20
|
+
"gpt2",
|
|
21
|
+
device_map="auto",
|
|
22
|
+
torch_dtype=torch.float16,
|
|
23
|
+
)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Model Attributes
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
# Access underlying HuggingFace model
|
|
30
|
+
model._model
|
|
31
|
+
|
|
32
|
+
# Access tokenizer
|
|
33
|
+
model.tokenizer
|
|
34
|
+
|
|
35
|
+
# Model config
|
|
36
|
+
model._model.config
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Tracing Context
|
|
42
|
+
|
|
43
|
+
The `trace()` method creates a context for deferred execution.
|
|
44
|
+
|
|
45
|
+
### Basic Tracing
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
with model.trace("Hello world") as tracer:
|
|
49
|
+
# Operations are recorded, not executed immediately
|
|
50
|
+
hidden = model.transformer.h[5].output[0].save()
|
|
51
|
+
logits = model.output.save()
|
|
52
|
+
|
|
53
|
+
# After context, operations execute and saved values are available
|
|
54
|
+
print(hidden.shape)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Tracing Parameters
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
with model.trace(
|
|
61
|
+
prompt, # Input text or tokens
|
|
62
|
+
remote=False, # Use NDIF remote execution
|
|
63
|
+
validate=True, # Validate tensor shapes
|
|
64
|
+
scan=True, # Scan for shape info
|
|
65
|
+
) as tracer:
|
|
66
|
+
...
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Remote Execution
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
# Same code works remotely
|
|
73
|
+
with model.trace("Hello", remote=True) as tracer:
|
|
74
|
+
hidden = model.transformer.h[5].output[0].save()
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Proxy Objects
|
|
80
|
+
|
|
81
|
+
Inside tracing context, accessing modules returns Proxy objects.
|
|
82
|
+
|
|
83
|
+
### Accessing Values
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
with model.trace("Hello") as tracer:
|
|
87
|
+
# These are Proxy objects
|
|
88
|
+
layer_output = model.transformer.h[5].output[0]
|
|
89
|
+
attention = model.transformer.h[5].attn.output
|
|
90
|
+
|
|
91
|
+
# Operations create new Proxies
|
|
92
|
+
mean = layer_output.mean(dim=-1)
|
|
93
|
+
normed = layer_output / layer_output.norm()
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Saving Values
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
with model.trace("Hello") as tracer:
|
|
100
|
+
# Must call .save() to access after context
|
|
101
|
+
hidden = model.transformer.h[5].output[0].save()
|
|
102
|
+
|
|
103
|
+
# Now hidden contains actual tensor
|
|
104
|
+
print(hidden.shape)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Modifying Values
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
with model.trace("Hello") as tracer:
|
|
111
|
+
# In-place modification
|
|
112
|
+
model.transformer.h[5].output[0][:] = 0
|
|
113
|
+
|
|
114
|
+
# Replace with computed value
|
|
115
|
+
model.transformer.h[5].output[0][:] = some_tensor
|
|
116
|
+
|
|
117
|
+
# Arithmetic modification
|
|
118
|
+
model.transformer.h[5].output[0][:] *= 0.5
|
|
119
|
+
model.transformer.h[5].output[0][:] += steering_vector
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Proxy Operations
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
with model.trace("Hello") as tracer:
|
|
126
|
+
h = model.transformer.h[5].output[0]
|
|
127
|
+
|
|
128
|
+
# Indexing
|
|
129
|
+
first_token = h[:, 0, :]
|
|
130
|
+
last_token = h[:, -1, :]
|
|
131
|
+
|
|
132
|
+
# PyTorch operations
|
|
133
|
+
mean = h.mean(dim=-1)
|
|
134
|
+
norm = h.norm()
|
|
135
|
+
transposed = h.transpose(1, 2)
|
|
136
|
+
|
|
137
|
+
# Save results
|
|
138
|
+
mean.save()
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Module Access Patterns
|
|
144
|
+
|
|
145
|
+
### GPT-2 Structure
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
with model.trace("Hello") as tracer:
|
|
149
|
+
# Embeddings
|
|
150
|
+
embed = model.transformer.wte.output.save()
|
|
151
|
+
pos_embed = model.transformer.wpe.output.save()
|
|
152
|
+
|
|
153
|
+
# Layer outputs
|
|
154
|
+
layer_out = model.transformer.h[5].output[0].save()
|
|
155
|
+
|
|
156
|
+
# Attention
|
|
157
|
+
attn_out = model.transformer.h[5].attn.output.save()
|
|
158
|
+
|
|
159
|
+
# MLP
|
|
160
|
+
mlp_out = model.transformer.h[5].mlp.output.save()
|
|
161
|
+
|
|
162
|
+
# Final output
|
|
163
|
+
logits = model.output.save()
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### LLaMA Structure
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
with model.trace("Hello") as tracer:
|
|
170
|
+
# Embeddings
|
|
171
|
+
embed = model.model.embed_tokens.output.save()
|
|
172
|
+
|
|
173
|
+
# Layer outputs
|
|
174
|
+
layer_out = model.model.layers[10].output[0].save()
|
|
175
|
+
|
|
176
|
+
# Attention
|
|
177
|
+
attn_out = model.model.layers[10].self_attn.output.save()
|
|
178
|
+
|
|
179
|
+
# MLP
|
|
180
|
+
mlp_out = model.model.layers[10].mlp.output.save()
|
|
181
|
+
|
|
182
|
+
# Final output
|
|
183
|
+
logits = model.output.save()
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Finding Module Names
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
# Print model structure
|
|
190
|
+
print(model._model)
|
|
191
|
+
|
|
192
|
+
# Or iterate
|
|
193
|
+
for name, module in model._model.named_modules():
|
|
194
|
+
print(name)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Multiple Prompts (invoke)
|
|
200
|
+
|
|
201
|
+
Process multiple prompts in a single trace.
|
|
202
|
+
|
|
203
|
+
### Basic Usage
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
with model.trace() as tracer:
|
|
207
|
+
with tracer.invoke("First prompt"):
|
|
208
|
+
hidden1 = model.transformer.h[5].output[0].save()
|
|
209
|
+
|
|
210
|
+
with tracer.invoke("Second prompt"):
|
|
211
|
+
hidden2 = model.transformer.h[5].output[0].save()
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Cross-Prompt Intervention
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
with model.trace() as tracer:
|
|
218
|
+
# Get activations from first prompt
|
|
219
|
+
with tracer.invoke("The cat sat on the"):
|
|
220
|
+
cat_hidden = model.transformer.h[6].output[0].save()
|
|
221
|
+
|
|
222
|
+
# Inject into second prompt
|
|
223
|
+
with tracer.invoke("The dog ran through the"):
|
|
224
|
+
model.transformer.h[6].output[0][:] = cat_hidden
|
|
225
|
+
output = model.output.save()
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Generation
|
|
231
|
+
|
|
232
|
+
Generate text with interventions.
|
|
233
|
+
|
|
234
|
+
### Basic Generation
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
with model.trace() as tracer:
|
|
238
|
+
with tracer.invoke("Once upon a time"):
|
|
239
|
+
# Intervention during generation
|
|
240
|
+
model.transformer.h[5].output[0][:] *= 1.2
|
|
241
|
+
|
|
242
|
+
output = model.generate(max_new_tokens=50)
|
|
243
|
+
|
|
244
|
+
print(model.tokenizer.decode(output[0]))
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
## Gradients
|
|
250
|
+
|
|
251
|
+
Access gradients for analysis (not supported with remote/vLLM).
|
|
252
|
+
|
|
253
|
+
```python
|
|
254
|
+
with model.trace("The quick brown fox") as tracer:
|
|
255
|
+
hidden = model.transformer.h[5].output[0].save()
|
|
256
|
+
hidden.retain_grad()
|
|
257
|
+
|
|
258
|
+
logits = model.output
|
|
259
|
+
target_token = model.tokenizer.encode(" jumps")[0]
|
|
260
|
+
loss = -logits[0, -1, target_token]
|
|
261
|
+
loss.backward()
|
|
262
|
+
|
|
263
|
+
# Access gradient
|
|
264
|
+
grad = hidden.grad
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
269
|
+
## NDIF Remote Execution
|
|
270
|
+
|
|
271
|
+
### Setup
|
|
272
|
+
|
|
273
|
+
```python
|
|
274
|
+
import os
|
|
275
|
+
os.environ["NDIF_API_KEY"] = "your_key"
|
|
276
|
+
|
|
277
|
+
# Or configure directly
|
|
278
|
+
from nnsight import CONFIG
|
|
279
|
+
CONFIG.set_default_api_key("your_key")
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### Using Remote
|
|
283
|
+
|
|
284
|
+
```python
|
|
285
|
+
model = LanguageModel("meta-llama/Llama-3.1-70B")
|
|
286
|
+
|
|
287
|
+
with model.trace("Hello", remote=True) as tracer:
|
|
288
|
+
hidden = model.model.layers[40].output[0].save()
|
|
289
|
+
logits = model.output.save()
|
|
290
|
+
|
|
291
|
+
# Results returned from NDIF
|
|
292
|
+
print(hidden.shape)
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
### Sessions (Batching Requests)
|
|
296
|
+
|
|
297
|
+
```python
|
|
298
|
+
with model.session(remote=True) as session:
|
|
299
|
+
with model.trace("First prompt"):
|
|
300
|
+
h1 = model.model.layers[20].output[0].save()
|
|
301
|
+
|
|
302
|
+
with model.trace("Second prompt"):
|
|
303
|
+
h2 = model.model.layers[20].output[0].save()
|
|
304
|
+
|
|
305
|
+
# Both traces run in a single NDIF request
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
---
|
|
309
|
+
|
|
310
|
+
## Utility Methods
|
|
311
|
+
|
|
312
|
+
### Early Stopping
|
|
313
|
+
|
|
314
|
+
```python
|
|
315
|
+
with model.trace("Hello") as tracer:
|
|
316
|
+
hidden = model.transformer.h[5].output[0].save()
|
|
317
|
+
tracer.stop() # Don't run remaining layers
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
### Validation
|
|
321
|
+
|
|
322
|
+
```python
|
|
323
|
+
# Validate shapes before execution
|
|
324
|
+
with model.trace("Hello", validate=True) as tracer:
|
|
325
|
+
hidden = model.transformer.h[5].output[0].save()
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
### Module Access Result
|
|
329
|
+
|
|
330
|
+
```python
|
|
331
|
+
with model.trace("Hello") as tracer:
|
|
332
|
+
# Access result of a method call
|
|
333
|
+
result = tracer.result
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
---
|
|
337
|
+
|
|
338
|
+
## Common Module Paths
|
|
339
|
+
|
|
340
|
+
| Model | Embeddings | Layers | Attention | MLP |
|
|
341
|
+
|-------|------------|--------|-----------|-----|
|
|
342
|
+
| GPT-2 | `transformer.wte` | `transformer.h[i]` | `transformer.h[i].attn` | `transformer.h[i].mlp` |
|
|
343
|
+
| LLaMA | `model.embed_tokens` | `model.layers[i]` | `model.layers[i].self_attn` | `model.layers[i].mlp` |
|
|
344
|
+
| Mistral | `model.embed_tokens` | `model.layers[i]` | `model.layers[i].self_attn` | `model.layers[i].mlp` |
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
# nnsight Tutorials
|
|
2
|
+
|
|
3
|
+
## Tutorial 1: Basic Activation Analysis
|
|
4
|
+
|
|
5
|
+
### Goal
|
|
6
|
+
Load a model, access internal activations, and analyze them.
|
|
7
|
+
|
|
8
|
+
### Step-by-Step
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
from nnsight import LanguageModel
|
|
12
|
+
import torch
|
|
13
|
+
|
|
14
|
+
# 1. Load model
|
|
15
|
+
model = LanguageModel("openai-community/gpt2", device_map="auto")
|
|
16
|
+
|
|
17
|
+
# 2. Trace and collect activations
|
|
18
|
+
prompt = "The capital of France is"
|
|
19
|
+
|
|
20
|
+
with model.trace(prompt) as tracer:
|
|
21
|
+
# Collect from multiple layers
|
|
22
|
+
activations = {}
|
|
23
|
+
for i in range(12): # GPT-2 has 12 layers
|
|
24
|
+
activations[i] = model.transformer.h[i].output[0].save()
|
|
25
|
+
|
|
26
|
+
# Get final logits
|
|
27
|
+
logits = model.output.save()
|
|
28
|
+
|
|
29
|
+
# 3. Analyze (outside context)
|
|
30
|
+
print("Layer-wise activation norms:")
|
|
31
|
+
for layer, act in activations.items():
|
|
32
|
+
print(f" Layer {layer}: {act.norm().item():.2f}")
|
|
33
|
+
|
|
34
|
+
# 4. Check predictions
|
|
35
|
+
probs = torch.softmax(logits[0, -1], dim=-1)
|
|
36
|
+
top_tokens = probs.topk(5)
|
|
37
|
+
print("\nTop predictions:")
|
|
38
|
+
for token_id, prob in zip(top_tokens.indices, top_tokens.values):
|
|
39
|
+
token_str = model.tokenizer.decode(token_id)
|
|
40
|
+
print(f" {token_str!r}: {prob.item():.3f}")
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Tutorial 2: Activation Patching
|
|
46
|
+
|
|
47
|
+
### Goal
|
|
48
|
+
Patch activations from one prompt into another to test causal relationships.
|
|
49
|
+
|
|
50
|
+
### Step-by-Step
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from nnsight import LanguageModel
|
|
54
|
+
import torch
|
|
55
|
+
|
|
56
|
+
model = LanguageModel("gpt2", device_map="auto")
|
|
57
|
+
|
|
58
|
+
clean_prompt = "The Eiffel Tower is in the city of"
|
|
59
|
+
corrupted_prompt = "The Colosseum is in the city of"
|
|
60
|
+
|
|
61
|
+
# 1. Get clean activations
|
|
62
|
+
with model.trace(clean_prompt) as tracer:
|
|
63
|
+
clean_hidden = model.transformer.h[8].output[0].save()
|
|
64
|
+
clean_logits = model.output.save()
|
|
65
|
+
|
|
66
|
+
# 2. Define metric
|
|
67
|
+
paris_token = model.tokenizer.encode(" Paris")[0]
|
|
68
|
+
rome_token = model.tokenizer.encode(" Rome")[0]
|
|
69
|
+
|
|
70
|
+
def logit_diff(logits):
|
|
71
|
+
return (logits[0, -1, paris_token] - logits[0, -1, rome_token]).item()
|
|
72
|
+
|
|
73
|
+
print(f"Clean logit diff: {logit_diff(clean_logits):.3f}")
|
|
74
|
+
|
|
75
|
+
# 3. Patch clean into corrupted
|
|
76
|
+
with model.trace(corrupted_prompt) as tracer:
|
|
77
|
+
# Replace layer 8 output with clean activations
|
|
78
|
+
model.transformer.h[8].output[0][:] = clean_hidden
|
|
79
|
+
patched_logits = model.output.save()
|
|
80
|
+
|
|
81
|
+
print(f"Patched logit diff: {logit_diff(patched_logits):.3f}")
|
|
82
|
+
|
|
83
|
+
# 4. Systematic patching sweep
|
|
84
|
+
results = torch.zeros(12) # 12 layers
|
|
85
|
+
|
|
86
|
+
for layer in range(12):
|
|
87
|
+
# Get clean activation for this layer
|
|
88
|
+
with model.trace(clean_prompt) as tracer:
|
|
89
|
+
clean_act = model.transformer.h[layer].output[0].save()
|
|
90
|
+
|
|
91
|
+
# Patch into corrupted
|
|
92
|
+
with model.trace(corrupted_prompt) as tracer:
|
|
93
|
+
model.transformer.h[layer].output[0][:] = clean_act
|
|
94
|
+
logits = model.output.save()
|
|
95
|
+
|
|
96
|
+
results[layer] = logit_diff(logits)
|
|
97
|
+
print(f"Layer {layer}: {results[layer]:.3f}")
|
|
98
|
+
|
|
99
|
+
print(f"\nMost important layer: {results.argmax().item()}")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Tutorial 3: Cross-Prompt Activation Sharing
|
|
105
|
+
|
|
106
|
+
### Goal
|
|
107
|
+
Transfer activations between different prompts in a single trace.
|
|
108
|
+
|
|
109
|
+
### Step-by-Step
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from nnsight import LanguageModel
|
|
113
|
+
|
|
114
|
+
model = LanguageModel("gpt2", device_map="auto")
|
|
115
|
+
|
|
116
|
+
with model.trace() as tracer:
|
|
117
|
+
# First prompt - get "cat" representations
|
|
118
|
+
with tracer.invoke("The cat sat on the mat"):
|
|
119
|
+
cat_hidden = model.transformer.h[6].output[0].save()
|
|
120
|
+
|
|
121
|
+
# Second prompt - inject "cat" into "dog"
|
|
122
|
+
with tracer.invoke("The dog ran through the park"):
|
|
123
|
+
# Replace with cat's activations
|
|
124
|
+
model.transformer.h[6].output[0][:] = cat_hidden
|
|
125
|
+
modified_logits = model.output.save()
|
|
126
|
+
|
|
127
|
+
# The dog prompt now has cat's internal representations
|
|
128
|
+
print(f"Modified logits shape: {modified_logits.shape}")
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Tutorial 4: Remote Execution with NDIF
|
|
134
|
+
|
|
135
|
+
### Goal
|
|
136
|
+
Run the same interpretability code on massive models (70B+).
|
|
137
|
+
|
|
138
|
+
### Step-by-Step
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from nnsight import LanguageModel
|
|
142
|
+
import os
|
|
143
|
+
|
|
144
|
+
# 1. Setup API key
|
|
145
|
+
os.environ["NDIF_API_KEY"] = "your_key_here"
|
|
146
|
+
|
|
147
|
+
# 2. Load large model (runs remotely)
|
|
148
|
+
model = LanguageModel("meta-llama/Llama-3.1-70B")
|
|
149
|
+
|
|
150
|
+
# 3. Same code, just remote=True
|
|
151
|
+
prompt = "The meaning of life is"
|
|
152
|
+
|
|
153
|
+
with model.trace(prompt, remote=True) as tracer:
|
|
154
|
+
# Access layer 40 of 70B model!
|
|
155
|
+
hidden = model.model.layers[40].output[0].save()
|
|
156
|
+
logits = model.output.save()
|
|
157
|
+
|
|
158
|
+
# 4. Results returned from NDIF
|
|
159
|
+
print(f"Hidden shape: {hidden.shape}")
|
|
160
|
+
print(f"Logits shape: {logits.shape}")
|
|
161
|
+
|
|
162
|
+
# 5. Check predictions
|
|
163
|
+
import torch
|
|
164
|
+
probs = torch.softmax(logits[0, -1], dim=-1)
|
|
165
|
+
top_tokens = probs.topk(5)
|
|
166
|
+
print("\nTop predictions from Llama-70B:")
|
|
167
|
+
for token_id, prob in zip(top_tokens.indices, top_tokens.values):
|
|
168
|
+
print(f" {model.tokenizer.decode(token_id)!r}: {prob.item():.3f}")
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Batching with Sessions
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
# Run multiple experiments in one NDIF request
|
|
175
|
+
with model.session(remote=True) as session:
|
|
176
|
+
with model.trace("What is 2+2?"):
|
|
177
|
+
math_hidden = model.model.layers[30].output[0].save()
|
|
178
|
+
|
|
179
|
+
with model.trace("The capital of France is"):
|
|
180
|
+
fact_hidden = model.model.layers[30].output[0].save()
|
|
181
|
+
|
|
182
|
+
# Compare representations
|
|
183
|
+
similarity = torch.cosine_similarity(
|
|
184
|
+
math_hidden.mean(dim=1),
|
|
185
|
+
fact_hidden.mean(dim=1),
|
|
186
|
+
dim=-1
|
|
187
|
+
)
|
|
188
|
+
print(f"Similarity: {similarity.item():.3f}")
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Tutorial 5: Steering with Activation Addition
|
|
194
|
+
|
|
195
|
+
### Goal
|
|
196
|
+
Add a steering vector to change model behavior.
|
|
197
|
+
|
|
198
|
+
### Step-by-Step
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
from nnsight import LanguageModel
|
|
202
|
+
import torch
|
|
203
|
+
|
|
204
|
+
model = LanguageModel("gpt2", device_map="auto")
|
|
205
|
+
|
|
206
|
+
# 1. Get contrasting activations
|
|
207
|
+
with model.trace("I love this movie, it's wonderful") as tracer:
|
|
208
|
+
positive_hidden = model.transformer.h[6].output[0].save()
|
|
209
|
+
|
|
210
|
+
with model.trace("I hate this movie, it's terrible") as tracer:
|
|
211
|
+
negative_hidden = model.transformer.h[6].output[0].save()
|
|
212
|
+
|
|
213
|
+
# 2. Compute steering direction
|
|
214
|
+
steering_vector = positive_hidden.mean(dim=1) - negative_hidden.mean(dim=1)
|
|
215
|
+
|
|
216
|
+
# 3. Generate without steering
|
|
217
|
+
test_prompt = "This restaurant is"
|
|
218
|
+
with model.trace(test_prompt) as tracer:
|
|
219
|
+
normal_logits = model.output.save()
|
|
220
|
+
|
|
221
|
+
# 4. Generate with steering
|
|
222
|
+
with model.trace(test_prompt) as tracer:
|
|
223
|
+
# Add steering at layer 6
|
|
224
|
+
model.transformer.h[6].output[0][:] += 3.0 * steering_vector
|
|
225
|
+
steered_logits = model.output.save()
|
|
226
|
+
|
|
227
|
+
# 5. Compare predictions
|
|
228
|
+
def top_prediction(logits):
|
|
229
|
+
token = logits[0, -1].argmax()
|
|
230
|
+
return model.tokenizer.decode(token)
|
|
231
|
+
|
|
232
|
+
print(f"Normal: {top_prediction(normal_logits)}")
|
|
233
|
+
print(f"Steered (positive): {top_prediction(steered_logits)}")
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## Tutorial 6: Logit Lens
|
|
239
|
+
|
|
240
|
+
### Goal
|
|
241
|
+
See what the model "believes" at each layer.
|
|
242
|
+
|
|
243
|
+
### Step-by-Step
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
from nnsight import LanguageModel
|
|
247
|
+
import torch
|
|
248
|
+
|
|
249
|
+
model = LanguageModel("gpt2", device_map="auto")
|
|
250
|
+
|
|
251
|
+
prompt = "The quick brown fox jumps over the lazy"
|
|
252
|
+
|
|
253
|
+
with model.trace(prompt) as tracer:
|
|
254
|
+
# Collect residual stream at each layer
|
|
255
|
+
residuals = []
|
|
256
|
+
for i in range(12):
|
|
257
|
+
resid = model.transformer.h[i].output[0].save()
|
|
258
|
+
residuals.append(resid)
|
|
259
|
+
|
|
260
|
+
# Access model's unembedding and final layernorm
|
|
261
|
+
W_U = model._model.lm_head.weight.T # [d_model, vocab]
|
|
262
|
+
ln_f = model._model.transformer.ln_f
|
|
263
|
+
|
|
264
|
+
print("Layer-by-layer predictions for final token:")
|
|
265
|
+
for i, resid in enumerate(residuals):
|
|
266
|
+
# Apply final layernorm
|
|
267
|
+
normed = ln_f(resid)
|
|
268
|
+
|
|
269
|
+
# Project to vocabulary
|
|
270
|
+
layer_logits = normed @ W_U
|
|
271
|
+
|
|
272
|
+
# Get prediction
|
|
273
|
+
probs = torch.softmax(layer_logits[0, -1], dim=-1)
|
|
274
|
+
top_token = probs.argmax()
|
|
275
|
+
top_prob = probs[top_token].item()
|
|
276
|
+
|
|
277
|
+
print(f"Layer {i}: {model.tokenizer.decode(top_token)!r} ({top_prob:.3f})")
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
---
|
|
281
|
+
|
|
282
|
+
## External Resources
|
|
283
|
+
|
|
284
|
+
### Official Resources
|
|
285
|
+
- [Getting Started](https://nnsight.net/start/)
|
|
286
|
+
- [Features Overview](https://nnsight.net/features/)
|
|
287
|
+
- [Documentation](https://nnsight.net/documentation/)
|
|
288
|
+
- [Tutorials](https://nnsight.net/tutorials/)
|
|
289
|
+
|
|
290
|
+
### NDIF Resources
|
|
291
|
+
- [NDIF Homepage](https://ndif.us/)
|
|
292
|
+
- [Available Models](https://ndif.us/models)
|
|
293
|
+
- [API Key Signup](https://login.ndif.us/)
|
|
294
|
+
|
|
295
|
+
### Paper
|
|
296
|
+
- [NNsight and NDIF](https://arxiv.org/abs/2407.14561) - ICLR 2025
|
|
297
|
+
|
|
298
|
+
### Community
|
|
299
|
+
- [Discussion Forum](https://discuss.ndif.us/)
|
|
300
|
+
- [GitHub Issues](https://github.com/ndif-team/nnsight/issues)
|