@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,333 @@
1
+ # SAELens API Reference
2
+
3
+ ## SAE Class
4
+
5
+ The core class representing a Sparse Autoencoder.
6
+
7
+ ### Loading Pre-trained SAEs
8
+
9
+ ```python
10
+ from sae_lens import SAE
11
+
12
+ # From official releases
13
+ sae, cfg_dict, sparsity = SAE.from_pretrained(
14
+ release="gpt2-small-res-jb",
15
+ sae_id="blocks.8.hook_resid_pre",
16
+ device="cuda"
17
+ )
18
+
19
+ # From HuggingFace
20
+ sae, cfg_dict, sparsity = SAE.from_pretrained(
21
+ release="username/repo-name",
22
+ sae_id="path/to/sae",
23
+ device="cuda"
24
+ )
25
+
26
+ # From local disk
27
+ sae = SAE.load_from_disk("/path/to/sae", device="cuda")
28
+ ```
29
+
30
+ ### SAE Attributes
31
+
32
+ | Attribute | Shape | Description |
33
+ |-----------|-------|-------------|
34
+ | `W_enc` | [d_in, d_sae] | Encoder weights |
35
+ | `W_dec` | [d_sae, d_in] | Decoder weights |
36
+ | `b_enc` | [d_sae] | Encoder bias |
37
+ | `b_dec` | [d_in] | Decoder bias |
38
+ | `cfg` | SAEConfig | Configuration object |
39
+
40
+ ### Core Methods
41
+
42
+ #### encode()
43
+
44
+ ```python
45
+ # Encode activations to sparse features
46
+ features = sae.encode(activations)
47
+ # Input: [batch, pos, d_in]
48
+ # Output: [batch, pos, d_sae]
49
+ ```
50
+
51
+ #### decode()
52
+
53
+ ```python
54
+ # Reconstruct activations from features
55
+ reconstructed = sae.decode(features)
56
+ # Input: [batch, pos, d_sae]
57
+ # Output: [batch, pos, d_in]
58
+ ```
59
+
60
+ #### forward()
61
+
62
+ ```python
63
+ # Full forward pass (encode + decode)
64
+ reconstructed = sae(activations)
65
+ # Returns reconstructed activations
66
+ ```
67
+
68
+ #### save_model()
69
+
70
+ ```python
71
+ sae.save_model("/path/to/save")
72
+ ```
73
+
74
+ ---
75
+
76
+ ## SAEConfig
77
+
78
+ Configuration class for SAE architecture and training context.
79
+
80
+ ### Key Parameters
81
+
82
+ | Parameter | Type | Description |
83
+ |-----------|------|-------------|
84
+ | `d_in` | int | Input dimension (model's d_model) |
85
+ | `d_sae` | int | SAE hidden dimension |
86
+ | `architecture` | str | "standard", "gated", "jumprelu", "topk" |
87
+ | `activation_fn_str` | str | Activation function name |
88
+ | `model_name` | str | Source model name |
89
+ | `hook_name` | str | Hook point in model |
90
+ | `normalize_activations` | str | Normalization method |
91
+ | `dtype` | str | Data type |
92
+ | `device` | str | Device |
93
+
94
+ ### Accessing Config
95
+
96
+ ```python
97
+ print(sae.cfg.d_in) # 768 for GPT-2 small
98
+ print(sae.cfg.d_sae) # e.g., 24576 (32x expansion)
99
+ print(sae.cfg.hook_name) # e.g., "blocks.8.hook_resid_pre"
100
+ ```
101
+
102
+ ---
103
+
104
+ ## LanguageModelSAERunnerConfig
105
+
106
+ Comprehensive configuration for training SAEs.
107
+
108
+ ### Example Configuration
109
+
110
+ ```python
111
+ from sae_lens import LanguageModelSAERunnerConfig
112
+
113
+ cfg = LanguageModelSAERunnerConfig(
114
+ # Model and hook
115
+ model_name="gpt2-small",
116
+ hook_name="blocks.8.hook_resid_pre",
117
+ hook_layer=8,
118
+ d_in=768,
119
+
120
+ # SAE architecture
121
+ architecture="standard", # "standard", "gated", "jumprelu", "topk"
122
+ d_sae=768 * 8, # d_in × expansion factor (here 8x)
123
+ activation_fn="relu",
124
+
125
+ # Training hyperparameters
126
+ lr=4e-4,
127
+ l1_coefficient=8e-5,
128
+ lp_norm=1.0,
129
+ lr_scheduler_name="constant",
130
+ lr_warm_up_steps=500,
131
+
132
+ # Sparsity control
133
+ l1_warm_up_steps=1000,
134
+ use_ghost_grads=True,
135
+ feature_sampling_window=1000,
136
+ dead_feature_window=5000,
137
+ dead_feature_threshold=1e-8,
138
+
139
+ # Data
140
+ dataset_path="monology/pile-uncopyrighted",
141
+ streaming=True,
142
+ context_size=128,
143
+
144
+ # Batch sizes
145
+ train_batch_size_tokens=4096,
146
+ store_batch_size_prompts=16,
147
+ n_batches_in_buffer=64,
148
+
149
+ # Training duration
150
+ training_tokens=100_000_000,
151
+
152
+ # Logging
153
+ log_to_wandb=True,
154
+ wandb_project="sae-training",
155
+ wandb_log_frequency=100,
156
+
157
+ # Checkpointing
158
+ checkpoint_path="checkpoints",
159
+ n_checkpoints=5,
160
+
161
+ # Hardware
162
+ device="cuda",
163
+ dtype="float32",
164
+ )
165
+ ```
166
+
167
+ ### Key Parameters Explained
168
+
169
+ #### Architecture Parameters
170
+
171
+ | Parameter | Description |
172
+ |-----------|-------------|
173
+ | `architecture` | SAE type: "standard", "gated", "jumprelu", "topk" |
174
+ | `d_sae` | Hidden dimension (or use `expansion_factor`) |
175
+ | `expansion_factor` | Alternative to d_sae: d_sae = d_in × expansion_factor |
176
+ | `activation_fn` | "relu", "topk", etc. |
177
+ | `activation_fn_kwargs` | Dict for activation params (e.g., {"k": 50} for topk) |
178
+
179
+ #### Sparsity Parameters
180
+
181
+ | Parameter | Description |
182
+ |-----------|-------------|
183
+ | `l1_coefficient` | L1 penalty weight (higher = sparser) |
184
+ | `l1_warm_up_steps` | Steps to ramp up L1 penalty |
185
+ | `use_ghost_grads` | Apply gradients to dead features |
186
+ | `dead_feature_threshold` | Threshold below which a feature is considered "dead" |
187
+ | `dead_feature_window` | Number of steps over which dead features are detected |
188
+
189
+ #### Learning Rate Parameters
190
+
191
+ | Parameter | Description |
192
+ |-----------|-------------|
193
+ | `lr` | Base learning rate |
194
+ | `lr_scheduler_name` | "constant", "cosineannealing", etc. |
195
+ | `lr_warm_up_steps` | LR warmup steps |
196
+ | `lr_decay_steps` | Steps for LR decay |
197
+
198
+ ---
199
+
200
+ ## SAETrainingRunner
201
+
202
+ Main class for executing training.
203
+
204
+ ### Basic Training
205
+
206
+ ```python
207
+ from sae_lens import SAETrainingRunner, LanguageModelSAERunnerConfig
208
+
209
+ cfg = LanguageModelSAERunnerConfig(...)
210
+ runner = SAETrainingRunner(cfg)
211
+ sae = runner.run()
212
+ ```
213
+
214
+ ### Accessing Training Metrics
215
+
216
+ ```python
217
+ # During training, metrics logged to W&B include:
218
+ # - l0: Average number of active features per token
219
+ # - ce_loss_score: Cross-entropy recovery
220
+ # - mse_loss: Reconstruction loss
221
+ # - l1_loss: Sparsity loss
222
+ # - dead_features: Count of dead features
223
+ ```
224
+
225
+ ---
226
+
227
+ ## ActivationsStore
228
+
229
+ Manages activation collection and batching.
230
+
231
+ ### Basic Usage
232
+
233
+ ```python
234
+ from sae_lens import ActivationsStore
235
+
236
+ store = ActivationsStore.from_sae(
237
+ model=model,
238
+ sae=sae,
239
+ store_batch_size_prompts=8,
240
+ train_batch_size_tokens=4096,
241
+ n_batches_in_buffer=32,
242
+ device="cuda",
243
+ )
244
+
245
+ # Get a batch of tokens (use store.next_batch() for a batch of activations)
246
+ batch_tokens = store.get_batch_tokens()
247
+ ```
248
+
249
+ ---
250
+
251
+ ## HookedSAETransformer
252
+
253
+ Integration of SAEs with TransformerLens models.
254
+
255
+ ### Basic Usage
256
+
257
+ ```python
258
+ from sae_lens import HookedSAETransformer
259
+
260
+ # Load model with SAE
261
+ model = HookedSAETransformer.from_pretrained("gpt2-small")
262
+ model.add_sae(sae)
263
+
264
+ # Run with SAE in the loop
265
+ output = model.run_with_saes(tokens, saes=[sae])
266
+
267
+ # Cache with SAE activations
268
+ output, cache = model.run_with_cache_with_saes(tokens, saes=[sae])
269
+ ```
270
+
271
+ ---
272
+
273
+ ## SAE Architectures
274
+
275
+ ### Standard (ReLU + L1)
276
+
277
+ ```python
278
+ cfg = LanguageModelSAERunnerConfig(
279
+ architecture="standard",
280
+ activation_fn="relu",
281
+ l1_coefficient=8e-5,
282
+ )
283
+ ```
284
+
285
+ ### Gated
286
+
287
+ ```python
288
+ cfg = LanguageModelSAERunnerConfig(
289
+ architecture="gated",
290
+ )
291
+ ```
292
+
293
+ ### TopK
294
+
295
+ ```python
296
+ cfg = LanguageModelSAERunnerConfig(
297
+ architecture="topk",
298
+ activation_fn="topk",
299
+ activation_fn_kwargs={"k": 50}, # Exactly 50 active features
300
+ )
301
+ ```
302
+
303
+ ### JumpReLU (State-of-the-art)
304
+
305
+ ```python
306
+ cfg = LanguageModelSAERunnerConfig(
307
+ architecture="jumprelu",
308
+ )
309
+ ```
310
+
311
+ ---
312
+
313
+ ## Utility Functions
314
+
315
+ ### Upload to HuggingFace
316
+
317
+ ```python
318
+ from sae_lens import upload_saes_to_huggingface
319
+
320
+ upload_saes_to_huggingface(
321
+ saes=[sae],
322
+ repo_id="username/my-saes",
323
+ token="hf_token",
324
+ )
325
+ ```
326
+
327
+ ### Neuronpedia Integration
328
+
329
+ ```python
330
+ # Features can be viewed on Neuronpedia
331
+ # URL format: neuronpedia.org/{model}/{layer}-{sae_type}/{feature_id}
332
+ # Example: neuronpedia.org/gpt2-small/8-res-jb/1234
333
+ ```
@@ -0,0 +1,318 @@
1
+ # SAELens Tutorials
2
+
3
+ ## Tutorial 1: Loading and Analyzing Pre-trained SAEs
4
+
5
+ ### Goal
6
+ Load a pre-trained SAE and analyze which features activate on specific inputs.
7
+
8
+ ### Step-by-Step
9
+
10
+ ```python
11
+ from transformer_lens import HookedTransformer
12
+ from sae_lens import SAE
13
+ import torch
14
+
15
+ # 1. Load model and SAE
16
+ model = HookedTransformer.from_pretrained("gpt2-small", device="cuda")
17
+ sae, cfg_dict, sparsity = SAE.from_pretrained(
18
+ release="gpt2-small-res-jb",
19
+ sae_id="blocks.8.hook_resid_pre",
20
+ device="cuda"
21
+ )
22
+
23
+ print(f"SAE input dim: {sae.cfg.d_in}")
24
+ print(f"SAE hidden dim: {sae.cfg.d_sae}")
25
+ print(f"Expansion factor: {sae.cfg.d_sae / sae.cfg.d_in:.1f}x")
26
+
27
+ # 2. Get model activations
28
+ prompt = "The capital of France is Paris"
29
+ tokens = model.to_tokens(prompt)
30
+ _, cache = model.run_with_cache(tokens)
31
+ activations = cache["resid_pre", 8] # [1, seq_len, 768]
32
+
33
+ # 3. Encode to SAE features
34
+ features = sae.encode(activations) # [1, seq_len, d_sae]
35
+
36
+ # 4. Analyze sparsity
37
+ active_per_token = (features > 0).sum(dim=-1)
38
+ print(f"Average active features per token: {active_per_token.float().mean():.1f}")
39
+
40
+ # 5. Find top features for each token
41
+ str_tokens = model.to_str_tokens(prompt)
42
+ for pos in range(len(str_tokens)):
43
+ top_features = features[0, pos].topk(5)
44
+ print(f"\nToken '{str_tokens[pos]}':")
45
+ for feat_idx, feat_val in zip(top_features.indices, top_features.values):
46
+ print(f" Feature {feat_idx.item()}: {feat_val.item():.3f}")
47
+
48
+ # 6. Check reconstruction quality
49
+ reconstructed = sae.decode(features)
50
+ mse = ((activations - reconstructed) ** 2).mean()
51
+ print(f"\nReconstruction MSE: {mse.item():.6f}")
52
+ ```
53
+
54
+ ---
55
+
56
+ ## Tutorial 2: Training a Custom SAE
57
+
58
+ ### Goal
59
+ Train a Sparse Autoencoder on GPT-2 activations.
60
+
61
+ ### Step-by-Step
62
+
63
+ ```python
64
+ from sae_lens import LanguageModelSAERunnerConfig, SAETrainingRunner
65
+
66
+ # 1. Configure training
67
+ cfg = LanguageModelSAERunnerConfig(
68
+ # Model
69
+ model_name="gpt2-small",
70
+ hook_name="blocks.6.hook_resid_pre",
71
+ hook_layer=6,
72
+ d_in=768,
73
+
74
+ # SAE architecture
75
+ architecture="standard",
76
+ d_sae=768 * 8, # 8x expansion
77
+ activation_fn="relu",
78
+
79
+ # Training
80
+ lr=4e-4,
81
+ l1_coefficient=8e-5,
82
+ l1_warm_up_steps=1000,
83
+ train_batch_size_tokens=4096,
84
+ training_tokens=10_000_000, # Small run for demo
85
+
86
+ # Data
87
+ dataset_path="monology/pile-uncopyrighted",
88
+ streaming=True,
89
+ context_size=128,
90
+
91
+ # Dead feature prevention
92
+ use_ghost_grads=True,
93
+ dead_feature_window=5000,
94
+
95
+ # Logging
96
+ log_to_wandb=True,
97
+ wandb_project="sae-training-demo",
98
+
99
+ # Hardware
100
+ device="cuda",
101
+ dtype="float32",
102
+ )
103
+
104
+ # 2. Train
105
+ runner = SAETrainingRunner(cfg)
106
+ sae = runner.run()
107
+
108
+ # 3. Save
109
+ sae.save_model("./my_trained_sae")
110
+ ```
111
+
112
+ ### Hyperparameter Tuning Guide
113
+
114
+ | If you see... | Try... |
115
+ |---------------|--------|
116
+ | High L0 (>200) | Increase `l1_coefficient` |
117
+ | Low CE recovery (<80%) | Decrease `l1_coefficient`, increase `d_sae` |
118
+ | Many dead features (>5%) | Enable `use_ghost_grads`, increase `l1_warm_up_steps` |
119
+ | Training instability | Lower `lr`, increase `lr_warm_up_steps` |
120
+
121
+ ---
122
+
123
+ ## Tutorial 3: Feature Attribution and Steering
124
+
125
+ ### Goal
126
+ Identify which SAE features contribute to specific predictions and use them for steering.
127
+
128
+ ### Step-by-Step
129
+
130
+ ```python
131
+ from transformer_lens import HookedTransformer
132
+ from sae_lens import SAE
133
+ import torch
134
+
135
+ model = HookedTransformer.from_pretrained("gpt2-small", device="cuda")
136
+ sae, _, _ = SAE.from_pretrained(
137
+ release="gpt2-small-res-jb",
138
+ sae_id="blocks.8.hook_resid_pre",
139
+ device="cuda"
140
+ )
141
+
142
+ # 1. Feature attribution for a specific prediction
143
+ prompt = "The capital of France is"
144
+ tokens = model.to_tokens(prompt)
145
+ _, cache = model.run_with_cache(tokens)
146
+ activations = cache["resid_pre", 8]
147
+ features = sae.encode(activations)
148
+
149
+ # Target token
150
+ target_token = model.to_single_token(" Paris")
151
+
152
+ # Compute feature contributions to target logit
153
+ # contribution = feature_activation * decoder_weight * unembedding
154
+ W_dec = sae.W_dec # [d_sae, d_model]
155
+ W_U = model.W_U # [d_model, d_vocab]
156
+
157
+ # Feature direction projected to vocabulary
158
+ feature_to_logit = W_dec @ W_U # [d_sae, d_vocab]
159
+
160
+ # Contribution of each feature to "Paris" at final position
161
+ feature_acts = features[0, -1] # [d_sae]
162
+ contributions = feature_acts * feature_to_logit[:, target_token]
163
+
164
+ # Top contributing features
165
+ top_features = contributions.topk(10)
166
+ print("Top features contributing to 'Paris':")
167
+ for idx, val in zip(top_features.indices, top_features.values):
168
+ print(f" Feature {idx.item()}: {val.item():.3f}")
169
+
170
+ # 2. Feature steering
171
+ def steer_with_feature(feature_idx, strength=5.0):
172
+ """Add a feature direction to the residual stream."""
173
+ feature_direction = sae.W_dec[feature_idx] # [d_model]
174
+
175
+ def hook(activation, hook):
176
+ activation[:, -1, :] += strength * feature_direction
177
+ return activation
178
+
179
+ output = model.generate(
180
+ tokens,
181
+ max_new_tokens=10,
182
+ fwd_hooks=[("blocks.8.hook_resid_pre", hook)]
183
+ )
184
+ return model.to_string(output[0])
185
+
186
+ # Try steering with top feature
187
+ top_feature_idx = top_features.indices[0].item()
188
+ print(f"\nSteering with feature {top_feature_idx}:")
189
+ print(steer_with_feature(top_feature_idx, strength=10.0))
190
+ ```
191
+
192
+ ---
193
+
194
+ ## Tutorial 4: Feature Ablation
195
+
196
+ ### Goal
197
+ Test the causal importance of features by ablating them.
198
+
199
+ ### Step-by-Step
200
+
201
+ ```python
202
+ from transformer_lens import HookedTransformer
203
+ from sae_lens import SAE
204
+ import torch
205
+
206
+ model = HookedTransformer.from_pretrained("gpt2-small", device="cuda")
207
+ sae, _, _ = SAE.from_pretrained(
208
+ release="gpt2-small-res-jb",
209
+ sae_id="blocks.8.hook_resid_pre",
210
+ device="cuda"
211
+ )
212
+
213
+ prompt = "The capital of France is"
214
+ tokens = model.to_tokens(prompt)
215
+
216
+ # Baseline prediction
217
+ baseline_logits = model(tokens)
218
+ target_token = model.to_single_token(" Paris")
219
+ baseline_prob = torch.softmax(baseline_logits[0, -1], dim=-1)[target_token].item()
220
+ print(f"Baseline P(Paris): {baseline_prob:.4f}")
221
+
222
+ # Get features to ablate
223
+ _, cache = model.run_with_cache(tokens)
224
+ activations = cache["resid_pre", 8]
225
+ features = sae.encode(activations)
226
+ top_features = features[0, -1].topk(10).indices
227
+
228
+ # Ablate top features one by one
229
+ for feat_idx in top_features:
230
+ def ablation_hook(activation, hook, feat_idx=feat_idx):
231
+ # Encode → zero feature → decode
232
+ feats = sae.encode(activation)
233
+ feats[:, :, feat_idx] = 0
234
+ return sae.decode(feats)
235
+
236
+ ablated_logits = model.run_with_hooks(
237
+ tokens,
238
+ fwd_hooks=[("blocks.8.hook_resid_pre", ablation_hook)]
239
+ )
240
+ ablated_prob = torch.softmax(ablated_logits[0, -1], dim=-1)[target_token].item()
241
+ change = (ablated_prob - baseline_prob) / baseline_prob * 100
242
+ print(f"Ablate feature {feat_idx.item()}: P(Paris)={ablated_prob:.4f} ({change:+.1f}%)")
243
+ ```
244
+
245
+ ---
246
+
247
+ ## Tutorial 5: Comparing Features Across Prompts
248
+
249
+ ### Goal
250
+ Find which features activate consistently for a concept.
251
+
252
+ ### Step-by-Step
253
+
254
+ ```python
255
+ from transformer_lens import HookedTransformer
256
+ from sae_lens import SAE
257
+ import torch
258
+
259
+ model = HookedTransformer.from_pretrained("gpt2-small", device="cuda")
260
+ sae, _, _ = SAE.from_pretrained(
261
+ release="gpt2-small-res-jb",
262
+ sae_id="blocks.8.hook_resid_pre",
263
+ device="cuda"
264
+ )
265
+
266
+ # Test prompts about the same concept
267
+ prompts = [
268
+ "The Eiffel Tower is located in",
269
+ "Paris is the capital of",
270
+ "France's largest city is",
271
+ "The Louvre museum is in",
272
+ ]
273
+
274
+ # Collect feature activations
275
+ all_features = []
276
+ for prompt in prompts:
277
+ tokens = model.to_tokens(prompt)
278
+ _, cache = model.run_with_cache(tokens)
279
+ activations = cache["resid_pre", 8]
280
+ features = sae.encode(activations)
281
+ # Take max activation across positions
282
+ max_features = features[0].max(dim=0).values
283
+ all_features.append(max_features)
284
+
285
+ all_features = torch.stack(all_features) # [n_prompts, d_sae]
286
+
287
+ # Find features that activate consistently
288
+ mean_activation = all_features.mean(dim=0)
289
+ min_activation = all_features.min(dim=0).values
290
+
291
+ # Features active in ALL prompts
292
+ consistent_features = (min_activation > 0.5).nonzero().squeeze(-1)
293
+ print(f"Features active in all prompts: {len(consistent_features)}")
294
+
295
+ # Top consistent features
296
+ top_consistent = mean_activation[consistent_features].topk(min(10, len(consistent_features)))
297
+ print("\nTop consistent features (possibly 'France/Paris' related):")
298
+ for idx, val in zip(top_consistent.indices, top_consistent.values):
299
+ feat_idx = consistent_features[idx].item()
300
+ print(f" Feature {feat_idx}: mean activation {val.item():.3f}")
301
+ ```
302
+
303
+ ---
304
+
305
+ ## External Resources
306
+
307
+ ### Official Tutorials
308
+ - [Basic Loading & Analysis](https://github.com/jbloomAus/SAELens/blob/main/tutorials/basic_loading_and_analysing.ipynb)
309
+ - [Training SAEs](https://github.com/jbloomAus/SAELens/blob/main/tutorials/training_a_sparse_autoencoder.ipynb)
310
+ - [Logits Lens with Features](https://github.com/jbloomAus/SAELens/blob/main/tutorials/logits_lens_with_features.ipynb)
311
+
312
+ ### ARENA Curriculum
313
+ Comprehensive SAE course: https://www.lesswrong.com/posts/LnHowHgmrMbWtpkxx/intro-to-superposition-and-sparse-autoencoders-colab
314
+
315
+ ### Key Papers
316
+ - [Towards Monosemanticity](https://transformer-circuits.pub/2023/monosemantic-features) - Anthropic (2023)
317
+ - [Scaling Monosemanticity](https://transformer-circuits.pub/2024/scaling-monosemanticity/) - Anthropic (2024)
318
+ - [Sparse Autoencoders Find Interpretable Features](https://arxiv.org/abs/2309.08600) - ICLR 2024