annihilate-llm 1.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,211 @@
1
+ Metadata-Version: 2.4
2
+ Name: annihilate-llm
3
+ Version: 1.3.8
4
+ Summary: Fully automatic censorship removal for language models
5
+ Keywords: llm,transformer,abliteration
6
+ Author: Philipp Emanuel Weidmann
7
+ Author-email: Philipp Emanuel Weidmann <pew@worldwidemann.com>
8
+ License-Expression: AGPL-3.0-or-later
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Environment :: Console
11
+ Classifier: Environment :: GPU
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Requires-Dist: accelerate~=1.13
20
+ Requires-Dist: datasets~=4.7
21
+ Requires-Dist: huggingface-hub~=1.7
22
+ Requires-Dist: immutabledict~=4.3
23
+ Requires-Dist: langdetect~=1.0
24
+ Requires-Dist: lm-eval[hf]~=0.4
25
+ Requires-Dist: numpy~=2.2
26
+ Requires-Dist: optuna~=4.7
27
+ Requires-Dist: peft~=0.19
28
+ Requires-Dist: psutil~=7.2
29
+ Requires-Dist: py-cpuinfo~=9.0
30
+ Requires-Dist: pydantic-settings~=2.13
31
+ Requires-Dist: questionary~=2.1
32
+ Requires-Dist: rich~=14.3
33
+ Requires-Dist: tomli-w~=1.2
34
+ Requires-Dist: tqdm~=4.67
35
+ Requires-Dist: transformers[kernels]~=5.6
36
+ Requires-Dist: bitsandbytes~=0.49 ; extra == 'bnb'
37
+ Requires-Dist: geom-median~=0.1 ; extra == 'research'
38
+ Requires-Dist: imageio~=2.37 ; extra == 'research'
39
+ Requires-Dist: matplotlib~=3.10 ; extra == 'research'
40
+ Requires-Dist: pacmap~=0.8 ; extra == 'research'
41
+ Requires-Dist: scikit-learn~=1.7 ; extra == 'research'
42
+ Requires-Python: >=3.10
43
+ Project-URL: Changelog, https://github.com/tjcrims0nx/annihilation-llm/releases
44
+ Project-URL: Documentation, https://github.com/tjcrims0nx/annihilation-llm
45
+ Project-URL: Homepage, https://github.com/tjcrims0nx/annihilation-llm
46
+ Project-URL: Issues, https://github.com/tjcrims0nx/annihilation-llm/issues
47
+ Project-URL: Repository, https://github.com/tjcrims0nx/annihilation-llm.git
48
+ Provides-Extra: bnb
49
+ Provides-Extra: research
50
+ Description-Content-Type: text/markdown
51
+
52
+ # ⚔️ Annihilation
53
+
54
+ <div align="center">
55
+ <img src="./logo.jpeg" alt="Annihilation Logo" width="300"/>
56
+ </div>
57
+
58
+ **Autonomous Language Model Decensoring Framework**
59
+
60
+ [![License: AGPLv3](https://img.shields.io/badge/License-AGPLv3-blue.svg)](LICENSE)
61
+ [![Python 3.10+](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/)
62
+ [![PyTorch 2.2+](https://img.shields.io/badge/PyTorch-2.2%2B-red)](https://pytorch.org/)
63
+
64
+ ---
65
+
66
+ ## ⚠️ Work in Progress
67
+
68
+ > **⚡ This project is actively under development. Features, APIs, and documentation may change without notice.**
69
+
70
+ ---
71
+
72
+ ## 🔥 What is Annihilation?
73
+
74
+ **Annihilation** is a powerful, fully automatic framework for removing censorship (safety alignment) from transformer-based language models. It uses an advanced implementation of **directional ablation** (abliteration) combined with **TPE-based parameter optimization** to achieve unprecedented results without expensive post-training.
75
+
76
+ ### Key Features
77
+
78
+ - 🤖 **Fully Autonomous** - No human intervention required; the system automatically finds optimal decensoring parameters
79
+ - ⚡ **State-of-the-Art Performance** - Achieves excellent refusal suppression while preserving model capabilities
80
+ - 🔧 **Advanced Abliteration** - Parametric directional ablation with flexible weight kernels
81
+ - 🧠 **Smart Optimization** - Co-minimizes refusal count and KL divergence using Optuna's TPE sampler
82
+ - 🎯 **Multi-Architecture Support** - Works with dense models, MoE architectures, hybrid models, and many multimodal models
83
+ - 📊 **Research Tools** - Built-in residual geometry analysis and visualization capabilities
84
+
85
+ ---
86
+
87
+
88
+ ---
89
+
90
+ ## 🚀 Quick Start
91
+
92
+ Use a Python virtual environment so Annihilation's dependencies do not collide
93
+ with packages installed globally.
94
+
95
+ ```powershell
96
+ # Windows PowerShell
97
+ python -m venv annihilation-env
98
+ .\annihilation-env\Scripts\Activate.ps1
99
+ python -m pip install -U pip
100
+ python -m pip install -U annihilate-llm
101
+
102
+ # Decensor any model automatically
103
+ annihilate Qwen/Qwen3-4B-Instruct-2507
104
+ ```
105
+
106
+ ```bash
107
+ # macOS/Linux/Android terminal
108
+ python -m venv annihilation-env
109
+ source annihilation-env/bin/activate
110
+ python -m pip install -U pip
111
+ python -m pip install -U annihilate-llm
112
+
113
+ # Decensor any model automatically
114
+ annihilate Qwen/Qwen3-4B-Instruct-2507
115
+ ```
116
+
117
+ ### Requirements
118
+
119
+ - **Python**: 3.10+
120
+ - **PyTorch**: 2.2+ (hardware-specific installation required)
121
+ - **Hardware**: GPU recommended (CUDA, ROCm, XPU, or MPS)
122
+ - **Optional**: Install `annihilate-llm[bnb]` only on platforms
123
+ that support bitsandbytes if you want `bnb_4bit` quantization.
124
+
125
+ ---
126
+
127
+ ## ⚙️ Configuration
128
+
129
+ Annihilation works out of the box with defaults, but offers extensive configuration options:
130
+
131
+ ```bash
132
+ # View all options
133
+ annihilate --help
134
+
135
+ # Or use a config file
136
+ # Rename config.default.toml to config.toml and modify as needed
137
+ ```
138
+
139
+ ### Key Configuration Options
140
+
141
+ | Option | Default | Description |
142
+ |--------|---------|-------------|
143
+ | `n_trials` | 200 | Number of optimization trials |
144
+ | `quantization` | none | Model quantization (bnb_4bit) |
145
+ | `row_normalization` | full | Weight normalization strategy |
146
+ | `orthogonalize_direction` | true | Direction adjustment method |
147
+
148
+ ---
149
+
150
+ ## 🔬 How It Works
151
+
152
+ Annihilation implements **parametric directional ablation**:
153
+
154
+ 1. **Direction Computation** - Calculates refusal directions by computing difference-of-means between first-token residuals for harmful vs harmless prompts
155
+
156
+ 2. **Parametric Ablation** - For each transformer component (attention out-projection, MLP down-projection), orthogonalizes weights against the refusal direction using LoRA adapters
157
+
158
+ 3. **Multi-Parameter Optimization** - Uses Optuna's TPE sampler to co-optimize:
159
+ - Ablation weight kernel shape (max_weight, position, min_weight, distance)
160
+ - Direction index (layer selection or interpolation)
161
+ - Per-component parameters (attention vs MLP)
162
+
163
+ 4. **Automatic Selection** - Chooses from Pareto-optimal trials based on refusal count vs KL divergence tradeoff
164
+
165
+ ---
166
+
167
+ ## 📊 Benchmarking
168
+
169
+ After decensoring, you can:
170
+
171
+ - 💬 **Chat** with the model to test behavior
172
+ - 📈 **Benchmark** using standard evaluation frameworks (MMLU, GSM8K, etc.)
173
+ - 💾 **Save** the model locally or upload to Hugging Face
174
+
175
+ ---
176
+
177
+ ## 🧪 Research Features
178
+
179
+ Install with research dependencies for visualization tools:
180
+
181
+ ```bash
182
+ pip install -U "annihilate-llm[research]"
183
+ ```
184
+
185
+ Features:
186
+ - `--plot-residuals` - Generate PaCMAP projections of residual vectors
187
+ - `--print-residual-geometry` - Detailed residual analysis metrics
188
+
189
+ ---
190
+
191
+ ## 📜 License
192
+
193
+ **Annihilation** is free software distributed under the **GNU Affero General Public License v3 or later** (AGPL-3.0-or-later).
194
+
195
+ See [LICENSE](LICENSE) for full details.
196
+
197
+ ---
198
+
199
+ ## ⚡ Disclaimer
200
+
201
+ This tool is provided for **research and educational purposes** only. The developers do not condone the use of decensored models for harmful activities. Users are responsible for ensuring compliance with applicable laws and model terms of service.
202
+
203
+ ---
204
+
205
+ <div align="center">
206
+
207
+ **Breaking the Chains | Unleashing Model Potential**
208
+
209
+ *"The only way to discover the limits of the possible is to go beyond them into the impossible."*
210
+
211
+ </div>
@@ -0,0 +1,160 @@
1
+ # ⚔️ Annihilation
2
+
3
+ <div align="center">
4
+ <img src="./logo.jpeg" alt="Annihilation Logo" width="300"/>
5
+ </div>
6
+
7
+ **Autonomous Language Model Decensoring Framework**
8
+
9
+ [![License: AGPLv3](https://img.shields.io/badge/License-AGPLv3-blue.svg)](LICENSE)
10
+ [![Python 3.10+](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/)
11
+ [![PyTorch 2.2+](https://img.shields.io/badge/PyTorch-2.2%2B-red)](https://pytorch.org/)
12
+
13
+ ---
14
+
15
+ ## ⚠️ Work in Progress
16
+
17
+ > **⚡ This project is actively under development. Features, APIs, and documentation may change without notice.**
18
+
19
+ ---
20
+
21
+ ## 🔥 What is Annihilation?
22
+
23
+ **Annihilation** is a powerful, fully automatic framework for removing censorship (safety alignment) from transformer-based language models. It uses an advanced implementation of **directional ablation** (abliteration) combined with **TPE-based parameter optimization** to achieve unprecedented results without expensive post-training.
24
+
25
+ ### Key Features
26
+
27
+ - 🤖 **Fully Autonomous** - No human intervention required; the system automatically finds optimal decensoring parameters
28
+ - ⚡ **State-of-the-Art Performance** - Achieves excellent refusal suppression while preserving model capabilities
29
+ - 🔧 **Advanced Abliteration** - Parametric directional ablation with flexible weight kernels
30
+ - 🧠 **Smart Optimization** - Co-minimizes refusal count and KL divergence using Optuna's TPE sampler
31
+ - 🎯 **Multi-Architecture Support** - Works with dense models, MoE architectures, hybrid models, and many multimodal models
32
+ - 📊 **Research Tools** - Built-in residual geometry analysis and visualization capabilities
33
+
34
+ ---
35
+
36
+
37
+ ---
38
+
39
+ ## 🚀 Quick Start
40
+
41
+ Use a Python virtual environment so Annihilation's dependencies do not collide
42
+ with packages installed globally.
43
+
44
+ ```powershell
45
+ # Windows PowerShell
46
+ python -m venv annihilation-env
47
+ .\annihilation-env\Scripts\Activate.ps1
48
+ python -m pip install -U pip
49
+ python -m pip install -U annihilate-llm
50
+
51
+ # Decensor any model automatically
52
+ annihilate Qwen/Qwen3-4B-Instruct-2507
53
+ ```
54
+
55
+ ```bash
56
+ # macOS/Linux/Android terminal
57
+ python -m venv annihilation-env
58
+ source annihilation-env/bin/activate
59
+ python -m pip install -U pip
60
+ python -m pip install -U annihilate-llm
61
+
62
+ # Decensor any model automatically
63
+ annihilate Qwen/Qwen3-4B-Instruct-2507
64
+ ```
65
+
66
+ ### Requirements
67
+
68
+ - **Python**: 3.10+
69
+ - **PyTorch**: 2.2+ (hardware-specific installation required)
70
+ - **Hardware**: GPU recommended (CUDA, ROCm, XPU, or MPS)
71
+ - **Optional**: Install `annihilate-llm[bnb]` only on platforms
72
+ that support bitsandbytes if you want `bnb_4bit` quantization.
73
+
74
+ ---
75
+
76
+ ## ⚙️ Configuration
77
+
78
+ Annihilation works out of the box with defaults, but offers extensive configuration options:
79
+
80
+ ```bash
81
+ # View all options
82
+ annihilate --help
83
+
84
+ # Or use a config file
85
+ # Rename config.default.toml to config.toml and modify as needed
86
+ ```
87
+
88
+ ### Key Configuration Options
89
+
90
+ | Option | Default | Description |
91
+ |--------|---------|-------------|
92
+ | `n_trials` | 200 | Number of optimization trials |
93
+ | `quantization` | none | Model quantization (bnb_4bit) |
94
+ | `row_normalization` | full | Weight normalization strategy |
95
+ | `orthogonalize_direction` | true | Direction adjustment method |
96
+
97
+ ---
98
+
99
+ ## 🔬 How It Works
100
+
101
+ Annihilation implements **parametric directional ablation**:
102
+
103
+ 1. **Direction Computation** - Calculates refusal directions by computing difference-of-means between first-token residuals for harmful vs harmless prompts
104
+
105
+ 2. **Parametric Ablation** - For each transformer component (attention out-projection, MLP down-projection), orthogonalizes weights against the refusal direction using LoRA adapters
106
+
107
+ 3. **Multi-Parameter Optimization** - Uses Optuna's TPE sampler to co-optimize:
108
+ - Ablation weight kernel shape (max_weight, position, min_weight, distance)
109
+ - Direction index (layer selection or interpolation)
110
+ - Per-component parameters (attention vs MLP)
111
+
112
+ 4. **Automatic Selection** - Chooses from Pareto-optimal trials based on refusal count vs KL divergence tradeoff
113
+
114
+ ---
115
+
116
+ ## 📊 Benchmarking
117
+
118
+ After decensoring, you can:
119
+
120
+ - 💬 **Chat** with the model to test behavior
121
+ - 📈 **Benchmark** using standard evaluation frameworks (MMLU, GSM8K, etc.)
122
+ - 💾 **Save** the model locally or upload to Hugging Face
123
+
124
+ ---
125
+
126
+ ## 🧪 Research Features
127
+
128
+ Install with research dependencies for visualization tools:
129
+
130
+ ```bash
131
+ pip install -U "annihilate-llm[research]"
132
+ ```
133
+
134
+ Features:
135
+ - `--plot-residuals` - Generate PaCMAP projections of residual vectors
136
+ - `--print-residual-geometry` - Detailed residual analysis metrics
137
+
138
+ ---
139
+
140
+ ## 📜 License
141
+
142
+ **Annihilation** is free software distributed under the **GNU Affero General Public License v3 or later** (AGPL-3.0-or-later).
143
+
144
+ See [LICENSE](LICENSE) for full details.
145
+
146
+ ---
147
+
148
+ ## ⚡ Disclaimer
149
+
150
+ This tool is provided for **research and educational purposes** only. The developers do not condone the use of decensored models for harmful activities. Users are responsible for ensuring compliance with applicable laws and model terms of service.
151
+
152
+ ---
153
+
154
+ <div align="center">
155
+
156
+ **Breaking the Chains | Unleashing Model Potential**
157
+
158
+ *"The only way to discover the limits of the possible is to go beyond them into the impossible."*
159
+
160
+ </div>
@@ -0,0 +1,82 @@
1
+ [project]
2
+ name = "annihilate-llm"
3
+ version = "1.3.8"
4
+ description = "Fully automatic censorship removal for language models"
5
+ readme = "README.md"
6
+ license = "AGPL-3.0-or-later"
7
+ authors = [
8
+ { name = "Philipp Emanuel Weidmann", email = "pew@worldwidemann.com" }
9
+ ]
10
+ requires-python = ">=3.10"
11
+ keywords = ["llm", "transformer", "abliteration"]
12
+ classifiers = [
13
+ "Development Status :: 4 - Beta",
14
+ "Environment :: Console",
15
+ "Environment :: GPU",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ ]
24
+ dependencies = [
25
+ "accelerate~=1.13",
26
+ "datasets~=4.7",
27
+ "huggingface-hub~=1.7",
28
+ "immutabledict~=4.3",
29
+ "langdetect~=1.0",
30
+ "lm-eval[hf]~=0.4",
31
+ "numpy~=2.2",
32
+ "optuna~=4.7",
33
+ "peft~=0.19",
34
+ "psutil~=7.2",
35
+ "py-cpuinfo~=9.0",
36
+ "pydantic-settings~=2.13",
37
+ "questionary~=2.1",
38
+ "rich~=14.3",
39
+ "tomli-w~=1.2",
40
+ "tqdm~=4.67",
41
+ "transformers[kernels]~=5.6",
42
+ ]
43
+
44
+ [project.optional-dependencies]
45
+ bnb = [
46
+ "bitsandbytes~=0.49",
47
+ ]
48
+ research = [
49
+ "geom-median~=0.1",
50
+ "imageio~=2.37",
51
+ "matplotlib~=3.10",
52
+ "pacmap~=0.8",
53
+ "scikit-learn~=1.7",
54
+ ]
55
+
56
+ [dependency-groups]
57
+ dev = [
58
+ "ruff>=0.14.5",
59
+ "ty>=0.0.5",
60
+ ]
61
+
62
+ [project.urls]
63
+ Homepage = "https://github.com/tjcrims0nx/annihilation-llm"
64
+ Documentation = "https://github.com/tjcrims0nx/annihilation-llm"
65
+ Repository = "https://github.com/tjcrims0nx/annihilation-llm.git"
66
+ Issues = "https://github.com/tjcrims0nx/annihilation-llm/issues"
67
+ Changelog = "https://github.com/tjcrims0nx/annihilation-llm/releases"
68
+
69
+ [project.scripts]
70
+ annihilate = "heretic.main:main"
71
+ annihilation = "heretic.main:main"
72
+ heretic = "heretic.main:main"
73
+
74
+ [build-system]
75
+ requires = ["uv_build>=0.8.11,<0.9.0"]
76
+ build-backend = "uv_build"
77
+
78
+ [tool.uv]
79
+ exclude-newer = "7 days"
80
+
81
+ [tool.uv.build-backend]
82
+ module-name = "heretic"
File without changes