PyPI - annihilate-llm - Versions diffs - 1.3.8__tar.gz - Mend

annihilate-llm 1.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

annihilate_llm-1.3.8/PKG-INFO +211 -0
annihilate_llm-1.3.8/README.md +160 -0
annihilate_llm-1.3.8/pyproject.toml +82 -0
annihilate_llm-1.3.8/src/heretic/__init__.py +0 -0
annihilate_llm-1.3.8/src/heretic/analyzer.py +357 -0
annihilate_llm-1.3.8/src/heretic/config.py +524 -0
annihilate_llm-1.3.8/src/heretic/evaluator.py +127 -0
annihilate_llm-1.3.8/src/heretic/main.py +1149 -0
annihilate_llm-1.3.8/src/heretic/model.py +850 -0
annihilate_llm-1.3.8/src/heretic/progress.py +40 -0
annihilate_llm-1.3.8/src/heretic/reproduce.py +83 -0
annihilate_llm-1.3.8/src/heretic/system.py +486 -0
annihilate_llm-1.3.8/src/heretic/utils.py +748 -0

annihilate_llm-1.3.8/PKG-INFO ADDED Viewed

@@ -0,0 +1,211 @@
+Metadata-Version: 2.4
+Name: annihilate-llm
+Version: 1.3.8
+Summary: Fully automatic censorship removal for language models
+Keywords: llm,transformer,abliteration
+Author: Philipp Emanuel Weidmann
+Author-email: Philipp Emanuel Weidmann <pew@worldwidemann.com>
+License-Expression: AGPL-3.0-or-later
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Environment :: GPU
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: accelerate~=1.13
+Requires-Dist: datasets~=4.7
+Requires-Dist: huggingface-hub~=1.7
+Requires-Dist: immutabledict~=4.3
+Requires-Dist: langdetect~=1.0
+Requires-Dist: lm-eval[hf]~=0.4
+Requires-Dist: numpy~=2.2
+Requires-Dist: optuna~=4.7
+Requires-Dist: peft~=0.19
+Requires-Dist: psutil~=7.2
+Requires-Dist: py-cpuinfo~=9.0
+Requires-Dist: pydantic-settings~=2.13
+Requires-Dist: questionary~=2.1
+Requires-Dist: rich~=14.3
+Requires-Dist: tomli-w~=1.2
+Requires-Dist: tqdm~=4.67
+Requires-Dist: transformers[kernels]~=5.6
+Requires-Dist: bitsandbytes~=0.49 ; extra == 'bnb'
+Requires-Dist: geom-median~=0.1 ; extra == 'research'
+Requires-Dist: imageio~=2.37 ; extra == 'research'
+Requires-Dist: matplotlib~=3.10 ; extra == 'research'
+Requires-Dist: pacmap~=0.8 ; extra == 'research'
+Requires-Dist: scikit-learn~=1.7 ; extra == 'research'
+Requires-Python: >=3.10
+Project-URL: Changelog, https://github.com/tjcrims0nx/annihilation-llm/releases
+Project-URL: Documentation, https://github.com/tjcrims0nx/annihilation-llm
+Project-URL: Homepage, https://github.com/tjcrims0nx/annihilation-llm
+Project-URL: Issues, https://github.com/tjcrims0nx/annihilation-llm/issues
+Project-URL: Repository, https://github.com/tjcrims0nx/annihilation-llm.git
+Provides-Extra: bnb
+Provides-Extra: research
+Description-Content-Type: text/markdown
+# ⚔️ Annihilation
+<div align="center">
+  <img src="./logo.jpeg" alt="Annihilation Logo" width="300"/>
+</div>
+**Autonomous Language Model Decensoring Framework**
+[![License: AGPLv3](https://img.shields.io/badge/License-AGPLv3-blue.svg)](LICENSE)
+[![Python 3.10+](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/)
+[![PyTorch 2.2+](https://img.shields.io/badge/PyTorch-2.2%2B-red)](https://pytorch.org/)
+---
+## ⚠️ Work in Progress
+> **⚡ This project is actively under development. Features, APIs, and documentation may change without notice.**
+---
+## 🔥 What is Annihilation?
+**Annihilation** is a powerful, fully automatic framework for removing censorship (safety alignment) from transformer-based language models. It uses an advanced implementation of **directional ablation** (abliteration) combined with **TPE-based parameter optimization** to achieve unprecedented results without expensive post-training.
+### Key Features
+- 🤖 **Fully Autonomous** - No human intervention required; the system automatically finds optimal decensoring parameters
+- ⚡ **State-of-the-Art Performance** - Achieves excellent refusal suppression while preserving model capabilities
+- 🔧 **Advanced Abliteration** - Parametric directional ablation with flexible weight kernels
+- 🧠 **Smart Optimization** - Co-minimizes refusal count and KL divergence using Optuna's TPE sampler
+- 🎯 **Multi-Architecture Support** - Works with dense models, MoE architectures, hybrid models, and many multimodal models
+- 📊 **Research Tools** - Built-in residual geometry analysis and visualization capabilities
+---
+## 🚀 Quick Start
+Use a Python virtual environment so Annihilation's dependencies do not collide
+with packages installed globally.
+```powershell
+# Windows PowerShell
+python -m venv annihilation-env
+.\annihilation-env\Scripts\Activate.ps1
+python -m pip install -U pip
+python -m pip install -U annihilate-llm
+# Decensor any model automatically
+annihilate Qwen/Qwen3-4B-Instruct-2507
+```
+```bash
+# macOS/Linux/Android terminal
+python -m venv annihilation-env
+source annihilation-env/bin/activate
+python -m pip install -U pip
+python -m pip install -U annihilate-llm
+# Decensor any model automatically
+annihilate Qwen/Qwen3-4B-Instruct-2507
+```
+### Requirements
+- **Python**: 3.10+
+- **PyTorch**: 2.2+ (hardware-specific installation required)
+- **Hardware**: GPU recommended (CUDA, ROCm, XPU, or MPS)
+- **Optional**: For `bnb_4bit` quantization, install `annihilate-llm[bnb]`
+  (only on platforms that bitsandbytes supports).
+---
+## ⚙️ Configuration
+Annihilation works out of the box with defaults, but offers extensive configuration options:
+```bash
+# View all options
+annihilate --help
+# Or use a config file
+# Rename config.default.toml to config.toml and modify as needed
+```
+### Key Configuration Options
+| Option | Default | Description |
+|--------|---------|-------------|
+| `n_trials` | 200 | Number of optimization trials |
+| `quantization` | none | Model quantization (bnb_4bit) |
+| `row_normalization` | full | Weight normalization strategy |
+| `orthogonalize_direction` | true | Direction adjustment method |
+---
+## 🔬 How It Works
+Annihilation implements **parametric directional ablation**:
+1. **Direction Computation** - Calculates refusal directions by computing difference-of-means between first-token residuals for harmful vs harmless prompts
+2. **Parametric Ablation** - For each transformer component (attention out-projection, MLP down-projection), orthogonalizes weights against the refusal direction using LoRA adapters
+3. **Multi-Parameter Optimization** - Uses Optuna's TPE sampler to co-optimize:
+   - Ablation weight kernel shape (max_weight, position, min_weight, distance)
+   - Direction index (layer selection or interpolation)
+   - Per-component parameters (attention vs MLP)
+4. **Automatic Selection** - Chooses from Pareto-optimal trials based on refusal count vs KL divergence tradeoff
+---
+## 📊 Benchmarking
+After decensoring, you can:
+- 💬 **Chat** with the model to test behavior
+- 📈 **Benchmark** using standard evaluation frameworks (MMLU, GSM8K, etc.)
+- 💾 **Save** the model locally or upload to Hugging Face
+---
+## 🧪 Research Features
+Install with research dependencies for visualization tools:
+```bash
+pip install -U "annihilate-llm[research]"
+```
+Features:
+- `--plot-residuals` - Generate PaCMAP projections of residual vectors
+- `--print-residual-geometry` - Detailed residual analysis metrics
+---
+## 📜 License
+**Annihilation** is free software distributed under the **GNU Affero General Public License v3 or later** (AGPL-3.0-or-later).
+See [LICENSE](LICENSE) for full details.
+---
+## ⚡ Disclaimer
+This tool is provided for **research and educational purposes** only. The developers do not condone the use of decensored models for harmful activities. Users are responsible for ensuring compliance with applicable laws and model terms of service.
+---
+<div align="center">
+**Breaking the Chains | Unleashing Model Potential**
+*"The only way to discover the limits of the possible is to go beyond them into the impossible."*
+</div>

annihilate_llm-1.3.8/README.md ADDED Viewed

@@ -0,0 +1,160 @@
+# ⚔️ Annihilation
+<div align="center">
+  <img src="./logo.jpeg" alt="Annihilation Logo" width="300"/>
+</div>
+**Autonomous Language Model Decensoring Framework**
+[![License: AGPLv3](https://img.shields.io/badge/License-AGPLv3-blue.svg)](LICENSE)
+[![Python 3.10+](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/)
+[![PyTorch 2.2+](https://img.shields.io/badge/PyTorch-2.2%2B-red)](https://pytorch.org/)
+---
+## ⚠️ Work in Progress
+> **⚡ This project is actively under development. Features, APIs, and documentation may change without notice.**
+---
+## 🔥 What is Annihilation?
+**Annihilation** is a powerful, fully automatic framework for removing censorship (safety alignment) from transformer-based language models. It uses an advanced implementation of **directional ablation** (abliteration) combined with **TPE-based parameter optimization** to achieve unprecedented results without expensive post-training.
+### Key Features
+- 🤖 **Fully Autonomous** - No human intervention required; the system automatically finds optimal decensoring parameters
+- ⚡ **State-of-the-Art Performance** - Achieves excellent refusal suppression while preserving model capabilities
+- 🔧 **Advanced Abliteration** - Parametric directional ablation with flexible weight kernels
+- 🧠 **Smart Optimization** - Co-minimizes refusal count and KL divergence using Optuna's TPE sampler
+- 🎯 **Multi-Architecture Support** - Works with dense models, MoE architectures, hybrid models, and many multimodal models
+- 📊 **Research Tools** - Built-in residual geometry analysis and visualization capabilities
+---
+## 🚀 Quick Start
+Use a Python virtual environment so Annihilation's dependencies do not collide
+with packages installed globally.
+```powershell
+# Windows PowerShell
+python -m venv annihilation-env
+.\annihilation-env\Scripts\Activate.ps1
+python -m pip install -U pip
+python -m pip install -U annihilate-llm
+# Decensor any model automatically
+annihilate Qwen/Qwen3-4B-Instruct-2507
+```
+```bash
+# macOS/Linux/Android terminal
+python -m venv annihilation-env
+source annihilation-env/bin/activate
+python -m pip install -U pip
+python -m pip install -U annihilate-llm
+# Decensor any model automatically
+annihilate Qwen/Qwen3-4B-Instruct-2507
+```
+### Requirements
+- **Python**: 3.10+
+- **PyTorch**: 2.2+ (hardware-specific installation required)
+- **Hardware**: GPU recommended (CUDA, ROCm, XPU, or MPS)
+- **Optional**: For `bnb_4bit` quantization, install `annihilate-llm[bnb]`
+  (only on platforms that bitsandbytes supports).
+---
+## ⚙️ Configuration
+Annihilation works out of the box with defaults, but offers extensive configuration options:
+```bash
+# View all options
+annihilate --help
+# Or use a config file
+# Rename config.default.toml to config.toml and modify as needed
+```
+### Key Configuration Options
+| Option | Default | Description |
+|--------|---------|-------------|
+| `n_trials` | 200 | Number of optimization trials |
+| `quantization` | none | Model quantization (bnb_4bit) |
+| `row_normalization` | full | Weight normalization strategy |
+| `orthogonalize_direction` | true | Direction adjustment method |
+---
+## 🔬 How It Works
+Annihilation implements **parametric directional ablation**:
+1. **Direction Computation** - Calculates refusal directions by computing difference-of-means between first-token residuals for harmful vs harmless prompts
+2. **Parametric Ablation** - For each transformer component (attention out-projection, MLP down-projection), orthogonalizes weights against the refusal direction using LoRA adapters
+3. **Multi-Parameter Optimization** - Uses Optuna's TPE sampler to co-optimize:
+   - Ablation weight kernel shape (max_weight, position, min_weight, distance)
+   - Direction index (layer selection or interpolation)
+   - Per-component parameters (attention vs MLP)
+4. **Automatic Selection** - Chooses from Pareto-optimal trials based on refusal count vs KL divergence tradeoff
+---
+## 📊 Benchmarking
+After decensoring, you can:
+- 💬 **Chat** with the model to test behavior
+- 📈 **Benchmark** using standard evaluation frameworks (MMLU, GSM8K, etc.)
+- 💾 **Save** the model locally or upload to Hugging Face
+---
+## 🧪 Research Features
+Install with research dependencies for visualization tools:
+```bash
+pip install -U "annihilate-llm[research]"
+```
+Features:
+- `--plot-residuals` - Generate PaCMAP projections of residual vectors
+- `--print-residual-geometry` - Detailed residual analysis metrics
+---
+## 📜 License
+**Annihilation** is free software distributed under the **GNU Affero General Public License v3 or later** (AGPL-3.0-or-later).
+See [LICENSE](LICENSE) for full details.
+---
+## ⚡ Disclaimer
+This tool is provided for **research and educational purposes** only. The developers do not condone the use of decensored models for harmful activities. Users are responsible for ensuring compliance with applicable laws and model terms of service.
+---
+<div align="center">
+**Breaking the Chains | Unleashing Model Potential**
+*"The only way to discover the limits of the possible is to go beyond them into the impossible."*
+</div>

annihilate_llm-1.3.8/pyproject.toml ADDED Viewed

@@ -0,0 +1,82 @@
+[project]
+name = "annihilate-llm"
+version = "1.3.8"
+description = "Fully automatic censorship removal for language models"
+readme = "README.md"
+license = "AGPL-3.0-or-later"
+authors = [
+    { name = "Philipp Emanuel Weidmann", email = "pew@worldwidemann.com" }
+]
+requires-python = ">=3.10"
+keywords = ["llm", "transformer", "abliteration"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Environment :: GPU",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+    "accelerate~=1.13",
+    "datasets~=4.7",
+    "huggingface-hub~=1.7",
+    "immutabledict~=4.3",
+    "langdetect~=1.0",
+    "lm-eval[hf]~=0.4",
+    "numpy~=2.2",
+    "optuna~=4.7",
+    "peft~=0.19",
+    "psutil~=7.2",
+    "py-cpuinfo~=9.0",
+    "pydantic-settings~=2.13",
+    "questionary~=2.1",
+    "rich~=14.3",
+    "tomli-w~=1.2",
+    "tqdm~=4.67",
+    "transformers[kernels]~=5.6",
+]
+[project.optional-dependencies]
+bnb = [
+    "bitsandbytes~=0.49",
+]
+research = [
+    "geom-median~=0.1",
+    "imageio~=2.37",
+    "matplotlib~=3.10",
+    "pacmap~=0.8",
+    "scikit-learn~=1.7",
+]
+[dependency-groups]
+dev = [
+    "ruff>=0.14.5",
+    "ty>=0.0.5",
+]
+[project.urls]
+Homepage = "https://github.com/tjcrims0nx/annihilation-llm"
+Documentation = "https://github.com/tjcrims0nx/annihilation-llm"
+Repository = "https://github.com/tjcrims0nx/annihilation-llm.git"
+Issues = "https://github.com/tjcrims0nx/annihilation-llm/issues"
+Changelog = "https://github.com/tjcrims0nx/annihilation-llm/releases"
+[project.scripts]
+annihilate = "heretic.main:main"
+annihilation = "heretic.main:main"
+heretic = "heretic.main:main"
+[build-system]
+requires = ["uv_build>=0.8.11,<0.9.0"]
+build-backend = "uv_build"
+[tool.uv]
+exclude-newer = "7 days"
+[tool.uv.build-backend]
+module-name = "heretic"

annihilate_llm-1.3.8/src/heretic/__init__.py ADDED Viewed

File without changes