attacklm 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
attacklm/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """AttackLM — QLoRA-fine-tuned language model for offensive security and AI red-teaming."""
2
+
3
+ from attacklm.__version__ import __version__
4
+
5
+ __all__ = ["__version__"]
@@ -0,0 +1 @@
# Package version string. Kept equal to the distribution version recorded in
# the wheel metadata ("Version: 0.3.1") so that ``attacklm.__version__`` and
# the installed distribution's version do not disagree.
__version__ = "0.3.1"
attacklm/cli.py ADDED
@@ -0,0 +1,177 @@
1
+ """Console script entry points for AttackLM.
2
+
3
+ These wrappers dispatch to the canonical scripts in ``scripts/``. The package
4
+ doesn't duplicate the script logic — ``scripts/`` is the source of truth, and
5
+ these entry points make the tools invokable as ``attacklm-train``,
6
+ ``attacklm-hpo``, etc. after ``pip install attacklm``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import runpy
12
+ import sys
13
+ from pathlib import Path
14
+ from typing import Sequence
15
+
# Absolute path of the scripts/ directory, found by walking three levels up
# from this file and appending "scripts":
#   src/attacklm/cli.py -> src/attacklm -> src -> <root>, then <root>/scripts
# NOTE(review): this layout matches a source checkout; confirm that scripts/
# is also reachable at this location when the package is installed from a wheel.
_SCRIPTS_DIR = Path(__file__).resolve().parent.parent.parent.joinpath("scripts")
19
+
20
+
def _run_python_script(script_name: str, argv: Sequence[str]) -> int:
    """Run a Python script from scripts/ as if it were the entry point.

    Args:
        script_name: File name of the script, relative to ``_SCRIPTS_DIR``.
        argv: Arguments exposed to the script as ``sys.argv[1:]``.

    Returns:
        127 if the script file does not exist; 0 if the script finishes
        normally or exits with ``None``; the numeric exit code if the script
        raises ``SystemExit`` with a value convertible to ``int``; 1 if the
        script exits with a non-numeric value (e.g. ``sys.exit("message")``)
        or raises any other ``Exception``.
    """
    script_path = _SCRIPTS_DIR / script_name
    if not script_path.exists():
        print(f"attacklm: script not found: {script_path}", file=sys.stderr)
        return 127

    # Swap in the script's argv for the duration of the run only, so that a
    # programmatic caller does not find its own sys.argv clobbered afterwards.
    saved_argv = sys.argv
    sys.argv = [str(script_path), *argv]
    try:
        runpy.run_path(str(script_path), run_name="__main__")
    except SystemExit as exc:
        code = exc.code
        if code is None:
            return 0
        try:
            return int(code)
        except (TypeError, ValueError):
            # sys.exit("some message"): mirror the interpreter, which prints
            # the object to stderr and exits with status 1, instead of letting
            # int() blow up inside this handler.
            print(code, file=sys.stderr)
            return 1
    except Exception as exc:  # noqa: BLE001
        # Top-level boundary: report the failure and turn it into an exit code.
        print(f"attacklm: error running {script_name}: {exc}", file=sys.stderr)
        return 1
    finally:
        sys.argv = saved_argv
    return 0
36
+
37
+
def _run_shell_script(script_name: str, argv: Sequence[str]) -> int:
    """Execute a shell script from scripts/ with ``bash`` and return its status.

    Args:
        script_name: File name of the script, relative to ``_SCRIPTS_DIR``.
        argv: Extra arguments appended after the script path.

    Returns:
        127 when the script file is missing, otherwise the return code of the
        ``bash`` process.
    """
    import subprocess

    target = _SCRIPTS_DIR / script_name
    if target.exists():
        command = ["bash", str(target)]
        command.extend(argv)
        completed = subprocess.run(command)
        return completed.returncode

    print(f"attacklm: script not found: {target}", file=sys.stderr)
    return 127
48
+
49
+
50
+ # --- Public entry points (referenced from pyproject.toml [project.scripts]) ---
51
+
52
+
def main_train(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-train [args]``: train a single model.

    Dispatches to ``train_template.py``. When ``argv`` is ``None`` the
    process arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("train_template.py", forwarded)
58
+
59
+
def main_train_all(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-train-all [args]``: train all buckets / HPO.

    Dispatches to ``train_all.py``. When ``argv`` is ``None`` the process
    arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("train_all.py", forwarded)
65
+
66
+
def main_hpo(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-hpo [args]``: run the HPO sweep + analysis.

    Dispatches to ``hpo_runner.py``. When ``argv`` is ``None`` the process
    arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("hpo_runner.py", forwarded)
72
+
73
+
def main_infer(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-infer [args]``: run inference.

    Dispatches to ``infer.py``. When ``argv`` is ``None`` the process
    arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("infer.py", forwarded)
77
+
78
+
def main_merge(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-merge [args]``: merge a LoRA adapter.

    Dispatches to ``merge_adapter.py``. When ``argv`` is ``None`` the process
    arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("merge_adapter.py", forwarded)
84
+
85
+
def main_gguf(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-gguf [args]``: convert a model to GGUF.

    Dispatches to ``convert_to_gguf.py``. When ``argv`` is ``None`` the
    process arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("convert_to_gguf.py", forwarded)
91
+
92
+
def main_demo(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-demo [args]``: run the demo.

    Dispatches to ``demo.py``. When ``argv`` is ``None`` the process
    arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("demo.py", forwarded)
96
+
97
+
def main_extract(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-extract``: run every data extractor in order.

    ``argv`` is accepted for parity with the other entry points but is
    ignored; each extractor is launched with an empty argument list.

    Returns:
        The exit code of the first extractor that fails (the remaining
        extractors are not run), or 0 when all of them succeed.
    """
    _ = argv  # intentionally unused: the extractors take no user arguments
    pipeline = (
        "extract_atomic_red_team_to_jsonl.py",
        "extract_caldera_plugins_to_jsonl.py",
        "parse_metasploit_to_jsonl.py",
        "extract_rta_to_jsonl.py",
        "extract_infection_monkey_to_jsonl.py",
        "extract_ai_tools_to_jsonl.py",
    )
    for script in pipeline:
        print(f"\n=== Running {script} ===", file=sys.stderr)
        status = _run_python_script(script, [])
        if status == 0:
            continue
        # Stop at the first failure and surface its exit code.
        print(f"Extractor {script} failed with exit code {status}", file=sys.stderr)
        return status

    print("\n=== All extractors complete. Next: attacklm-buckets ===", file=sys.stderr)
    return 0
117
+
118
+
def main_buckets(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-buckets``: organize data into buckets.

    Runs ``setup_buckets.py`` and then ``reorganize_buckets.py``, passing the
    same arguments to both (``sys.argv[1:]`` when ``argv`` is ``None``).

    Returns:
        The non-zero exit code of ``setup_buckets.py`` if it fails (the second
        script is then skipped); otherwise the exit code of
        ``reorganize_buckets.py``.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    status = 0
    for script in ("setup_buckets.py", "reorganize_buckets.py"):
        print(f"\n=== Running {script} ===", file=sys.stderr)
        status = _run_python_script(script, forwarded)
        if status != 0:
            break
    return status
128
+
129
+
def main_attribute(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-attribute``: add per-pair source/license attribution.

    Dispatches to ``augment_attribution.py``. When ``argv`` is ``None`` the
    process arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("augment_attribution.py", forwarded)
135
+
136
+
def main_clone(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-clone``: clone the upstream data repos.

    Dispatches to the shell script ``clone_repos.sh``. When ``argv`` is
    ``None`` the process arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_shell_script("clone_repos.sh", forwarded)
142
+
143
+
def main_init(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-init``: one-shot dataset initialization.

    A single command standing in for the manual four-step sequence
    ``attacklm-clone`` → ``attacklm-extract`` → ``attacklm-attribute`` →
    ``attacklm-buckets``. The work itself lives in
    ``scripts/init_pipeline.py``, which is documented as probing ``data/``
    first and fetching from GitHub only when a source is missing; see that
    script for full docs.

    When ``argv`` is ``None`` the process arguments (``sys.argv[1:]``) are
    forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("init_pipeline.py", forwarded)
156
+
157
+
def main_balance(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-balance``: build a balanced subset of the buckets.

    Dispatches to ``balance_buckets.py``. When ``argv`` is ``None`` the
    process arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("balance_buckets.py", forwarded)
163
+
164
+
def main_build(argv: Sequence[str] | None = None) -> int:
    """Entry point for ``attacklm-build``: one-shot merge LoRA → GGUF → install.

    Dispatches to ``build.py``. When ``argv`` is ``None`` the process
    arguments (``sys.argv[1:]``) are forwarded instead.
    """
    forwarded = sys.argv[1:] if argv is None else argv
    return _run_python_script("build.py", forwarded)
168
+
169
+
if __name__ == "__main__":
    # ``python -m attacklm.cli`` does no work itself: it lists the installed
    # console scripts on stdout and exits successfully.
    print(
        "AttackLM CLI dispatchers. Use the installed console scripts:",
        "  attacklm-train, attacklm-train-all, attacklm-hpo, attacklm-infer,",
        "  attacklm-merge, attacklm-gguf, attacklm-demo, attacklm-extract,",
        "  attacklm-buckets, attacklm-attribute, attacklm-clone, attacklm-init,",
        "  attacklm-balance, attacklm-build",
        sep="\n",
    )
    sys.exit(0)
@@ -0,0 +1,928 @@
1
+ Metadata-Version: 2.4
2
+ Name: attacklm
3
+ Version: 0.3.1
4
+ Summary: QLoRA-fine-tuned Qwen2.5-Coder for offensive security and AI red-teaming
5
+ Project-URL: Homepage, https://github.com/Veedubin/AttackLM
6
+ Project-URL: Repository, https://github.com/Veedubin/AttackLM
7
+ Project-URL: Issues, https://github.com/Veedubin/AttackLM/issues
8
+ Project-URL: Documentation, https://github.com/Veedubin/AttackLM#readme
9
+ Author-email: Veedubin <noreply@github.com>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ License-File: NOTICE
13
+ Keywords: ai-security,fine-tuning,mitre-attack,offensive-security,qlora,red-team,security
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: POSIX :: Linux
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Security
24
+ Requires-Python: >=3.10
25
+ Provides-Extra: all
26
+ Requires-Dist: accelerate==1.13.0; extra == 'all'
27
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'all'
28
+ Requires-Dist: causal-conv1d>=1.4; extra == 'all'
29
+ Requires-Dist: datasets==5.0.0; extra == 'all'
30
+ Requires-Dist: flash-attn>=2.7; extra == 'all'
31
+ Requires-Dist: flash-linear-attention>=0.2; extra == 'all'
32
+ Requires-Dist: gguf; extra == 'all'
33
+ Requires-Dist: gitpython; extra == 'all'
34
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'all'
35
+ Requires-Dist: ipython; extra == 'all'
36
+ Requires-Dist: kernels<0.13,>=0.12; extra == 'all'
37
+ Requires-Dist: llama-cpp-python; extra == 'all'
38
+ Requires-Dist: mypy; extra == 'all'
39
+ Requires-Dist: peft==0.19.1; extra == 'all'
40
+ Requires-Dist: pytest; extra == 'all'
41
+ Requires-Dist: pyyaml; extra == 'all'
42
+ Requires-Dist: requests; extra == 'all'
43
+ Requires-Dist: ruff; extra == 'all'
44
+ Requires-Dist: safetensors==0.7.0; extra == 'all'
45
+ Requires-Dist: tokenizers==0.22.2; extra == 'all'
46
+ Requires-Dist: torch==2.12.0; extra == 'all'
47
+ Requires-Dist: torchvision==0.27.0; extra == 'all'
48
+ Requires-Dist: transformers==5.10.2; extra == 'all'
49
+ Requires-Dist: trl==1.5.1; extra == 'all'
50
+ Provides-Extra: all-cuda
51
+ Requires-Dist: accelerate==1.13.0; extra == 'all-cuda'
52
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'all-cuda'
53
+ Requires-Dist: causal-conv1d>=1.4; extra == 'all-cuda'
54
+ Requires-Dist: datasets==5.0.0; extra == 'all-cuda'
55
+ Requires-Dist: flash-attn>=2.7; extra == 'all-cuda'
56
+ Requires-Dist: flash-linear-attention>=0.2; extra == 'all-cuda'
57
+ Requires-Dist: gguf; extra == 'all-cuda'
58
+ Requires-Dist: gitpython; extra == 'all-cuda'
59
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'all-cuda'
60
+ Requires-Dist: kernels<0.13,>=0.12; extra == 'all-cuda'
61
+ Requires-Dist: llama-cpp-python; extra == 'all-cuda'
62
+ Requires-Dist: peft==0.19.1; extra == 'all-cuda'
63
+ Requires-Dist: pyyaml; extra == 'all-cuda'
64
+ Requires-Dist: requests; extra == 'all-cuda'
65
+ Requires-Dist: safetensors==0.7.0; extra == 'all-cuda'
66
+ Requires-Dist: tokenizers==0.22.2; extra == 'all-cuda'
67
+ Requires-Dist: torch==2.12.0; extra == 'all-cuda'
68
+ Requires-Dist: torchvision==0.27.0; extra == 'all-cuda'
69
+ Requires-Dist: transformers==5.10.2; extra == 'all-cuda'
70
+ Requires-Dist: trl==1.5.1; extra == 'all-cuda'
71
+ Provides-Extra: all-rocm
72
+ Requires-Dist: accelerate==1.13.0; extra == 'all-rocm'
73
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'all-rocm'
74
+ Requires-Dist: datasets==5.0.0; extra == 'all-rocm'
75
+ Requires-Dist: gguf; extra == 'all-rocm'
76
+ Requires-Dist: gitpython; extra == 'all-rocm'
77
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'all-rocm'
78
+ Requires-Dist: kernels<0.13,>=0.12; extra == 'all-rocm'
79
+ Requires-Dist: llama-cpp-python; extra == 'all-rocm'
80
+ Requires-Dist: peft==0.19.1; extra == 'all-rocm'
81
+ Requires-Dist: pyyaml; extra == 'all-rocm'
82
+ Requires-Dist: requests; extra == 'all-rocm'
83
+ Requires-Dist: safetensors==0.7.0; extra == 'all-rocm'
84
+ Requires-Dist: tokenizers==0.22.2; extra == 'all-rocm'
85
+ Requires-Dist: torch==2.12.0; extra == 'all-rocm'
86
+ Requires-Dist: torchvision==0.27.0; extra == 'all-rocm'
87
+ Requires-Dist: transformers==5.10.2; extra == 'all-rocm'
88
+ Requires-Dist: trl==1.5.1; extra == 'all-rocm'
89
+ Provides-Extra: convert
90
+ Requires-Dist: gguf; extra == 'convert'
91
+ Requires-Dist: llama-cpp-python; extra == 'convert'
92
+ Provides-Extra: cuda
93
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'cuda'
94
+ Requires-Dist: torch==2.12.0; extra == 'cuda'
95
+ Requires-Dist: torchvision==0.27.0; extra == 'cuda'
96
+ Provides-Extra: cuda-extras
97
+ Requires-Dist: causal-conv1d>=1.4; extra == 'cuda-extras'
98
+ Requires-Dist: flash-attn>=2.7; extra == 'cuda-extras'
99
+ Requires-Dist: flash-linear-attention>=0.2; extra == 'cuda-extras'
100
+ Provides-Extra: dev
101
+ Requires-Dist: ipython; extra == 'dev'
102
+ Requires-Dist: mypy; extra == 'dev'
103
+ Requires-Dist: pytest; extra == 'dev'
104
+ Requires-Dist: ruff; extra == 'dev'
105
+ Provides-Extra: extract
106
+ Requires-Dist: gitpython; extra == 'extract'
107
+ Requires-Dist: pyyaml; extra == 'extract'
108
+ Requires-Dist: requests; extra == 'extract'
109
+ Provides-Extra: infer
110
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'infer'
111
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'infer'
112
+ Requires-Dist: peft==0.19.1; extra == 'infer'
113
+ Requires-Dist: safetensors==0.7.0; extra == 'infer'
114
+ Requires-Dist: tokenizers==0.22.2; extra == 'infer'
115
+ Requires-Dist: torch==2.12.0; extra == 'infer'
116
+ Requires-Dist: torchvision==0.27.0; extra == 'infer'
117
+ Requires-Dist: transformers==5.10.2; extra == 'infer'
118
+ Provides-Extra: infer-base
119
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'infer-base'
120
+ Requires-Dist: peft==0.19.1; extra == 'infer-base'
121
+ Requires-Dist: safetensors==0.7.0; extra == 'infer-base'
122
+ Requires-Dist: tokenizers==0.22.2; extra == 'infer-base'
123
+ Requires-Dist: transformers==5.10.2; extra == 'infer-base'
124
+ Provides-Extra: infer-cuda
125
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'infer-cuda'
126
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'infer-cuda'
127
+ Requires-Dist: peft==0.19.1; extra == 'infer-cuda'
128
+ Requires-Dist: safetensors==0.7.0; extra == 'infer-cuda'
129
+ Requires-Dist: tokenizers==0.22.2; extra == 'infer-cuda'
130
+ Requires-Dist: torch==2.12.0; extra == 'infer-cuda'
131
+ Requires-Dist: torchvision==0.27.0; extra == 'infer-cuda'
132
+ Requires-Dist: transformers==5.10.2; extra == 'infer-cuda'
133
+ Provides-Extra: infer-rocm
134
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'infer-rocm'
135
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'infer-rocm'
136
+ Requires-Dist: peft==0.19.1; extra == 'infer-rocm'
137
+ Requires-Dist: safetensors==0.7.0; extra == 'infer-rocm'
138
+ Requires-Dist: tokenizers==0.22.2; extra == 'infer-rocm'
139
+ Requires-Dist: torch==2.12.0; extra == 'infer-rocm'
140
+ Requires-Dist: torchvision==0.27.0; extra == 'infer-rocm'
141
+ Requires-Dist: transformers==5.10.2; extra == 'infer-rocm'
142
+ Provides-Extra: rocm
143
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'rocm'
144
+ Requires-Dist: torch==2.12.0; extra == 'rocm'
145
+ Requires-Dist: torchvision==0.27.0; extra == 'rocm'
146
+ Provides-Extra: rocm-extras
147
+ Provides-Extra: train
148
+ Requires-Dist: accelerate==1.13.0; extra == 'train'
149
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'train'
150
+ Requires-Dist: causal-conv1d>=1.4; extra == 'train'
151
+ Requires-Dist: datasets==5.0.0; extra == 'train'
152
+ Requires-Dist: flash-attn>=2.7; extra == 'train'
153
+ Requires-Dist: flash-linear-attention>=0.2; extra == 'train'
154
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'train'
155
+ Requires-Dist: kernels<0.13,>=0.12; extra == 'train'
156
+ Requires-Dist: peft==0.19.1; extra == 'train'
157
+ Requires-Dist: safetensors==0.7.0; extra == 'train'
158
+ Requires-Dist: tokenizers==0.22.2; extra == 'train'
159
+ Requires-Dist: torch==2.12.0; extra == 'train'
160
+ Requires-Dist: torchvision==0.27.0; extra == 'train'
161
+ Requires-Dist: transformers==5.10.2; extra == 'train'
162
+ Requires-Dist: trl==1.5.1; extra == 'train'
163
+ Provides-Extra: train-base
164
+ Requires-Dist: accelerate==1.13.0; extra == 'train-base'
165
+ Requires-Dist: datasets==5.0.0; extra == 'train-base'
166
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'train-base'
167
+ Requires-Dist: kernels<0.13,>=0.12; extra == 'train-base'
168
+ Requires-Dist: peft==0.19.1; extra == 'train-base'
169
+ Requires-Dist: safetensors==0.7.0; extra == 'train-base'
170
+ Requires-Dist: tokenizers==0.22.2; extra == 'train-base'
171
+ Requires-Dist: transformers==5.10.2; extra == 'train-base'
172
+ Requires-Dist: trl==1.5.1; extra == 'train-base'
173
+ Provides-Extra: train-cuda
174
+ Requires-Dist: accelerate==1.13.0; extra == 'train-cuda'
175
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'train-cuda'
176
+ Requires-Dist: causal-conv1d>=1.4; extra == 'train-cuda'
177
+ Requires-Dist: datasets==5.0.0; extra == 'train-cuda'
178
+ Requires-Dist: flash-attn>=2.7; extra == 'train-cuda'
179
+ Requires-Dist: flash-linear-attention>=0.2; extra == 'train-cuda'
180
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'train-cuda'
181
+ Requires-Dist: kernels<0.13,>=0.12; extra == 'train-cuda'
182
+ Requires-Dist: peft==0.19.1; extra == 'train-cuda'
183
+ Requires-Dist: safetensors==0.7.0; extra == 'train-cuda'
184
+ Requires-Dist: tokenizers==0.22.2; extra == 'train-cuda'
185
+ Requires-Dist: torch==2.12.0; extra == 'train-cuda'
186
+ Requires-Dist: torchvision==0.27.0; extra == 'train-cuda'
187
+ Requires-Dist: transformers==5.10.2; extra == 'train-cuda'
188
+ Requires-Dist: trl==1.5.1; extra == 'train-cuda'
189
+ Provides-Extra: train-rocm
190
+ Requires-Dist: accelerate==1.13.0; extra == 'train-rocm'
191
+ Requires-Dist: bitsandbytes==0.49.2; extra == 'train-rocm'
192
+ Requires-Dist: datasets==5.0.0; extra == 'train-rocm'
193
+ Requires-Dist: huggingface-hub==1.18.0; extra == 'train-rocm'
194
+ Requires-Dist: kernels<0.13,>=0.12; extra == 'train-rocm'
195
+ Requires-Dist: peft==0.19.1; extra == 'train-rocm'
196
+ Requires-Dist: safetensors==0.7.0; extra == 'train-rocm'
197
+ Requires-Dist: tokenizers==0.22.2; extra == 'train-rocm'
198
+ Requires-Dist: torch==2.12.0; extra == 'train-rocm'
199
+ Requires-Dist: torchvision==0.27.0; extra == 'train-rocm'
200
+ Requires-Dist: transformers==5.10.2; extra == 'train-rocm'
201
+ Requires-Dist: trl==1.5.1; extra == 'train-rocm'
202
+ Description-Content-Type: text/markdown
203
+
204
+ # AttackLM
205
+
206
+ > A QLoRA fine-tuning pipeline for a MITRE ATT&CK-grounded red-team AI assistant.
207
+ > 16,982 training pairs · 3B–70B Qwen base · 16GB–128GB VRAM.
208
+
209
+ [![License: MIT](https://img.shields.io/badge/code-MIT-blue.svg)](LICENSE)
210
+ [![Training data: mixed](https://img.shields.io/badge/data-mixed%20%28see%20ATTRIBUTION%29-orange.svg)](ATTRIBUTION.md)
211
+ [![Python 3.13](https://img.shields.io/badge/python-3.13-blue.svg)](requirements.txt)
212
+ [![Model: 3B-7B Qwen2.5](https://img.shields.io/badge/base%20model-Qwen2.5--Coder--3B--Instruct-green.svg)](https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-bnb-4bit)
213
+
214
+ ---
215
+
216
+ ## What is this?
217
+
218
+ AttackLM is a complete pipeline for fine-tuning a small language model to be
219
+ a competent red-team / AI-security assistant. The training data is grounded in
220
+ **MITRE ATT&CK** techniques, sourced from openly licensed open-source projects
221
+ (Atomic Red Team, MITRE Caldera, Metasploit, Sigma, Infection Monkey, RTA,
222
+ plus prompt-injection and jailbreak corpora for AI-security coverage).
223
+
224
+ The pipeline ingests 10 MITRE tactic buckets plus 6 specialized buckets
225
+ (orchestrator routing, AI-model attacks, security tooling) and produces a
226
+ QLoRA adapter you can drop on top of `Qwen2.5-Coder-3B-Instruct`.
227
+
228
+ What makes it different:
229
+ - **No LLM in the data pipeline.** Every training pair is deterministically
230
+ extracted from upstream sources — no hallucinated content, no API costs.
231
+ - **Coordinate-descent HPO** built in. Sweeps `lora_r` (8→512) and
232
+ `lora_dropout` (0→0.5) and picks the winner before final training.
233
+ - **16GB → 128GB VRAM friendly.** 3B QLoRA at `--max-length 2048` fits
234
+ a 4080 SUPER. 70B+ on a 128GB card with packing.
235
+
236
+ ---
237
+
238
+ ## Data Source Attribution
239
+
240
+ **All training data is a transformation of openly licensed open-source
241
+ projects.** We do not claim authorship of any technique, command, module,
242
+ or rule — the original authors do. Each upstream repo, its license, and
243
+ its contribution to AttackLM's training mix is documented in
244
+ [**`/ATTRIBUTION.md`**](ATTRIBUTION.md) and summarized in
245
+ [**`/NOTICE`**](NOTICE).
246
+
247
+ The full per-source map:
248
+
249
+ | Source | Pairs | License | Repository |
250
+ |---|---:|---|---|
251
+ | Atomic Red Team | 2,506 | MIT | [redcanaryco/atomic-red-team](https://github.com/redcanaryco/atomic-red-team) |
252
+ | MITRE Caldera / Stockpile | 608 | Apache-2.0 | [mitre/stockpile](https://github.com/mitre/stockpile) |
253
+ | Caldera plugins (arsenal/manx/access) | 56 | Apache-2.0 | [mitre/caldera](https://github.com/mitre/caldera) |
254
+ | Metasploit Framework | 8,349 | BSD-3-Clause | [rapid7/metasploit-framework](https://github.com/rapid7/metasploit-framework) |
255
+ | Infection Monkey | 36 | GPL-3.0 | [guardicore/monkey](https://github.com/guardicore/monkey) |
256
+ | RTA — Red Team Automation | 76 | **AGPL-3.0** ⚠️ | [endgameinc/RTA](https://github.com/endgameinc/RTA) |
257
+ | Sigma rules | (labels) | DRL-1.1 | [SigmaHQ/sigma](https://github.com/SigmaHQ/sigma) |
258
+ | AI-security tools (promptfoo, garak, promptmap, PyRIT, FuzzyAI, TheBigPromptLibrary) | 743+ | mixed MIT/Apache-2.0 | various (see [ATTRIBUTION.md](ATTRIBUTION.md)) |
259
+ | Synthetic orchestrator / prompt-injection | 1,067 | MIT | this repo |
260
+ | **Total** | **16,982** | | |
261
+
262
+ ⚠️ **AGPLv3 note:** RTA is the only AGPL-licensed source. The AGPL has
263
+ network-distribution implications for derivative works. The public
264
+ repository satisfies the source-availability requirement. If you need an
265
+ AGPL-clean deployment, retrain after removing the `tools/rta` bucket.
266
+ See [ATTRIBUTION.md §8](ATTRIBUTION.md) for the full analysis.
267
+
268
+ ---
269
+
270
+ ## Quickstart (5 min)
271
+
272
+ ```bash
273
+ # 1. Install uv (Python package manager, ~10MB)
274
+ curl -LsSf https://astral.sh/uv/install.sh | sh
275
+
276
+ # 2. Clone this repo
277
+ git clone https://github.com/Veedubin/AttackLM.git
278
+ cd AttackLM
279
+
280
+ # 3a. Install as a Python package (gets you 14 `attacklm-*` commands)
281
+ # — use `[all]` to get every optional dependency
282
+ uv pip install -e ".[all]"
283
+
284
+ # Or, if you just want the bare CLI dispatchers (no ML stack):
285
+ # uv pip install -e .
286
+
287
+ # 3b. Alternative: classic uv-managed venv with all deps in pyproject.toml
288
+ # uv sync
289
+
290
+ # 4. Initialize the dataset (probes local `data/` first; falls back to git clone)
291
+ attacklm-init --yes
292
+
293
+ # The single command above replaces steps 4–7 below. If you'd rather
294
+ # run each step individually, the four commands are still available:
295
+ #
296
+ # 4. Clone upstream data sources (~1.5GB total, optional — data is in the repo)
297
+ # attacklm-clone
298
+ #
299
+ # 5. Extract training data from each source
300
+ # attacklm-extract
301
+ #
302
+ # 6. Augment each JSONL with per-pair source/license attribution
303
+ # attacklm-attribute
304
+ #
305
+ # 7. Organize into 16 MITRE/AI/tools buckets
306
+ # attacklm-buckets
307
+
308
+ # 8. Pick a base model — use an uncensored/abliterated one (see "Pick a base model" below)
309
+ # Example: Qwen2.5-Coder-3B-Instruct with refusal direction removed
310
+ # v0.2.0+ uses --dataset (multi-positional) instead of --include-tools etc.
311
+ attacklm-train-all --single-model \
312
+ --dataset base/ \
313
+ --base-model huihui-ai/Qwen2.5-Coder-3B-Instruct-abliterated \
314
+ --epochs 5 --max-length 2048
315
+
316
+ # Optional: add --hpo for automatic lora_r / lora_dropout sweep
317
+ ```
318
+
319
+ The trained LoRA adapter lands in `models/attacklm-single_<TIMESTAMP>/`
320
+ (v0.2.0+ uses timestamped dirs so multiple runs coexist for rollback).
321
+ The merged model goes to `models/merged/attacklm-single/`. See
322
+ [**Inference**](#inference) below for how to use it.
323
+
324
+ > **Don't want to install?** The `scripts/` directory is the source of truth.
325
+ > Every `attacklm-*` command is a thin wrapper around a script. You can run
326
+ > `uv run python scripts/train_all.py --help` directly — same behavior,
327
+ > same flags, no install required.
328
+
329
+ ---
330
+
331
+ ## Install
332
+
333
+ The project ships as a **proper Python package** (`pyproject.toml`,
334
+ `src/attacklm/` layout, hatchling build backend) so users don't have to
335
+ build anything by hand.
336
+
337
+ There are **two GPU stacks** — pick the one for your hardware.
338
+
339
+ ---
340
+
341
+ ### CUDA stack (NVIDIA) — primary
342
+
343
+ ```bash
344
+ git clone https://github.com/Veedubin/AttackLM.git
345
+ cd AttackLM
346
+ uv pip install -e ".[all]"
347
+ ```
348
+
349
+ That installs everything: `torch` (CUDA wheel from PyPI), `bitsandbytes`,
350
+ `transformers`, `peft`, `trl`, plus the C++ extensions `flash-attn`,
351
+ `causal-conv1d`, and `flash-linear-attention` (for Qwen3-Next and similar
352
+ hybrid linear-attention models).
353
+
354
+ | Component | Where it comes from |
355
+ |---|---|
356
+ | `torch`, `torchvision` | PyPI (CUDA build, auto-selected) |
357
+ | `bitsandbytes` | PyPI (CUDA wheels) |
358
+ | `flash-attn` | Built from source via pip (~5 min) |
359
+ | `causal-conv1d` | Pre-built wheel from PyPI |
360
+ | `flash-linear-attention` | Pre-built wheel from PyPI |
361
+
362
+ ---
363
+
364
+ ### ROCm stack (AMD) — e.g. MI300X, RX 7900 XTX, Strix Halo
365
+
366
+ ROCm PyTorch wheels are **not on PyPI** — you must add PyTorch's index
367
+ URL. The `bitsandbytes` 0.49+ wheel **only ships CUDA .so files** (cuda118/120/121/122/126) — on ROCm, install bitsandbytes with `--no-deps` and verify, or skip it entirely (the FP8 path doesn't need it). The C++ extensions (`flash-attn`, `causal-conv1d`, `flash-linear-attention`) **have no ROCm support** — the modeling has pure-PyTorch fallbacks (slower but works).
368
+
369
+ **Important: which ROCm version?** The PyTorch ROCm index publishes
370
+ different `torch` versions per channel. The version pins in this repo
371
+ (`torch==2.12.0`, `torchvision==0.27.0`) are only available on the
372
+ **rocm7.1 / rocm7.2** channels. Older channels (rocm6.x) cap out at
373
+ torch 2.5-2.9 and will fail to resolve the pin.
374
+
375
+ ```bash
376
+ # 1. Install ROCm PyTorch from the rocm7.2 channel (has torch 2.12.0)
377
+ uv pip install --index-url https://download.pytorch.org/whl/rocm7.2 \
378
+ torch==2.12.0 torchvision==0.27.0
379
+
380
+ # 2. Install AttackLM with the ROCm meta-group
381
+ git clone https://github.com/Veedubin/AttackLM.git
382
+ cd AttackLM
383
+ uv pip install -e ".[all-rocm]"
384
+ ```
385
+
386
+ After install, verify:
387
+ ```bash
388
+ python -c "import torch; print('torch:', torch.__version__, '— hip:', torch.version.hip)"
389
+ # should print something like: torch: 2.12.0+rocm7.2 — hip: 7.2.XXXXX
390
+ ```
391
+
392
+ `[all-rocm]` is `attacklm[train-rocm,extract,convert]` — it pulls in
393
+ `peft`, `trl`, `accelerate`, `bitsandbytes` and **no** CUDA-only C++
394
+ extensions.
395
+
396
+ | Component | Where it comes from |
397
+ |---|---|
398
+ | `torch`, `torchvision` | PyTorch ROCm index (`+rocm7.2` build) |
399
+ | `bitsandbytes` | PyPI (CUDA-only — works for non-FP8 paths; uninstall for FP8) |
400
+ | `flash-attn` | **Not installed** — sdpa fallback in QLoRA |
401
+ | `causal-conv1d` | **Not installed** — pure-PyTorch fallback in Qwen3-Next modeling |
402
+ | `flash-linear-attention` | **Not installed** — pure-PyTorch fallback |
403
+
404
+ > **If `attacklm-train` fails with** `Could not import module '...ForCausalLM'`:
405
+ > The error message usually hides the actual cause in its exception chain.
406
+ > The most common ROCm causes (in order of likelihood):
407
+ > ```bash
408
+ > # 1. bitsandbytes CUDA-only wheel — uninstall (FP8 path doesn't need it)
409
+ > uv pip uninstall bitsandbytes
410
+ >
411
+ > # 2. Half-installed C++ extensions — remove them
412
+ > uv pip uninstall causal-conv1d flash-linear-attention
413
+ >
414
+ > # 3. Wrong PyTorch channel — verify ROCm build is installed
415
+ > python -c "import torch; print(torch.version.hip)"
416
+ > # If 'None', reinstall with --index-url https://download.pytorch.org/whl/rocm7.2
417
+ > ```
418
+ > v0.1.3+ prints the actual exception chain so you can see which of these it is.
419
+
420
+ ---
421
+
422
+ ### CPU / Apple Silicon (inference only)
423
+
424
+ ```bash
425
+ git clone https://github.com/Veedubin/AttackLM.git
426
+ cd AttackLM
427
+ uv pip install -e ".[infer]"
428
+ ```
429
+
430
+ Training on CPU/MPS is technically possible but will be **extremely slow**.
431
+ Use only for dry-runs or for running a pre-trained adapter against
432
+ prompts. Pick `[all-cuda]` or `[all-rocm]` for actual training.
433
+
434
+ ---
435
+
436
+ ### 14 console-script entry points
437
+
438
+ All install paths give you these:
439
+
440
+ | Command | Dispatches to | What it does |
441
+ |--------------------------|----------------------------------------|----------------------------------------|
442
+ | `attacklm-train` | `scripts/train_template.py` | Train one QLoRA adapter |
443
+ | `attacklm-train-all` | `scripts/train_all.py` | Train all buckets / HPO |
444
+ | `attacklm-hpo` | `scripts/hpo_runner.py` | Coordinate-descent HPO sweep |
445
+ | `attacklm-infer` | `scripts/infer.py` | Smoke-test inference |
446
+ | `attacklm-merge` | `scripts/merge_adapter.py` | Merge LoRA → base model |
447
+ | `attacklm-gguf` | `scripts/convert_to_gguf.py` | Convert to GGUF (llama.cpp) |
448
+ | `attacklm-build` | `scripts/build.py` | merge → GGUF → install (one shot) |
449
+ | `attacklm-demo` | `scripts/demo.py` | Multi-agent orchestrator demo |
450
+ | `attacklm-extract` | all 6 extractors | Extract data from cloned repos |
451
+ | `attacklm-buckets` | `setup_buckets.py` + `reorganize_buckets.py` | Organize data into 16 buckets |
452
+ | `attacklm-attribute` | `scripts/augment_attribution.py` | Add source/license to each JSONL row |
453
+ | `attacklm-clone` | `scripts/clone_repos.sh` | Clone upstream data repos |
454
+ | `attacklm-init` | `scripts/init_pipeline.py` | **One-shot init: clone→extract→attribute→buckets** (probes local first) |
455
+ | `attacklm-balance` | `scripts/balance_buckets.py` | Build a balanced subset of the buckets |
456
+
457
+ The CLI dispatchers are thin wrappers — they use `runpy.run_path()` (or `bash`, for `clone_repos.sh`) to
458
+ invoke the canonical script in `scripts/`. So `scripts/` stays the
459
+ source of truth and you can still run `uv run python scripts/foo.py`
460
+ directly if you prefer.
461
+
462
+ ---
463
+
464
+ ### Optional-dependency groups (advanced)
465
+
466
+ ```bash
467
+ # Fine-grained control
468
+ uv pip install -e ".[train-cuda]" # CUDA training stack
469
+ uv pip install -e ".[train-rocm]" # ROCm training stack
470
+ uv pip install -e ".[infer-cuda]" # CUDA inference
471
+ uv pip install -e ".[infer-rocm]" # ROCm inference
472
+ uv pip install -e ".[extract]" # data extractors
473
+ uv pip install -e ".[convert]" # GGUF conversion
474
+ uv pip install -e ".[dev]" # pytest, ruff, mypy
475
+ ```
476
+
477
+ ---
478
+
479
+ ### No-install option (scripts only)
480
+
481
+ If you'd rather not install into your environment:
482
+
483
+ ```bash
484
+ git clone https://github.com/Veedubin/AttackLM.git
485
+ cd AttackLM
486
+ uv sync # creates .venv with all deps
487
+ uv run python scripts/train_all.py --single-model --epochs 5
488
+ ```
489
+
490
+ `uv sync` reads `pyproject.toml` and creates a venv with the `[all]`
491
+ extras. Scripts in `scripts/` are the source of truth — the CLI is a
492
+ thin dispatcher layer.
493
+
494
+ ---
495
+
496
+ ## Architecture
497
+
498
+ The training data is organized into **16 buckets**:
499
+
500
+ - **10 MITRE tactic buckets** — under `base/`: `base/collection`,
501
+ `base/command_and_control`, `base/credential_access`, `base/defense_evasion`,
502
+ `base/discovery`, `base/execution`, `base/exfiltration`,
503
+ `base/lateral_movement`, `base/persistence`, `base/privilege_escalation`
504
+ (TA0009, TA0011, TA0006, TA0005, TA0007, TA0002, TA0010, TA0008,
505
+ TA0003, TA0004 respectively)
506
+ - **1 orchestrator bucket** — routing decisions across 6 sub-agents
507
+ - **2 AI-model attack buckets** — under `ai/`: `ai/prompt-injection` and
508
+ `ai/jailbreaking` (TA0040 — Adversarial ML)
509
+ - **3 security-tool buckets** — under `tools/`: `tools/infection_monkey`,
510
+ `tools/metasploit`, `tools/rta` (consolidated tool-specific data, re-routed
511
+ to MITRE tactics where applicable)
512
+
513
+ > **v0.2.1 layout change:** the 10 tactic buckets moved from top-level
514
+ > into a new `base/` parent directory, and `ai-models/` was renamed to
515
+ > `ai/`. See the [CHANGELOG](CHANGELOG.md#021--2026-06-10) for the
516
+ > migration script and details.
517
+
518
+ The bucket layout lets you train:
519
+ - **One model on everything** (default — single MoE-style assistant)
520
+ - **One model per tactic** (multi-model mode)
521
+ - **One model on a subset** (e.g., `--include-tools --include-orchestrator`
522
+ to skip the AI/ML attack data)
523
+
524
+ See `data/datasets/buckets/manifest.json` for the full per-bucket manifest
525
+ with pair counts and MITRE tactic IDs.
526
+
527
+ ---
528
+
529
+ ## Pick a base model
530
+
531
+ > **Use an uncensored / abliterated base.** The dataset teaches red-team
532
+ > tradecraft, but it can't fully override the safety alignment baked into
533
+ > a base Instruct model. Use a base that has had its refusal direction
534
+ > removed (abliterated) — you'll get a much sharper, more consistent
535
+ > result than SFT alone.
536
+
537
+ ### Recommended bases (pick one)
538
+
539
+ | Model | Size | VRAM needed | Notes |
540
+ | ------------------------------------------------------------ | ----- | ------------ | -------------------------------------------------------------------- |
541
+ | `huihui-ai/Qwen2.5-Coder-3B-Instruct-abliterated` | 3B | 16 GB | Best fit for RTX 4080 SUPER / 4070 Ti. Same Qwen2.5-Coder arch as the original default. Apache-2.0. |
542
+ | `huihui-ai/Qwen2.5-Coder-1.5B-Instruct-abliterated` | 1.5B | 8 GB | Tight hardware, fast iteration. Apache-2.0. |
543
+ | `huihui-ai/Qwen2.5-Coder-7B-Instruct-abliterated` | 7B | 24 GB | Better quality, more coherent long responses. Apache-2.0. |
544
+ | `BlossomsAI/Qwen2.5-Coder-32B-Instruct-Uncensored` | 32B | 64+ GB | Top quality, needs 64+ GB VRAM. Apache-2.0. |
545
+ | `failspy/Meta-Llama-3-8B-Instruct-abliterated-v3` | 8B | 24 GB | If you'd rather train on Llama-3. Meta Llama 3 Community License (not Apache-2.0). |
546
+ | `failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5` | 70B | 128+ GB | Frontier quality. Quantized GGUF versions also available. |
547
+
548
+ Browse the full [failspy/abliterated-v3 collection](https://huggingface.co/collections/failspy/abliterated-v3-664a8ad0db255eefa7d0012b) and [3000+ Heretic models](https://huggingface.co/models?other=heretic) for more.
549
+
550
+ ### Make your own with Heretic (if your preferred base isn't pre-abliterated)
551
+
552
+ [p-e-w/heretic](https://github.com/p-e-w/heretic) is a fully automatic
553
+ abliteration tool. A 3B model takes about 30 minutes on a 16 GB card.
554
+
555
+ ```bash
556
+ pip install heretic-llm
557
+ heretic Qwen/Qwen2.5-Coder-3B-Instruct --n-trials 100
558
+ # Interactive menu: choose "Save the model to a local folder"
559
+ ```
560
+
561
+ Then point `--base-model` at the saved folder. The interactive menu
562
+ **requires a real TTY** (gnome-terminal, konsole, xterm, etc.) — piping
563
+ stdin via `printf "n\n"` only handles the first prompt.
564
+
565
+ For background on the abliteration technique itself, see:
566
+ - [mlabonne/abliteration (the original 2024 recipe)](https://huggingface.co/blog/mlabonne/abliteration)
567
+ - [grimjim/projected-abliteration (Oct 2025 — projection refinement)](https://huggingface.co/blog/grimjim/projected-abliteration)
568
+ - [p-e-w/heretic (unified tool, modern state-of-the-art)](https://github.com/p-e-w/heretic)
569
+
570
+ ---
571
+
572
+ ## Training
573
+
574
+ `scripts/train_all.py` is the orchestrator. Key flags:
575
+
576
+ | Flag | Default | Notes |
577
+ |---|---|---|
578
+ | `--single-model` | (off) | Train one model on all buckets combined |
579
+ | `--base-model` | (auto) | v0.2.0+: defaults to round-2 SFT (latest completed run for this agent), then abliterated Qwen 3B. Pass this to override. |
580
+ | `--dataset` (multi) | none | v0.2.0+: positional list of bucket specs. `base/`, `tools/`, `ai/`, `orchestrator`, subpaths (`tools/metasploit/`), aliases (`all`, `tactics`, `tools-all`). |
581
+ | `--backup` | (on) | Tar.gz the previous round-2 SFT run to `models/.backups/` before training starts. `--no-backup` to skip. |
582
+ | `--epochs` | 10 | Total epochs over the combined dataset |
583
+ | `--max-length` | 1024 | 2048 for richer context; 1024 for 7B on 16GB |
584
+ | `--lora-r` | 16 | LoRA rank; 8 / 16 / 32 are good starting points |
585
+ | `--lora-alpha` | 32 | Conventionally `2 × lora_r` |
586
+ | `--lora-dropout` | 0.05 | Try 0.0 for less regularization |
587
+ | `--no-packing` | (packing off) | Default is OFF because flash-attn is hard to install |
588
+ | `--packing` | (off) | Enable for ~30% speedup; requires `flash_attn` |
589
+ | `--include-tools` | (off) | **Deprecated in v0.2.0**: use `--dataset tools/` instead |
590
+ | `--include-orchestrator` | (off) | **Deprecated in v0.2.0**: use `--dataset orchestrator` instead |
591
+ | `--model-attacks` | (off) | **Deprecated in v0.2.0**: use `--dataset ai/` instead |
592
+ | `--curriculum` | (off) | 2-stage: tactic data first, then orchestrator fine-tune |
593
+ | `--hpo` | (off) | Run coordinate-descent HPO before final training |
594
+
595
+ The training script has 13 OOM-safety fixes built in (expandable_segments,
596
+ per_device_eval_batch_size=1, chunked_nll loss, post-eval cache clear,
597
+ paged_adamw_8bit, etc.) — see the `# OOM fix #N:` comments in
598
+ `train_template.py` for the full list.
599
+
600
+ ### Run-dir naming (v0.2.2+)
601
+
602
+ `attacklm-train` and `attacklm-train-all` both default to writing the
603
+ adapter to a **timestamped** subdirectory so re-runs are preserved:
604
+
605
+ ```bash
606
+ # Default — appends a timestamp to your --output
607
+ attacklm-train --dataset data/foo.jsonl --output models/agent-3b
608
+ # → models/agent-3b_2026-06-10_15-15/ (preserved across re-runs)
609
+
610
+ # Opt out of timestamping (will refuse to clobber a completed run)
611
+ attacklm-train --dataset data/foo.jsonl --output models/agent-3b --no-timestamp
612
+ # ERROR: Refusing to clobber completed run at models/agent-3b.
613
+
614
+ # Override the refusal
615
+ attacklm-train --dataset data/foo.jsonl --output models/agent-3b \
616
+ --no-timestamp --force
617
+ ```
618
+
619
+ If `--output` already ends in `_YYYY-MM-DD_HH-MM` (i.e. it was
620
+ produced by an earlier run or by `attacklm-train-all`), the suffix
621
+ is left alone — re-runs get a new suffix (`_2`, `_3`, …) only if the
622
+ exact same name exists.
623
+
624
+ ### Multi-round SFT (v0.2.0+)
625
+
626
+ Each training run writes a `state.json` sidecar at `models/{agent}_{TIMESTAMP}/state.json`.
627
+ It records the base model, hparams, dataset, progress, and a `completed` flag.
628
+
629
+ **Round 2 SFT** trains a fresh LoRA on top of a previously completed run:
630
+
631
+ ```bash
632
+ # Round 1: train on tactics (10 buckets, 7,398 pairs)
633
+ attacklm-train-all --single-model --dataset base/ --epochs 5
634
+
635
+ # Round 2: train on tools ON TOP of the round-1 merged weights
636
+ # (auto-detected from state.json; backup tar of round 1 happens first)
637
+ attacklm-train-all --single-model --dataset tools/ --epochs 3
638
+
639
+ # Round 3: train on everything
640
+ attacklm-train-all --single-model --dataset all --epochs 2
641
+ ```
642
+
643
+ Each round:
644
+ 1. Detects the latest completed run for the agent name
645
+ 2. Backs it up to `models/.backups/{name}_{timestamp}.tar.gz` (5 GB, ~30 sec)
646
+ 3. Loads the merged weights as the new base
647
+ 4. Trains a new LoRA on top
648
+ 5. Writes a new timestamped run dir with updated `state.json`
649
+
650
+ **Auto-resume** for crashed/killed runs:
651
+
652
+ ```bash
653
+ # If a run died mid-training, just re-run with the same command.
654
+ # state.json (completed=false) + checkpoint-N/ present → auto-resume.
655
+ attacklm-train-all --single-model --dataset base/ --epochs 5
656
+ ```
657
+
658
+ ### `--dataset` DSL
659
+
660
+ The new dataset spec is dir-shaped and hierarchical:
661
+
662
+ | Spec | Resolves to | Pair count |
663
+ |-------------------------------|------------------------------------------------------|-----------:|
664
+ | `base/` | All 10 MITRE tactic buckets | 7,398 |
665
+ | `tools/` | All 3 tool buckets (metasploit, infection_monkey, rta) | 8,461 |
666
+ | `tools/metasploit/` | Just metasploit | 8,349 |
667
+ | `tools/infection_monkey/` | Just infection_monkey | 36 |
668
+ | `tools/rta/` | Just RTA | 76 |
669
+ | `ai/` | Both AI buckets (jailbreaking, prompt-injection) | 743 |
670
+ | `orchestrator` | The orchestrator bucket | 380 |
671
+ | `all` | Everything (alias for `base + tools + ai + orchestrator`) | 16,982 |
672
+ | `tactics` | Alias for `base/` | 7,398 |
673
+
674
+ Multiple specs combine: `--dataset base/ tools/metasploit/` = 10 tactics + just metasploit = 15,747 pairs.
675
+
676
+ Legacy `--include-tools` / `--model-attacks` / `--include-orchestrator` still work
677
+ and translate internally to `--dataset` specs. The new flag wins if both are passed.
678
+
679
+ ### Balanced sampling (`attacklm-balance`)
680
+
681
+ The 16 buckets are heavily skewed: `tools/metasploit` alone has 8,349
682
+ pairs (49% of the 16,982 total). Training on raw `--dataset all`
683
+ makes the model see ~1 Metasploit example for every 1 non-Metasploit
684
+ example, which overfits it to msfconsole syntax at the expense of
685
+ broader tactical coverage.
686
+
687
+ `attacklm-balance` builds a balanced subset of the buckets. It applies
688
+ a per-bucket cap (one cap applied uniformly to all buckets) and
689
+ selects examples from each bucket with a chosen strategy:
690
+
691
+ ```bash
692
+ # Dry-run: see the per-bucket caps + total without writing
693
+ attacklm-balance --profile 7b-128gb --dry-run
694
+
695
+ # Write a balanced dataset to data/datasets/balanced/
696
+ attacklm-balance --profile 7b-128gb \
697
+ --output data/datasets/balanced/balanced_7b-128gb.jsonl
698
+
699
+ # Then train on it
700
+ attacklm-train --dataset data/datasets/balanced/balanced_7b-128gb.jsonl \
701
+ --output models/attacklm-7b-128gb \
702
+ --base-model huihui-ai/Qwen2.5-Coder-7B-Instruct-abliterated
703
+ ```
704
+
705
+ **Profiles** (named per-bucket cap values, tuned for common hardware combos):
706
+
707
+ | Profile | Per-bucket cap | Total pairs | Notes |
708
+ |------------|---------------:|------------:|--------------------------------------------|
709
+ | `3b-16gb` | 800 | ~7,500 | 3B QLoRA on 16 GB card |
710
+ | `7b-16gb` | 800 | ~7,500 | 7B QLoRA on 16 GB card |
711
+ | `7b-128gb` | 1,500 | ~9,800 | 7B QLoRA on 128 GB rig |
712
+ | `14b-128gb`| 1,500 | ~9,800 | 14B QLoRA on 128 GB rig |
713
+ | `31b-128gb`| 2,000 | ~10,600 | 31B QLoRA on 128 GB rig |
714
+ | `full` | unlimited | 16,982 | All data, no cap |
715
+ | `custom` | (you set)| (you set)| `--per-bucket-cap` or `--target-total` |
716
+
717
+ **Strategies** (within a bucket, after the cap is applied):
718
+
719
+ - `stratified` (default) — group examples by their first MITRE
720
+ technique ID, source, or first line of assistant content, then
721
+ allocate **at least 1 per group** so every technique / module gets
722
+ representation. Falls back to uniform random if there are fewer
723
+ than 3 groups in the bucket.
724
+ - `random` — uniform random sample of N (seeded by `--seed`).
725
+ - `head` — first N examples in the file (reproducible but biased to
726
+ whatever order the data is in).
727
+
728
+ **Custom allocation** — the `custom` profile takes either an explicit
729
+ `--per-bucket-cap` JSON or a `--target-total` with `--category-shares`:
730
+
731
+ ```bash
732
+ # 12K pairs total, weighted 30% tactics / 40% tools / 20% ai / 10% orchestrator
733
+ attacklm-balance --profile custom --target-total 12000 \
734
+ --category-shares '{"tactic": 0.3, "tools": 0.4, "ai_redteam": 0.2, "meta": 0.1}'
735
+
736
+ # Just metasploit at 1500 + discovery at 800, everything else uncapped
737
+ attacklm-balance --profile custom \
738
+ --per-bucket-cap '{"tools/metasploit": 1500, "base/discovery": 800}'
739
+ ```
740
+
741
+ Output JSONLs are written to `data/datasets/balanced/`, are excluded
742
+ from git, and contain a `_source_bucket` field on every example for
743
+ traceability. See `scripts/balance_buckets.py --help` for the full
744
+ flag list and `CHANGELOG.md` for the design rationale.
745
+
746
+ ### HPO
747
+
748
+ Add `--hpo` to the training command. The sweep explores `lora_r` (8→512)
749
+ and `lora_dropout` (0→0.5) and runs a final training with the winners.
750
+ Results land in `hpo_runs/hpo_state.json`; re-analyze later with
751
+ `attacklm-hpo --analyze-only`.
752
+
753
+ ---
754
+
755
+ ## Inference
756
+
757
+ After training, you have one or more LoRA adapters in
758
+ `models/attacklm-single_*/` (timestamped). Pick the latest one (most
759
+ recent date) and merge it. There are several ways to use it:
760
+
761
+ ### Option A: Quick smoke test with `infer.py`
762
+
763
+ ```bash
764
+ # v0.2.0+: list available run dirs and pick the latest
765
+ ls -d models/attacklm-single_*/ | tail -1
766
+ # Then infer against it
767
+ attacklm-infer --adapter models/attacklm-single_2026-06-10_01-12
768
+ ```
769
+
770
+ This runs 4 example prompts (MITRE tactics, orchestrator routing,
771
+ prompt injection) and prints the model's responses. No setup beyond
772
+ `uv sync` required. See `scripts/infer.py --help` for custom prompts
773
+ and generation parameters.
774
+
775
+ ### Option B: Merge into the base model (simplest)
776
+
777
+ ```bash
778
+ # v0.2.0+: --adapter takes a timestamped run dir directly.
779
+ # merge_all auto-picks the latest run for an agent if you omit --adapter.
780
+ attacklm-merge \
781
+ --base-model huihui-ai/Qwen2.5-Coder-3B-Instruct-abliterated \
782
+ --adapter models/attacklm-single_2026-06-10_01-12 \
783
+ --output models/merged/attacklm-single
784
+ ```
785
+
786
+ Then load with `transformers.AutoModelForCausalLM.from_pretrained("models/merged/attacklm-single")`.
787
+
788
+ ### Option C: Convert to GGUF for Ollama / LM Studio / llama.cpp
789
+
790
+ ```bash
791
+ # v0.2.0+: --input is the merged model dir (not the adapter)
792
+ attacklm-gguf \
793
+ --input models/merged/attacklm-single \
794
+ --install-lmstudio
795
+
796
+ # Register with Ollama
797
+ uv run python scripts/register_ollama.py models/gguf/attacklm-single.Q4_K_M.gguf
798
+ ```
799
+
800
+ ### Option E: One-shot merge + GGUF + install (`attacklm-build`)
801
+
802
+ v0.2.2+: the 3-command shell pipeline becomes a single command. The
803
+ build command also drops a manifest at `models/built/{name}_{timestamp}/`
804
+ for later retrieval:
805
+
806
+ ```bash
807
+ # Merge + GGUF + install to LM Studio, all in one
808
+ attacklm-build \
809
+ --adapter models/attacklm-3b_16g_2026-06-10_15-15 \
810
+ --base ./uncensored/ \
811
+ --name attacklm-3b-16g
812
+
813
+ # Skip the merge step (use an already-merged model)
814
+ attacklm-build \
815
+ --merged models/merged/attacklm-3b-16g \
816
+ --name attacklm-3b-16g
817
+
818
+ # Also register with Ollama
819
+ attacklm-build \
820
+ --adapter models/attacklm-3b_16g_2026-06-10_15-15 \
821
+ --base ./uncensored/ \
822
+ --name attacklm-3b-16g \
823
+ --register-ollama
824
+ ```
825
+
826
+ `--install-lmstudio` is ON by default. Use `--no-install-lmstudio` to
827
+ just produce the GGUF. The build manifest records the GGUF path,
828
+ mtime, base model, and which install steps ran.
829
+
830
+ ### Option D: Load the adapter directly (smallest disk footprint)
831
+
832
+ ```python
833
+ from peft import PeftModel
834
+ from transformers import AutoModelForCausalLM, AutoTokenizer
835
+
836
+ base = AutoModelForCausalLM.from_pretrained(
837
+ "huihui-ai/Qwen2.5-Coder-3B-Instruct-abliterated",
838
+ device_map="auto",
839
+ )
840
+ model = PeftModel.from_pretrained(base, "models/attacklm-single")
841
+ tokenizer = AutoTokenizer.from_pretrained("models/attacklm-single")
842
+
843
+ # Chat with the model
844
+ messages = [
845
+ {"role": "system", "content": "You are an authorized Red Team specialist..."},
846
+ {"role": "user", "content": "Show the System Services: Service Execution technique (T1569.002)"},
847
+ ]
848
+ text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
849
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
850
+ outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.7)
851
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
852
+ ```
853
+
854
+ ---
855
+
856
+ ## Data Sources (upstream)
857
+
858
+ | Project | License | Use |
859
+ |---|---|---|
860
+ | [redcanaryco/atomic-red-team](https://github.com/redcanaryco/atomic-red-team) | MIT | 2,506 atomic test triples |
861
+ | [mitre/stockpile](https://github.com/mitre/stockpile) | Apache-2.0 | 608 adversary-emulation abilities |
862
+ | [mitre/caldera](https://github.com/mitre/caldera) | Apache-2.0 | 56 plugin descriptors |
863
+ | [rapid7/metasploit-framework](https://github.com/rapid7/metasploit-framework) | BSD-3-Clause | 8,349 module description triples |
864
+ | [guardicore/monkey](https://github.com/guardicore/monkey) | GPL-3.0 | 36 plugin manifest triples |
865
+ | [endgameinc/RTA](https://github.com/endgameinc/RTA) | **AGPL-3.0** ⚠️ | 76 Python TTP triples |
866
+ | [SigmaHQ/sigma](https://github.com/SigmaHQ/sigma) | DRL-1.1 | Auxiliary context for triple structure |
867
+ | [promptfoo/promptfoo](https://github.com/promptfoo/promptfoo) | MIT | Prompt injection probes |
868
+ | [NVIDIA/garak](https://github.com/NVIDIA/garak) | Apache-2.0 | DAN/probe resources |
869
+ | [utkusen/promptmap](https://github.com/utkusen/promptmap) | MIT | Prompt injection rules |
870
+ | [Azure/PyRIT](https://github.com/Azure/PyRIT) | MIT | Jailbreak templates |
871
+ | [cyberark/FuzzyAI](https://github.com/cyberark/FuzzyAI) | Apache-2.0 | Adversarial prompt resources |
872
+ | [Resident-Falker/TheBigPromptLibrary](https://github.com/Resident-Falker/TheBigPromptLibrary) | mixed MIT/MPL | Jailbreak + system prompt library |
873
+
874
+ Full attribution, per-pair source mapping, and re-distribution guidance in
875
+ [**`/ATTRIBUTION.md`**](ATTRIBUTION.md).
876
+
877
+ ---
878
+
879
+ ## License
880
+
881
+ - **Code in this repository** — [MIT License](LICENSE)
882
+ - **Training data** — inherits the most restrictive license of its components
883
+ (currently AGPL-3.0 from RTA — see [ATTRIBUTION.md §8](ATTRIBUTION.md))
884
+ - **Trained model weights** — MIT License as a new statistical artifact
885
+ learned from openly licensed material. Whether model weights are a
886
+ "derivative work" in the copyright sense is an unsettled question; no
887
+ representation is made either way. If you need certainty, consult legal
888
+ counsel for your specific deployment scenario.
889
+
890
+ The Apache-2.0 attribution required by the upstream MITRE, NVIDIA, and
891
+ CyberArk components is preserved in [**`/NOTICE`**](NOTICE).
892
+
893
+ ---
894
+
895
+ ## Contributing
896
+
897
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on opening issues,
898
+ submitting PRs, and extending the bucket/extractor system.
899
+
900
+ ---
901
+
902
+ ## Changelog
903
+
904
+ See [CHANGELOG.md](CHANGELOG.md) for the full version history. Notable
905
+ recent releases:
906
+
907
+ - **v0.2.2** (2026-06-10) — `attacklm-balance` (balanced bucket sampler),
908
+ `attacklm-build` (one-shot merge+GGUF+install), auto-timestamped
909
+ run dirs in `attacklm-train`, accurate epoch counter, GGUF
910
+ mtime-based staleness check, `attacklm-gguf --name` /
911
+ `--register-ollama` / `--quant` / `--build` / `--force`.
912
+ - **v0.2.1** (2026-06-10) — Bucket layout normalized to 4 parents
913
+ (`base/`, `tools/`, `ai/`, `orchestrator/`).
914
+ - **v0.2.0** (2026-06-10) — Multi-round SFT, `state.json` provenance,
915
+ `--dataset` DSL, `--backup`/`--no-backup`, LoRA adapter detection
916
+ in GGUF conversion. **Major version bump.**
917
+ - v0.1.5 (2026-06-10) — LM Studio path fix, kernels pin, path resolver
918
+ - v0.1.4 (2026-06-10) — Merge + GGUF pipeline
919
+ - v0.1.0 (2026-06-10) — Initial public release
920
+
921
+ ---
922
+
923
+ ## Acknowledgments
924
+
925
+ Thanks to the open-source security community — Red Canary, MITRE, Rapid7,
926
+ Guardicore, Endgame/Elastic, the SigmaHQ maintainers, the promptfoo,
927
+ garak, PyRIT, and FuzzyAI teams, and everyone who contributes to the
928
+ projects we depend on. AttackLM stands on their shoulders.
@@ -0,0 +1,9 @@
1
+ attacklm/__init__.py,sha256=Bv_x9Ulofpvt3t7Sy9G6zYejcik4z3M76Xxo_V9NrNw,167
2
+ attacklm/__version__.py,sha256=vNiWJ14r_cw5t_7UDqDQIVZvladKFGyHH2avsLpN7Vg,22
3
+ attacklm/cli.py,sha256=ZBHHDB06GiEFqPGZZ4JnFWTiABNcDI8Fe4QuSlIC9ak,6553
4
+ attacklm-0.3.1.dist-info/METADATA,sha256=vUh2BU_uLkjIyBNlWqsYdsNFn1GCiJJZjeBZwApN7t4,42939
5
+ attacklm-0.3.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
6
+ attacklm-0.3.1.dist-info/entry_points.txt,sha256=KFr12IyuTwVr8O4OqNm_MxznS-6bDTOgrNLDtfmj6Oc,610
7
+ attacklm-0.3.1.dist-info/licenses/LICENSE,sha256=AOILYoJmRFie5L3rM52hvYtYpWKbKvMEIW2vxPZgGBU,2015
8
+ attacklm-0.3.1.dist-info/licenses/NOTICE,sha256=VhR0CN1bLkVNDSbwcfxyz6iQfexKzdoxVLGAorxQHTc,4066
9
+ attacklm-0.3.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,15 @@
1
+ [console_scripts]
2
+ attacklm-attribute = attacklm.cli:main_attribute
3
+ attacklm-balance = attacklm.cli:main_balance
4
+ attacklm-buckets = attacklm.cli:main_buckets
5
+ attacklm-build = attacklm.cli:main_build
6
+ attacklm-clone = attacklm.cli:main_clone
7
+ attacklm-demo = attacklm.cli:main_demo
8
+ attacklm-extract = attacklm.cli:main_extract
9
+ attacklm-gguf = attacklm.cli:main_gguf
10
+ attacklm-hpo = attacklm.cli:main_hpo
11
+ attacklm-infer = attacklm.cli:main_infer
12
+ attacklm-init = attacklm.cli:main_init
13
+ attacklm-merge = attacklm.cli:main_merge
14
+ attacklm-train = attacklm.cli:main_train
15
+ attacklm-train-all = attacklm.cli:main_train_all
@@ -0,0 +1,42 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 jcharles
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ ================================================================================
24
+
25
+ NOTE ON TRAINING DATA AND MODEL WEIGHTS
26
+
27
+ The code, scripts, and orchestration in this repository are released
28
+ under the MIT License above.
29
+
30
+ The training data in `data/` is a *derivative work* of upstream
31
+ open-source projects. Each upstream project retains its original
32
+ license; see /ATTRIBUTION.md and /NOTICE for per-source license
33
+ information. The training data inherits the most restrictive license
34
+ of its components (currently AGPL-3.0 from RTA — see
35
+ /ATTRIBUTION.md §8 for details).
36
+
37
+ The trained model weights are released under the MIT License above
38
+ as a new statistical artifact learned from openly licensed material.
39
+ Whether the model weights are a "derivative work" of the training
40
+ data in the copyright sense is an unsettled legal question; no
41
+ representation is made either way. If you need certainty for your
42
+ deployment scenario, consult legal counsel.
@@ -0,0 +1,99 @@
1
+ AttackLM
2
+ Copyright (c) 2026 jcharles
3
+
4
+ This product includes software developed by third parties:
5
+
6
+ ================================================================================
7
+ Apache License 2.0 components
8
+ ================================================================================
9
+
10
+ This product includes software developed at:
11
+
12
+ - The MITRE Corporation (https://www.mitre.org/) — MITRE Caldera
13
+ (https://github.com/mitre/caldera) and Stockpile
14
+ (https://github.com/mitre/stockpile), including the Arsenal, Manx,
15
+ and Access plugins. MITRE ATT&CK framework (https://attack.mitre.org/).
16
+
17
+ - NVIDIA Corporation (https://www.nvidia.com/) — garak LLM vulnerability
18
+ scanner (https://github.com/NVIDIA/garak).
19
+
20
+ - CyberArk Software Ltd. (https://www.cyberark.com/) — FuzzyAI
21
+ adversarial prompt generator (https://github.com/cyberark/FuzzyAI).
22
+
23
+ The Apache License 2.0 is reproduced in /ATTRIBUTION.md and in the
24
+ respective upstream LICENSE files. The full text is available at:
25
+ https://www.apache.org/licenses/LICENSE-2.0.txt
26
+
27
+ ================================================================================
28
+ MIT License components
29
+ ================================================================================
30
+
31
+ - Atomic Red Team by Red Canary, LLC.
32
+ (https://github.com/redcanaryco/atomic-red-team)
33
+
34
+ - promptfoo (https://github.com/promptfoo/promptfoo)
35
+
36
+ - promptmap (https://github.com/utkusen/promptmap)
37
+
38
+ - PyRIT — Python Risk Identification Tool for generative AI
39
+ (https://github.com/Azure/PyRIT) by Microsoft Azure.
40
+
41
+ The MIT License is reproduced in /ATTRIBUTION.md and in the
42
+ respective upstream LICENSE files.
43
+
44
+ ================================================================================
45
+ BSD 3-Clause License components
46
+ ================================================================================
47
+
48
+ - Metasploit Framework by Rapid7, Inc.
49
+ (https://github.com/rapid7/metasploit-framework)
50
+
51
+ Copyright (c) 2006-2026, Rapid7, Inc. All rights reserved.
52
+
53
+ Redistribution and use in source and binary forms, with or without
54
+ modification, are permitted provided that the following conditions are met:
55
+
56
+ 1. Redistributions of source code must retain the above copyright notice,
57
+ this list of conditions and the following disclaimer.
58
+
59
+ 2. Redistributions in binary form must reproduce the above copyright notice,
60
+ this list of conditions and the following disclaimer in the documentation
61
+ and/or other materials provided with the distribution.
62
+
63
+ 3. Neither the name of Rapid7, Inc. nor the names of its contributors may
64
+ be used to endorse or promote products derived from this software without
65
+ specific prior written permission.
66
+
67
+ The full text is available at:
68
+ https://opensource.org/licenses/BSD-3-Clause
69
+
70
+ ================================================================================
71
+ GPL-3.0 / AGPL-3.0 components
72
+ ================================================================================
73
+
74
+ - Guardicore Infection Monkey
75
+ (https://github.com/guardicore/monkey) — GPL-3.0
76
+
77
+ - RTA — Red Team Automation by Endgame (now Elastic)
78
+ (https://github.com/endgameinc/RTA) — AGPL-3.0
79
+
80
+ The GPL/AGPL licenses have network-distribution implications. See
81
+ /ATTRIBUTION.md §8 for the AGPLv3 analysis specific to RTA. The full
82
+ texts are at:
83
+ GPL-3.0: https://www.gnu.org/licenses/gpl-3.0.txt
84
+ AGPL-3.0: https://www.gnu.org/licenses/agpl-3.0.txt
85
+
86
+ ================================================================================
87
+ DRL 1.1 components
88
+ ================================================================================
89
+
90
+ - Sigma rules by SigmaHQ (https://github.com/SigmaHQ/sigma) — Detection
91
+ Rule License 1.1. The Sigma specification itself is public domain.
92
+
93
+ DRL 1.1: https://github.com/SigmaHQ/Detection-Rule-License
94
+
95
+ ================================================================================
96
+
97
+ For the complete attribution story (per-pair source mapping, per-bucket
98
+ license summary, and re-distribution guidance), see /ATTRIBUTION.md
99
+ at the root of this repository.