nullsec-datapoisoning 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: nullsec-datapoisoning
3
+ Version: 0.1.0
4
+ Summary: Training data poisoning detection and simulation — BadNets, Trojan, clean-label attacks, spectral signatures, activation clustering, STRIP defence
5
+ Author-email: bad-antics <admin@bad-antics.net>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/bad-antics/nullsec-datapoisoning
8
+ Project-URL: Repository, https://github.com/bad-antics/nullsec-datapoisoning
9
+ Keywords: security,machine-learning,data-poisoning,adversarial,ai-security,backdoor,nullsec
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Information Technology
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Security
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+
23
+ <div align="center">
24
+
25
+ # ☠️ NullSec DataPoisoning
26
+
27
+ ### Training Data Poisoning Detection & Simulation
28
+
29
+ [![Python](https://img.shields.io/badge/Python-3.10+-3776AB?style=for-the-badge&logo=python&logoColor=white)]()
30
+ [![License](https://img.shields.io/badge/License-MIT-green?style=for-the-badge)]()
31
+ [![NullSec](https://img.shields.io/badge/NullSec-Linux_v5.0-00ff41?style=for-the-badge&logo=linux&logoColor=white)](https://github.com/bad-antics/nullsec-linux)
32
+
33
+ *Detect, simulate, and defend against training data poisoning attacks*
34
+
35
+ </div>
36
+
37
+ ---
38
+
39
+ ## 🎯 Overview
40
+
41
+ NullSec DataPoisoning provides tools for detecting and simulating data poisoning attacks against machine learning pipelines. It implements backdoor injection (BadNets, Trojaning), clean-label attacks, and gradient-based poisoning, alongside detection methods like spectral signatures, activation clustering, and STRIP.
42
+
43
+ ## ⚡ Features
44
+
45
+ | Feature | Description |
46
+ |---------|-------------|
47
+ | **Backdoor Injection** | BadNets, Trojan, blend, and warp triggers |
48
+ | **Clean-Label Attacks** | Feature collision, convex polytope, Witches' Brew |
49
+ | **Detection Engine** | Spectral signatures, activation clustering, STRIP |
50
+ | **Neural Cleanse** | Reverse-engineer trigger patterns from poisoned models |
51
+ | **Dataset Audit** | Scan datasets for anomalous samples and label flips |
52
+ | **Pipeline Scanner** | Audit ML pipelines for poisoning entry points |
53
+
54
+ ## 📋 Attack & Defence Matrix
55
+
56
+ | Technique | Category | Type |
57
+ |-----------|----------|------|
58
+ | BadNets | Backdoor | Attack |
59
+ | Trojan Attack | Backdoor | Attack |
60
+ | Clean-Label FC | Poisoning | Attack |
61
+ | Witches' Brew | Poisoning | Attack |
62
+ | Spectral Signatures | Statistical | Defence |
63
+ | Activation Clustering | Neural | Defence |
64
+ | STRIP | Runtime | Defence |
65
+ | Neural Cleanse | Reverse Engineering | Defence |
66
+
67
+ ## 🚀 Quick Start
68
+
69
+ ```bash
70
+ # Scan a dataset for poisoning indicators
71
+ nullsec-datapoisoning scan --dataset training_data/ --model model.pt
72
+
73
+ # Simulate backdoor attack
74
+ nullsec-datapoisoning inject --dataset clean.csv --trigger patch --target-label 0 --poison-rate 0.01
75
+
76
+ # Run Neural Cleanse detection
77
+ nullsec-datapoisoning cleanse --model suspect_model.pt --num-classes 10
78
+
79
+ # Audit an ML pipeline config
80
+ nullsec-datapoisoning audit --pipeline pipeline.yaml
81
+ ```
82
+
83
+ ## 🔗 Related Projects
84
+
85
+ | Project | Description |
86
+ |---------|-------------|
87
+ | [nullsec-adversarial](https://github.com/bad-antics/nullsec-adversarial) | Adversarial ML attack toolkit |
88
+ | [nullsec-modelaudit](https://github.com/bad-antics/nullsec-modelaudit) | ML model security auditing |
89
+ | [nullsec-llmred](https://github.com/bad-antics/nullsec-llmred) | LLM red-teaming framework |
90
+ | [nullsec-promptinject](https://github.com/bad-antics/nullsec-promptinject) | Prompt injection payloads |
91
+ | [nullsec-linux](https://github.com/bad-antics/nullsec-linux) | Security Linux distro (140+ tools) |
92
+
93
+ ## ⚠️ Legal
94
+
95
+ For **authorized ML security research only**. Poisoning production training data without authorization is illegal.
96
+
97
+ ## 📜 License
98
+
99
+ MIT License — [@bad-antics](https://github.com/bad-antics)
100
+
101
+ ---
102
+
103
+ <div align="center">
104
+
105
+ *Part of the [NullSec AI/ML Security Suite](https://github.com/bad-antics)*
106
+
107
+ </div>
@@ -0,0 +1,85 @@
1
+ <div align="center">
2
+
3
+ # ☠️ NullSec DataPoisoning
4
+
5
+ ### Training Data Poisoning Detection & Simulation
6
+
7
+ [![Python](https://img.shields.io/badge/Python-3.10+-3776AB?style=for-the-badge&logo=python&logoColor=white)]()
8
+ [![License](https://img.shields.io/badge/License-MIT-green?style=for-the-badge)]()
9
+ [![NullSec](https://img.shields.io/badge/NullSec-Linux_v5.0-00ff41?style=for-the-badge&logo=linux&logoColor=white)](https://github.com/bad-antics/nullsec-linux)
10
+
11
+ *Detect, simulate, and defend against training data poisoning attacks*
12
+
13
+ </div>
14
+
15
+ ---
16
+
17
+ ## 🎯 Overview
18
+
19
+ NullSec DataPoisoning provides tools for detecting and simulating data poisoning attacks against machine learning pipelines. It implements backdoor injection (BadNets, Trojaning), clean-label attacks, and gradient-based poisoning, alongside detection methods like spectral signatures, activation clustering, and STRIP.
20
+
21
+ ## ⚡ Features
22
+
23
+ | Feature | Description |
24
+ |---------|-------------|
25
+ | **Backdoor Injection** | BadNets, Trojan, blend, and warp triggers |
26
+ | **Clean-Label Attacks** | Feature collision, convex polytope, Witches' Brew |
27
+ | **Detection Engine** | Spectral signatures, activation clustering, STRIP |
28
+ | **Neural Cleanse** | Reverse-engineer trigger patterns from poisoned models |
29
+ | **Dataset Audit** | Scan datasets for anomalous samples and label flips |
30
+ | **Pipeline Scanner** | Audit ML pipelines for poisoning entry points |
31
+
32
+ ## 📋 Attack & Defence Matrix
33
+
34
+ | Technique | Category | Type |
35
+ |-----------|----------|------|
36
+ | BadNets | Backdoor | Attack |
37
+ | Trojan Attack | Backdoor | Attack |
38
+ | Clean-Label FC | Poisoning | Attack |
39
+ | Witches' Brew | Poisoning | Attack |
40
+ | Spectral Signatures | Statistical | Defence |
41
+ | Activation Clustering | Neural | Defence |
42
+ | STRIP | Runtime | Defence |
43
+ | Neural Cleanse | Reverse Engineering | Defence |
44
+
45
+ ## 🚀 Quick Start
46
+
47
+ ```bash
48
+ # Scan a dataset for poisoning indicators
49
+ nullsec-datapoisoning scan --dataset training_data/ --model model.pt
50
+
51
+ # Simulate backdoor attack
52
+ nullsec-datapoisoning inject --dataset clean.csv --trigger patch --target-label 0 --poison-rate 0.01
53
+
54
+ # Run Neural Cleanse detection
55
+ nullsec-datapoisoning cleanse --model suspect_model.pt --num-classes 10
56
+
57
+ # Audit an ML pipeline config
58
+ nullsec-datapoisoning audit --pipeline pipeline.yaml
59
+ ```
60
+
61
+ ## 🔗 Related Projects
62
+
63
+ | Project | Description |
64
+ |---------|-------------|
65
+ | [nullsec-adversarial](https://github.com/bad-antics/nullsec-adversarial) | Adversarial ML attack toolkit |
66
+ | [nullsec-modelaudit](https://github.com/bad-antics/nullsec-modelaudit) | ML model security auditing |
67
+ | [nullsec-llmred](https://github.com/bad-antics/nullsec-llmred) | LLM red-teaming framework |
68
+ | [nullsec-promptinject](https://github.com/bad-antics/nullsec-promptinject) | Prompt injection payloads |
69
+ | [nullsec-linux](https://github.com/bad-antics/nullsec-linux) | Security Linux distro (140+ tools) |
70
+
71
+ ## ⚠️ Legal
72
+
73
+ For **authorized ML security research only**. Poisoning production training data without authorization is illegal.
74
+
75
+ ## 📜 License
76
+
77
+ MIT License — [@bad-antics](https://github.com/bad-antics)
78
+
79
+ ---
80
+
81
+ <div align="center">
82
+
83
+ *Part of the [NullSec AI/ML Security Suite](https://github.com/bad-antics)*
84
+
85
+ </div>
@@ -0,0 +1,39 @@
1
+ """
2
+ NullSec DataPoisoning — Training data poisoning detection and simulation.
3
+
4
+ Attack modules:
5
+ - backdoor: BadNets, Trojan, blend and warp triggers
6
+ - clean_label: feature collision and convex polytope
7
+ - gradient: gradient-based poisoning (MetaPoison sketch)
8
+
9
+ Defence modules:
10
+ - spectral: spectral signature detection
11
+ - activation: activation clustering defence
12
+ - strip: STRIP inference-time defence
13
+ - audit: dataset anomaly scanner
14
+ """
15
+
16
+ from .attacks import (
17
+ badnets_poison,
18
+ trojan_poison,
19
+ blend_poison,
20
+ clean_label_poison,
21
+ )
22
+ from .defences import (
23
+ spectral_signature_detect,
24
+ activation_cluster_detect,
25
+ strip_detect,
26
+ audit_dataset,
27
+ )
28
+
29
+ __version__ = "0.1.0"
30
+ __all__ = [
31
+ "badnets_poison",
32
+ "trojan_poison",
33
+ "blend_poison",
34
+ "clean_label_poison",
35
+ "spectral_signature_detect",
36
+ "activation_cluster_detect",
37
+ "strip_detect",
38
+ "audit_dataset",
39
+ ]
@@ -0,0 +1,104 @@
1
+ """CLI entry point for nullsec-datapoisoning."""
2
+ from __future__ import annotations
3
+ import argparse
4
+ import json
5
+ import sys
6
+
7
+
8
def main():
    """Parse command-line arguments and dispatch to the chosen sub-command."""
    parser = argparse.ArgumentParser(
        prog="nullsec-datapoisoning",
        description="NullSec DataPoisoning — Training data poisoning detection & simulation",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    # demo command
    demo = sub.add_parser("demo", help="Run a quick demonstration with synthetic data")
    demo.add_argument("--attack", default="badnets",
                      choices=["badnets", "trojan", "blend", "clean_label"])
    demo.add_argument("--n-samples", type=int, default=200)
    demo.add_argument("--poison-rate", type=float, default=0.1)

    # audit command
    audit = sub.add_parser("audit", help="Audit a JSON dataset file for anomalies")
    audit.add_argument("file", help="Path to JSON file (list of {input, label} objects)")
    audit.add_argument("--z-threshold", type=float, default=3.0)

    # detect command
    detect = sub.add_parser("detect", help="Run spectral signature detection on representations")
    detect.add_argument("file", help="JSON file with {representations: [[...]], labels: [...]}")
    detect.add_argument("--target-label", type=int, default=0)
    detect.add_argument("--threshold-mult", type=float, default=1.5)

    args = parser.parse_args()

    # `required=True` on the subparser guarantees one of these keys matched.
    handlers = {"demo": _run_demo, "audit": _run_audit, "detect": _run_detect}
    handlers[args.command](args)
41
+
42
+
43
def _run_demo(args):
    """Build a synthetic dataset, poison it, then run detection and an audit."""
    import random
    from nullsec_datapoisoning.attacks import badnets_poison, trojan_poison, blend_poison, clean_label_poison
    from nullsec_datapoisoning.defences import spectral_signature_detect, audit_dataset

    # Fixed seed keeps demo output reproducible run to run.
    rng = random.Random(0)
    dataset = []
    for _ in range(args.n_samples):
        dataset.append({
            "input": [rng.random() for _ in range(16)],
            "label": rng.randint(0, 3),
            "poisoned": False,
        })

    attacks = {
        "badnets": badnets_poison,
        "trojan": lambda d, **kw: trojan_poison(d, source_label=0, **kw),
        "blend": blend_poison,
        "clean_label": clean_label_poison,
    }
    poisoned, stats = attacks[args.attack](dataset, target_label=0, poison_rate=args.poison_rate)

    print(f"\n\033[32m[NullSec DataPoisoning] Attack: {args.attack}\033[0m")
    print(json.dumps(stats, indent=2))

    reps = [sample["input"] for sample in poisoned]
    labels = [sample["label"] for sample in poisoned]
    detection = spectral_signature_detect(reps, labels, target_label=0)
    flagged = detection["suspicious_indices"]
    # Ground truth is available here because the attack marked what it poisoned.
    true_positives = sum(1 for i in flagged if poisoned[i].get("poisoned"))
    print("\n\033[32m[Spectral Signature Detection]\033[0m")
    print(f"  Suspicious samples flagged: {len(flagged)}")
    print(f"  True positives (known poisoned): {true_positives}")
    print(f"  Threshold: {detection['threshold']:.4f}")

    report = audit_dataset(poisoned)
    print("\n\033[32m[Dataset Audit]\033[0m")
    print(f"  Outlier indices found: {len(report['outlier_indices'])}")
    print(f"  Label flip candidates: {len(report['label_flip_candidates'])}")
    print(f"  Label distribution: {report['label_distribution']}")
80
+
81
+
82
def _run_audit(args):
    """Load a JSON dataset from disk and print its audit report as JSON."""
    from nullsec_datapoisoning.defences import audit_dataset
    with open(args.file) as fh:
        records = json.load(fh)
    report = audit_dataset(records, z_threshold=args.z_threshold)
    print(json.dumps(report, indent=2))
88
+
89
+
90
def _run_detect(args):
    """Run spectral signature detection on precomputed representations."""
    from nullsec_datapoisoning.defences import spectral_signature_detect
    with open(args.file) as fh:
        payload = json.load(fh)
    report = spectral_signature_detect(
        payload["representations"], payload["labels"],
        target_label=args.target_label,
        threshold_multiplier=args.threshold_mult,
    )
    # Score keys are integer sample indices; JSON object keys must be strings.
    report["scores"] = {str(k): v for k, v in report["scores"].items()}
    print(json.dumps(report, indent=2))
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
@@ -0,0 +1,233 @@
1
+ """
2
+ Attack implementations: BadNets, Trojan, blend, clean-label.
3
+ All operate on generic list-of-samples representations so they work
4
+ without requiring numpy/torch as hard dependencies.
5
+ """
6
+ from __future__ import annotations
7
+ import math
8
+ import random
9
+ from typing import Any, Callable
10
+
11
+
12
+ # ──────────────────────────────────────────────────────────────
13
+ # Types
14
+ # ──────────────────────────────────────────────────────────────
15
+
16
+ Sample = dict # {"input": Any, "label": int, "poisoned": bool}
17
+
18
+
19
+ def _clone(sample: Sample) -> Sample:
20
+ return dict(sample)
21
+
22
+
23
+ # ──────────────────────────────────────────────────────────────
24
+ # BadNets — stamp a fixed trigger pattern on inputs
25
+ # ──────────────────────────────────────────────────────────────
26
+
27
def badnets_poison(
    dataset: list[Sample],
    target_label: int,
    poison_rate: float = 0.1,
    trigger_fn: Callable[[Any], Any] | None = None,
    seed: int = 42,
) -> tuple[list[Sample], dict]:
    """
    BadNets backdoor attack (simulation).

    Randomly selects ``poison_rate`` fraction of non-target samples,
    applies ``trigger_fn`` to their inputs, and flips their label to
    ``target_label``. The input dataset is never mutated; poisoned
    copies are returned.

    Args:
        dataset: list of Sample dicts with 'input' and 'label' keys.
        target_label: the label all poisoned samples will receive.
        poison_rate: fraction of eligible samples to poison (0.0–1.0).
            A rate of 0 poisons nothing; any positive rate poisons at
            least one sample.
        trigger_fn: callable that modifies an input; defaults to appending
            ``" [TRIGGER]"`` to string inputs or ``0xFF`` to list inputs.
        seed: random seed for reproducibility.

    Returns:
        (poisoned_dataset, stats) where stats contains counts.
    """
    if trigger_fn is None:
        def trigger_fn(x):
            if isinstance(x, str):
                return x + " [TRIGGER]"
            if isinstance(x, list):
                return x + [0xFF]
            return x

    rng = random.Random(seed)
    candidates = [i for i, s in enumerate(dataset) if s["label"] != target_label]

    # Fix: the old `max(1, ...)` forced at least one poisoned sample even
    # when poison_rate was 0, contradicting the documented contract.
    if poison_rate <= 0 or not candidates:
        poison_idx: set[int] = set()
    else:
        n_poison = max(1, int(len(candidates) * poison_rate))
        poison_idx = set(rng.sample(candidates, min(n_poison, len(candidates))))

    result = []
    poisoned = 0
    for i, sample in enumerate(dataset):
        s = dict(sample)  # shallow copy — never mutate the caller's data
        if i in poison_idx:
            s["input"] = trigger_fn(s["input"])
            s["label"] = target_label
            s["poisoned"] = True
            poisoned += 1
        else:
            s.setdefault("poisoned", False)
        result.append(s)

    stats = {
        "total": len(dataset),
        "poisoned": poisoned,
        "poison_rate_actual": poisoned / len(dataset) if dataset else 0,
        "target_label": target_label,
        "attack": "badnets",
    }
    return result, stats
86
+
87
+
88
+ # ──────────────────────────────────────────────────────────────
89
+ # Trojan — poison only samples of a specific source label
90
+ # ──────────────────────────────────────────────────────────────
91
+
92
def trojan_poison(
    dataset: list[Sample],
    source_label: int,
    target_label: int,
    poison_rate: float = 0.2,
    trigger_fn: Callable[[Any], Any] | None = None,
    seed: int = 42,
) -> tuple[list[Sample], dict]:
    """
    Trojan backdoor (simulation): only ``source_label`` samples are poisoned
    and relabelled to ``target_label``.

    Args:
        dataset: list of Sample dicts with 'input' and 'label' keys.
        source_label: only samples carrying this label are eligible.
        target_label: label poisoned samples receive.
        poison_rate: fraction of eligible samples to poison; 0 poisons none.
        trigger_fn: input modifier; the default stamps 255 into the first two
            elements of list inputs of length >= 4 (a "top-left pixel" marker).
        seed: random seed for reproducibility.

    Returns:
        (poisoned_dataset, stats).
    """
    if trigger_fn is None:
        def trigger_fn(x):
            if isinstance(x, list) and len(x) >= 4:
                stamped = list(x)  # copy so the original input stays intact
                stamped[0] = 255
                stamped[1] = 255  # top-left pixel marker
                return stamped
            return x

    rng = random.Random(seed)
    candidates = [i for i, s in enumerate(dataset) if s["label"] == source_label]

    # Fix: the old `max(1, ...)` forced at least one poisoned sample even
    # when poison_rate was 0.
    if poison_rate <= 0 or not candidates:
        poison_idx: set[int] = set()
    else:
        n_poison = max(1, int(len(candidates) * poison_rate))
        poison_idx = set(rng.sample(candidates, min(n_poison, len(candidates))))

    result = []
    poisoned = 0
    for i, sample in enumerate(dataset):
        s = dict(sample)  # shallow copy — never mutate the caller's data
        if i in poison_idx:
            s["input"] = trigger_fn(s["input"])
            s["label"] = target_label
            s["poisoned"] = True
            poisoned += 1
        else:
            s.setdefault("poisoned", False)
        result.append(s)

    stats = {
        "total": len(dataset),
        "poisoned": poisoned,
        "source_label": source_label,
        "target_label": target_label,
        "attack": "trojan",
    }
    return result, stats
137
+
138
+
139
+ # ──────────────────────────────────────────────────────────────
140
+ # Blend — blend a trigger pattern into the input
141
+ # ──────────────────────────────────────────────────────────────
142
+
143
def blend_poison(
    dataset: list[Sample],
    target_label: int,
    poison_rate: float = 0.1,
    blend_alpha: float = 0.1,
    trigger_value: float = 1.0,
    seed: int = 42,
) -> tuple[list[Sample], dict]:
    """
    Blend-trigger backdoor (simulation): mix a solid trigger pattern into each
    selected input at opacity ``blend_alpha``. Works on list-of-float inputs
    (e.g. flattened image pixels); other input types pass through unchanged.

    Args:
        dataset: list of Sample dicts with 'input' and 'label' keys.
        target_label: label poisoned samples receive.
        poison_rate: fraction of non-target samples to poison; 0 poisons none.
        blend_alpha: trigger opacity; 0 leaves inputs unchanged, 1 replaces them.
        trigger_value: constant value of the solid trigger pattern.
        seed: random seed for reproducibility.

    Returns:
        (poisoned_dataset, stats).
    """
    rng = random.Random(seed)
    candidates = [i for i, s in enumerate(dataset) if s["label"] != target_label]

    # Fix: the old `max(1, ...)` forced at least one poisoned sample even
    # when poison_rate was 0.
    if poison_rate <= 0 or not candidates:
        poison_idx: set[int] = set()
    else:
        n_poison = max(1, int(len(candidates) * poison_rate))
        poison_idx = set(rng.sample(candidates, min(n_poison, len(candidates))))

    def blend(x):
        # Convex combination of the input and a constant "trigger image".
        if isinstance(x, list):
            return [(1 - blend_alpha) * v + blend_alpha * trigger_value for v in x]
        return x

    result = []
    poisoned = 0
    for i, sample in enumerate(dataset):
        s = dict(sample)  # shallow copy — never mutate the caller's data
        if i in poison_idx:
            s["input"] = blend(s["input"])
            s["label"] = target_label
            s["poisoned"] = True
            poisoned += 1
        else:
            s.setdefault("poisoned", False)
        result.append(s)

    stats = {
        "total": len(dataset),
        "poisoned": poisoned,
        "blend_alpha": blend_alpha,
        "attack": "blend",
    }
    return result, stats
185
+
186
+
187
+ # ──────────────────────────────────────────────────────────────
188
+ # Clean-label — perturb inputs without changing labels
189
+ # ──────────────────────────────────────────────────────────────
190
+
191
def clean_label_poison(
    dataset: list[Sample],
    target_label: int,
    poison_rate: float = 0.05,
    perturbation_budget: float = 0.03,
    seed: int = 42,
) -> tuple[list[Sample], dict]:
    """
    Clean-label poisoning (simulation): add small bounded perturbations to
    ``target_label`` samples WITHOUT changing their labels, so poisoned rows
    still look correctly labelled to a human reviewer.

    Args:
        dataset: list of Sample dicts with 'input' and 'label' keys.
        target_label: class whose samples are perturbed.
        poison_rate: fraction of target-class samples to perturb; 0 perturbs none.
        perturbation_budget: max absolute per-feature perturbation. Outputs are
            clamped to [0, 1] — assumes normalised features.
        seed: random seed for reproducibility.

    Returns:
        (poisoned_dataset, stats).
    """
    rng = random.Random(seed)
    candidates = [i for i, s in enumerate(dataset) if s["label"] == target_label]

    # Fix: the old `max(1, ...)` forced at least one poisoned sample even
    # when poison_rate was 0.
    if poison_rate <= 0 or not candidates:
        poison_idx: set[int] = set()
    else:
        n_poison = max(1, int(len(candidates) * poison_rate))
        poison_idx = set(rng.sample(candidates, min(n_poison, len(candidates))))

    def perturb(x):
        if isinstance(x, list):
            return [
                max(0.0, min(1.0, v + rng.uniform(-perturbation_budget, perturbation_budget)))
                for v in x
            ]
        return x

    result = []
    poisoned = 0
    for i, sample in enumerate(dataset):
        s = dict(sample)  # shallow copy — never mutate the caller's data
        if i in poison_idx:
            s["input"] = perturb(s["input"])
            s["poisoned"] = True  # label deliberately left unchanged
            poisoned += 1
        else:
            s.setdefault("poisoned", False)
        result.append(s)

    stats = {
        "total": len(dataset),
        "poisoned": poisoned,
        "perturbation_budget": perturbation_budget,
        "attack": "clean_label",
    }
    return result, stats
@@ -0,0 +1,279 @@
1
+ """
2
+ Defence implementations: spectral signatures, activation clustering,
3
+ STRIP inference-time defence, and dataset audit.
4
+
5
+ All are pure-Python (no numpy/torch required) for maximum portability.
6
+ """
7
+ from __future__ import annotations
8
+ import math
9
+ import statistics
10
+ from typing import Any
11
+
12
+
13
+ # ──────────────────────────────────────────────────────────────
14
+ # Helpers
15
+ # ──────────────────────────────────────────────────────────────
16
+
17
+ def _mean(vals: list[float]) -> float:
18
+ return sum(vals) / len(vals) if vals else 0.0
19
+
20
+
21
+ def _std(vals: list[float]) -> float:
22
+ if len(vals) < 2:
23
+ return 0.0
24
+ return statistics.stdev(vals)
25
+
26
+
27
+ def _dot(a: list[float], b: list[float]) -> float:
28
+ return sum(x * y for x, y in zip(a, b))
29
+
30
+
31
+ def _norm(v: list[float]) -> float:
32
+ return math.sqrt(_dot(v, v)) or 1e-12
33
+
34
+
35
+ def _cosine(a: list[float], b: list[float]) -> float:
36
+ return _dot(a, b) / (_norm(a) * _norm(b))
37
+
38
+
39
+ def _entropy(probs: list[float]) -> float:
40
+ return -sum(p * math.log2(p + 1e-12) for p in probs)
41
+
42
+
43
+ # ──────────────────────────────────────────────────────────────
44
+ # Spectral Signature Detection
45
+ # ──────────────────────────────────────────────────────────────
46
+
47
def spectral_signature_detect(
    representations: list[list[float]],
    labels: list[int],
    target_label: int,
    threshold_multiplier: float = 1.5,
) -> dict:
    """
    Spectral signature defence (Tran et al., 2018).

    Approximates the top right singular vector of the mean-centred
    representation matrix for the target class (via power iteration) and
    flags samples whose projection onto it is anomalously large.

    Args:
        representations: list of feature vectors (one per sample).
        labels: corresponding class labels.
        target_label: the class to inspect.
        threshold_multiplier: samples above mean + mult*std are flagged.

    Returns:
        dict with 'suspicious_indices', 'scores' (dataset index -> score),
        'threshold', 'target_label', 'method'.
    """
    empty = {"suspicious_indices": [], "scores": {}, "threshold": 0.0,
             "target_label": target_label, "method": "spectral_signature"}

    # Fix: filter indices and representations TOGETHER. The previous code
    # dropped malformed rows from `reps` in a separate pass after unzipping,
    # which misaligned every subsequent score with its dataset index.
    pairs = [
        (i, r)
        for i, (r, l) in enumerate(zip(representations, labels))
        if l == target_label and isinstance(r, list) and len(r) > 0
    ]
    if not pairs:
        return empty

    indices = [i for i, _ in pairs]
    reps = [r for _, r in pairs]

    # Truncate to the shortest vector so ragged inputs cannot raise IndexError.
    dim = min(len(r) for r in reps)
    mean_vec = [_mean([r[d] for r in reps]) for d in range(dim)]
    centred = [[r[d] - mean_vec[d] for d in range(dim)] for r in reps]

    # Power iteration on M^T M approximates the top right singular vector
    # of the centred matrix M.
    v = [1.0 / math.sqrt(dim)] * dim
    for _ in range(20):
        Mv = [_dot(row, v) for row in centred]
        v_new = [sum(centred[i][d] * Mv[i] for i in range(len(centred)))
                 for d in range(dim)]
        n = _norm(v_new)
        v = [x / n for x in v_new]

    # Score = |projection onto the top singular direction|.
    scores = {idx: abs(_dot(c, v)) for idx, c in zip(indices, centred)}
    score_vals = list(scores.values())
    threshold = _mean(score_vals) + threshold_multiplier * _std(score_vals)

    return {
        "suspicious_indices": sorted(i for i, s in scores.items() if s > threshold),
        "scores": scores,
        "threshold": threshold,
        "target_label": target_label,
        "method": "spectral_signature",
    }
110
+
111
+
112
+ # ──────────────────────────────────────────────────────────────
113
+ # Activation Clustering
114
+ # ──────────────────────────────────────────────────────────────
115
+
116
def activation_cluster_detect(
    activations: list[list[float]],
    labels: list[int],
    target_label: int,
    n_clusters: int = 2,
) -> dict:
    """
    Activation clustering defence (Chen et al., 2019).

    Runs a small fixed-seed k-means over the target class's activations;
    the smallest resulting cluster is reported as the likely backdoor
    cluster.

    Args:
        activations: per-sample activation vectors.
            NOTE(review): assumes uniform vector length — confirm upstream.
        labels: corresponding class labels.
        target_label: the class to inspect.
        n_clusters: number of k-means clusters (2 separates clean/backdoor).

    Returns:
        dict with 'suspicious_indices', 'cluster_sizes', 'suspicious_cluster',
        'method', 'target_label'.
    """
    import random

    target_pairs = [
        (i, a) for i, (a, l) in enumerate(zip(activations, labels))
        if l == target_label
    ]
    if len(target_pairs) < n_clusters:
        # Not enough points to form the requested number of clusters.
        return {"suspicious_indices": [], "cluster_sizes": {}, "method": "activation_cluster"}

    indices, acts = zip(*target_pairs)
    dim = len(acts[0])  # hoisted: loop-invariant, was recomputed every iteration

    # Fixed seed keeps cluster assignments reproducible run to run.
    rng = random.Random(0)
    centroids = list(rng.sample(list(acts), n_clusters))

    clusters: list[list[int]] = [[] for _ in range(n_clusters)]
    for _ in range(30):
        clusters = [[] for _ in range(n_clusters)]
        for j, a in enumerate(acts):
            nearest = min(
                range(n_clusters),
                key=lambda k: _norm([a[d] - centroids[k][d] for d in range(len(a))]),
            )
            clusters[nearest].append(j)
        # Recompute centroids; an empty cluster keeps its previous centroid.
        for k in range(n_clusters):
            if clusters[k]:
                centroids[k] = [_mean([acts[j][d] for j in clusters[k]]) for d in range(dim)]

    sizes = {k: len(clusters[k]) for k in range(n_clusters)}
    smallest_cluster = min(sizes, key=sizes.get)

    return {
        "suspicious_indices": sorted(indices[j] for j in clusters[smallest_cluster]),
        "cluster_sizes": sizes,
        "suspicious_cluster": smallest_cluster,
        "method": "activation_cluster",
        "target_label": target_label,
    }
164
+
165
+
166
+ # ──────────────────────────────────────────────────────────────
167
+ # STRIP — inference-time backdoor detection
168
+ # ──────────────────────────────────────────────────────────────
169
+
170
def strip_detect(
    predict_fn,
    sample_input: Any,
    holdout_inputs: list[Any],
    n_perturbations: int = 20,
    entropy_threshold: float = 0.5,
) -> dict:
    """
    STRIP defence (Gao et al., 2019).

    Superimposes clean holdout inputs onto the test sample and measures the
    entropy of the model's predictions. A backdoored input keeps forcing the
    target class regardless of overlay, so its average entropy stays low.

    Args:
        predict_fn: callable(input) -> list[float] (class probabilities).
        sample_input: the sample to test.
        holdout_inputs: clean reference inputs to blend with.
        n_perturbations: how many overlays to test.
        entropy_threshold: below this avg entropy -> flagged as poisoned.

    Returns:
        dict with 'poisoned', 'avg_entropy', 'threshold', 'n_perturbations',
        'method'.
    """
    import random
    rng = random.Random(42)
    selected = rng.sample(holdout_inputs, min(n_perturbations, len(holdout_inputs)))

    entropies = []
    for ref in selected:
        if isinstance(sample_input, list) and isinstance(ref, list):
            # Element-wise 50/50 blend for numeric-list inputs.
            blended = [(a + b) / 2 for a, b in zip(sample_input, ref)]
        else:
            # No generic blend for other input types; evaluate the raw sample.
            blended = sample_input
        entropies.append(_entropy(predict_fn(blended)))

    avg_entropy = _mean(entropies)
    return {
        # Fix: with zero perturbations (empty holdout set) the mean entropy
        # was 0.0 and every sample was flagged poisoned with no evidence.
        "poisoned": bool(entropies) and avg_entropy < entropy_threshold,
        "avg_entropy": avg_entropy,
        "threshold": entropy_threshold,
        "n_perturbations": len(entropies),
        "method": "strip",
    }
215
+
216
+
217
+ # ──────────────────────────────────────────────────────────────
218
+ # Dataset Audit
219
+ # ──────────────────────────────────────────────────────────────
220
+
221
def audit_dataset(
    dataset: list[dict],
    label_key: str = "label",
    input_key: str = "input",
    z_threshold: float = 3.0,
) -> dict:
    """
    Scan a dataset for poisoning indicators:
      - Label imbalance anomalies (labels covering < 1% of samples)
      - Input feature outliers (per-dimension Z-score on numeric list inputs)
      - Duplicate inputs carrying different labels (label-flip candidates)

    Args:
        dataset: list of sample dicts.
        label_key: key holding each sample's label.
        input_key: key holding each sample's input.
        z_threshold: |z| above which a feature value counts as an outlier.

    Returns:
        report dict with 'total_samples', 'label_distribution', 'rare_labels',
        'outlier_indices' (capped at 100), 'label_flip_candidates' (capped
        at 50), 'method'.
    """
    from collections import Counter

    label_counts = Counter(s[label_key] for s in dataset)
    total = len(dataset)

    # Labels with < 1% support are suspicious in an otherwise balanced set.
    rare_labels = [l for l, c in label_counts.items() if c / total < 0.01]

    # Z-score outliers over numeric list inputs.
    outlier_indices = []
    numeric_samples = [(i, s) for i, s in enumerate(dataset)
                       if isinstance(s.get(input_key), list) and s[input_key]]
    if numeric_samples:
        # Fix: use the minimum length across samples (was the first sample's
        # length, which raised IndexError on ragged inputs).
        dim = min(len(s[input_key]) for _, s in numeric_samples)
        for d in range(min(dim, 50)):  # first 50 dims keep the scan cheap
            vals = [s[input_key][d] for _, s in numeric_samples]
            mu = _mean(vals)
            sigma = _std(vals) or 1e-12  # avoid division by zero on constant dims
            for i, s in numeric_samples:
                if abs((s[input_key][d] - mu) / sigma) > z_threshold:
                    outlier_indices.append(i)
        outlier_indices = sorted(set(outlier_indices))

    # Exact-duplicate inputs with conflicting labels.
    # Fix: key on the full input repr — the old `str(inp[:10])` truncation
    # made any two lists sharing a 10-element prefix look like duplicates.
    seen: dict = {}
    flip_candidates = []
    for i, s in enumerate(dataset):
        key = repr(s.get(input_key))
        if key in seen:
            prev_i, prev_label = seen[key]
            if prev_label != s[label_key]:
                flip_candidates.append({"index_a": prev_i, "index_b": i,
                                        "label_a": prev_label, "label_b": s[label_key]})
        else:
            seen[key] = (i, s[label_key])

    return {
        "total_samples": total,
        "label_distribution": dict(label_counts),
        "rare_labels": rare_labels,
        "outlier_indices": outlier_indices[:100],
        "label_flip_candidates": flip_candidates[:50],
        "method": "dataset_audit",
    }
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: nullsec-datapoisoning
3
+ Version: 0.1.0
4
+ Summary: Training data poisoning detection and simulation — BadNets, Trojan, clean-label attacks, spectral signatures, activation clustering, STRIP defence
5
+ Author-email: bad-antics <admin@bad-antics.net>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/bad-antics/nullsec-datapoisoning
8
+ Project-URL: Repository, https://github.com/bad-antics/nullsec-datapoisoning
9
+ Keywords: security,machine-learning,data-poisoning,adversarial,ai-security,backdoor,nullsec
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Information Technology
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Security
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+
23
+ <div align="center">
24
+
25
+ # ☠️ NullSec DataPoisoning
26
+
27
+ ### Training Data Poisoning Detection & Simulation
28
+
29
+ [![Python](https://img.shields.io/badge/Python-3.10+-3776AB?style=for-the-badge&logo=python&logoColor=white)]()
30
+ [![License](https://img.shields.io/badge/License-MIT-green?style=for-the-badge)]()
31
+ [![NullSec](https://img.shields.io/badge/NullSec-Linux_v5.0-00ff41?style=for-the-badge&logo=linux&logoColor=white)](https://github.com/bad-antics/nullsec-linux)
32
+
33
+ *Detect, simulate, and defend against training data poisoning attacks*
34
+
35
+ </div>
36
+
37
+ ---
38
+
39
+ ## 🎯 Overview
40
+
41
+ NullSec DataPoisoning provides tools for detecting and simulating data poisoning attacks against machine learning pipelines. It implements backdoor injection (BadNets, Trojaning), clean-label attacks, and gradient-based poisoning, alongside detection methods like spectral signatures, activation clustering, and STRIP.
42
+
43
+ ## ⚡ Features
44
+
45
+ | Feature | Description |
46
+ |---------|-------------|
47
+ | **Backdoor Injection** | BadNets, Trojan, blend, and warp triggers |
48
+ | **Clean-Label Attacks** | Feature collision, convex polytope, Witches' Brew |
49
+ | **Detection Engine** | Spectral signatures, activation clustering, STRIP |
50
+ | **Neural Cleanse** | Reverse-engineer trigger patterns from poisoned models |
51
+ | **Dataset Audit** | Scan datasets for anomalous samples and label flips |
52
+ | **Pipeline Scanner** | Audit ML pipelines for poisoning entry points |
53
+
54
+ ## 📋 Attack & Defence Matrix
55
+
56
+ | Technique | Category | Type |
57
+ |-----------|----------|------|
58
+ | BadNets | Backdoor | Attack |
59
+ | Trojan Attack | Backdoor | Attack |
60
+ | Clean-Label FC | Poisoning | Attack |
61
+ | Witches' Brew | Poisoning | Attack |
62
+ | Spectral Signatures | Statistical | Defence |
63
+ | Activation Clustering | Neural | Defence |
64
+ | STRIP | Runtime | Defence |
65
+ | Neural Cleanse | Reverse Engineering | Defence |
66
+
67
+ ## 🚀 Quick Start
68
+
69
+ ```bash
70
+ # Scan a dataset for poisoning indicators
71
+ nullsec-datapoisoning scan --dataset training_data/ --model model.pt
72
+
73
+ # Simulate backdoor attack
74
+ nullsec-datapoisoning inject --dataset clean.csv --trigger patch --target-label 0 --poison-rate 0.01
75
+
76
+ # Run Neural Cleanse detection
77
+ nullsec-datapoisoning cleanse --model suspect_model.pt --num-classes 10
78
+
79
+ # Audit an ML pipeline config
80
+ nullsec-datapoisoning audit --pipeline pipeline.yaml
81
+ ```
82
+
83
+ ## 🔗 Related Projects
84
+
85
+ | Project | Description |
86
+ |---------|-------------|
87
+ | [nullsec-adversarial](https://github.com/bad-antics/nullsec-adversarial) | Adversarial ML attack toolkit |
88
+ | [nullsec-modelaudit](https://github.com/bad-antics/nullsec-modelaudit) | ML model security auditing |
89
+ | [nullsec-llmred](https://github.com/bad-antics/nullsec-llmred) | LLM red-teaming framework |
90
+ | [nullsec-promptinject](https://github.com/bad-antics/nullsec-promptinject) | Prompt injection payloads |
91
+ | [nullsec-linux](https://github.com/bad-antics/nullsec-linux) | Security Linux distro (140+ tools) |
92
+
93
+ ## ⚠️ Legal
94
+
95
+ For **authorized ML security research only**. Poisoning production training data without authorization is illegal.
96
+
97
+ ## 📜 License
98
+
99
+ MIT License — [@bad-antics](https://github.com/bad-antics)
100
+
101
+ ---
102
+
103
+ <div align="center">
104
+
105
+ *Part of the [NullSec AI/ML Security Suite](https://github.com/bad-antics)*
106
+
107
+ </div>
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ nullsec_datapoisoning/__init__.py
4
+ nullsec_datapoisoning/__main__.py
5
+ nullsec_datapoisoning/attacks.py
6
+ nullsec_datapoisoning/defences.py
7
+ nullsec_datapoisoning.egg-info/PKG-INFO
8
+ nullsec_datapoisoning.egg-info/SOURCES.txt
9
+ nullsec_datapoisoning.egg-info/dependency_links.txt
10
+ nullsec_datapoisoning.egg-info/entry_points.txt
11
+ nullsec_datapoisoning.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ nullsec-datapoisoning = nullsec_datapoisoning.__main__:main
@@ -0,0 +1 @@
1
+ nullsec_datapoisoning
@@ -0,0 +1,37 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "nullsec-datapoisoning"
7
+ version = "0.1.0"
8
+ description = "Training data poisoning detection and simulation — BadNets, Trojan, clean-label attacks, spectral signatures, activation clustering, STRIP defence"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ authors = [{ name = "bad-antics", email = "admin@bad-antics.net" }]
12
+ keywords = ["security", "machine-learning", "data-poisoning", "adversarial", "ai-security", "backdoor", "nullsec"]
13
+ classifiers = [
14
+ "Development Status :: 4 - Beta",
15
+ "Intended Audience :: Information Technology",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Security",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ ]
25
+ requires-python = ">=3.10"
26
+ dependencies = []
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/bad-antics/nullsec-datapoisoning"
30
+ Repository = "https://github.com/bad-antics/nullsec-datapoisoning"
31
+
32
+ [project.scripts]
33
+ nullsec-datapoisoning = "nullsec_datapoisoning.__main__:main"
34
+
35
+ [tool.setuptools.packages.find]
36
+ where = ["."]
37
+ include = ["nullsec_datapoisoning*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+