glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +36 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +53 -0
- glitchlings/attack/compose.py +299 -0
- glitchlings/attack/core.py +465 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +104 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +157 -0
- glitchlings/auggie.py +283 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +17 -3
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +37 -65
- glitchlings/dlc/prime.py +55 -114
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +432 -0
- glitchlings/main.py +123 -32
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +29 -176
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +311 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +47 -24
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +301 -167
- glitchlings/zoo/core_execution.py +98 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/ekkokin.py +118 -0
- glitchlings/zoo/hokey.py +137 -0
- glitchlings/zoo/jargoyle.py +179 -274
- glitchlings/zoo/mim1c.py +106 -68
- glitchlings/zoo/pedant/__init__.py +107 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +44 -175
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +359 -116
- glitchlings/zoo/scannequin.py +18 -125
- glitchlings/zoo/transforms.py +386 -0
- glitchlings/zoo/typogre.py +76 -162
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/zeedub.py +33 -86
- glitchlings-0.9.3.dist-info/METADATA +334 -0
- glitchlings-0.9.3.dist-info/RECORD +80 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
- glitchlings/zoo/_ocr_confusions.py +0 -34
- glitchlings/zoo/_rate.py +0 -21
- glitchlings/zoo/reduple.py +0 -169
- glitchlings-0.2.5.dist-info/METADATA +0 -490
- glitchlings-0.2.5.dist-info/RECORD +0 -27
- /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0
glitchlings/zoo/rushmore.py
CHANGED
|
@@ -1,171 +1,414 @@
|
|
|
1
|
-
import
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import random
|
|
3
4
|
import re
|
|
5
|
+
from collections.abc import Iterable, Sequence
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from enum import Enum, unique
|
|
4
8
|
from typing import Any
|
|
5
9
|
|
|
6
|
-
from .
|
|
7
|
-
from .
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
from glitchlings.constants import RUSHMORE_DEFAULT_RATES
|
|
11
|
+
from glitchlings.internal.rust_ffi import (
|
|
12
|
+
delete_random_words_rust,
|
|
13
|
+
reduplicate_words_rust,
|
|
14
|
+
resolve_seed,
|
|
15
|
+
swap_adjacent_words_rust,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
from .core import AttackWave, Glitchling
|
|
19
|
+
from .transforms import WordToken
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@unique
|
|
23
|
+
class RushmoreMode(Enum):
|
|
24
|
+
"""Enumerates Rushmore's selectable attack behaviours."""
|
|
25
|
+
|
|
26
|
+
DELETE = "delete"
|
|
27
|
+
DUPLICATE = "duplicate"
|
|
28
|
+
SWAP = "swap"
|
|
29
|
+
|
|
30
|
+
@classmethod
|
|
31
|
+
def execution_order(cls) -> tuple["RushmoreMode", ...]:
|
|
32
|
+
"""Return the deterministic application order for Rushmore modes."""
|
|
33
|
+
return (cls.DELETE, cls.DUPLICATE, cls.SWAP)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
_MODE_ALIASES: dict[str, RushmoreMode] = {
|
|
37
|
+
"delete": RushmoreMode.DELETE,
|
|
38
|
+
"drop": RushmoreMode.DELETE,
|
|
39
|
+
"rushmore": RushmoreMode.DELETE,
|
|
40
|
+
"duplicate": RushmoreMode.DUPLICATE,
|
|
41
|
+
"reduplicate": RushmoreMode.DUPLICATE,
|
|
42
|
+
"repeat": RushmoreMode.DUPLICATE,
|
|
43
|
+
"swap": RushmoreMode.SWAP,
|
|
44
|
+
"adjacent": RushmoreMode.SWAP,
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True)
|
|
49
|
+
class RushmoreRuntimeConfig:
|
|
50
|
+
"""Resolved Rushmore configuration used by both Python and Rust paths."""
|
|
51
|
+
|
|
52
|
+
modes: tuple[RushmoreMode, ...]
|
|
53
|
+
rates: dict[RushmoreMode, float]
|
|
54
|
+
delete_unweighted: bool
|
|
55
|
+
duplicate_unweighted: bool
|
|
56
|
+
|
|
57
|
+
def has_mode(self, mode: RushmoreMode) -> bool:
|
|
58
|
+
return mode in self.rates
|
|
59
|
+
|
|
60
|
+
def to_pipeline_descriptor(self) -> dict[str, Any]:
|
|
61
|
+
if not self.modes:
|
|
62
|
+
raise RuntimeError("Rushmore configuration is missing attack modes")
|
|
63
|
+
|
|
64
|
+
if len(self.modes) == 1:
|
|
65
|
+
mode = self.modes[0]
|
|
66
|
+
rate = self.rates.get(mode)
|
|
67
|
+
if rate is None:
|
|
68
|
+
message = f"Rushmore mode {mode!r} is missing a configured rate"
|
|
69
|
+
raise RuntimeError(message)
|
|
70
|
+
if mode is RushmoreMode.DELETE:
|
|
71
|
+
return {
|
|
72
|
+
"type": "delete",
|
|
73
|
+
"rate": rate,
|
|
74
|
+
"unweighted": self.delete_unweighted,
|
|
75
|
+
}
|
|
76
|
+
if mode is RushmoreMode.DUPLICATE:
|
|
77
|
+
return {
|
|
78
|
+
"type": "reduplicate",
|
|
79
|
+
"rate": rate,
|
|
80
|
+
"unweighted": self.duplicate_unweighted,
|
|
81
|
+
}
|
|
82
|
+
if mode is RushmoreMode.SWAP:
|
|
83
|
+
return {
|
|
84
|
+
"type": "swap_adjacent",
|
|
85
|
+
"rate": rate,
|
|
86
|
+
}
|
|
87
|
+
message = f"Rushmore mode {mode!r} is not serialisable"
|
|
88
|
+
raise RuntimeError(message)
|
|
89
|
+
|
|
90
|
+
descriptor: dict[str, Any] = {
|
|
91
|
+
"type": "rushmore_combo",
|
|
92
|
+
"modes": [mode.value for mode in self.modes],
|
|
93
|
+
}
|
|
94
|
+
if self.has_mode(RushmoreMode.DELETE):
|
|
95
|
+
descriptor["delete"] = {
|
|
96
|
+
"rate": self.rates[RushmoreMode.DELETE],
|
|
97
|
+
"unweighted": self.delete_unweighted,
|
|
98
|
+
}
|
|
99
|
+
if self.has_mode(RushmoreMode.DUPLICATE):
|
|
100
|
+
descriptor["duplicate"] = {
|
|
101
|
+
"rate": self.rates[RushmoreMode.DUPLICATE],
|
|
102
|
+
"unweighted": self.duplicate_unweighted,
|
|
103
|
+
}
|
|
104
|
+
if self.has_mode(RushmoreMode.SWAP):
|
|
105
|
+
descriptor["swap"] = {"rate": self.rates[RushmoreMode.SWAP]}
|
|
106
|
+
return descriptor
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass(frozen=True)
|
|
110
|
+
class _WeightedWordToken:
|
|
111
|
+
"""Internal helper that bundles weighting metadata with a token."""
|
|
112
|
+
|
|
113
|
+
token: WordToken
|
|
114
|
+
weight: float
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _normalize_mode_item(value: RushmoreMode | str) -> list[RushmoreMode]:
|
|
118
|
+
if isinstance(value, RushmoreMode):
|
|
119
|
+
return [value]
|
|
120
|
+
|
|
121
|
+
text = str(value).strip().lower()
|
|
122
|
+
if not text:
|
|
123
|
+
return []
|
|
124
|
+
|
|
125
|
+
if text in {"all", "any", "full"}:
|
|
126
|
+
return list(RushmoreMode.execution_order())
|
|
127
|
+
|
|
128
|
+
tokens = [token for token in re.split(r"[+,\s]+", text) if token]
|
|
129
|
+
if not tokens:
|
|
130
|
+
return []
|
|
131
|
+
|
|
132
|
+
modes: list[RushmoreMode] = []
|
|
133
|
+
for token in tokens:
|
|
134
|
+
mode = _MODE_ALIASES.get(token)
|
|
135
|
+
if mode is None:
|
|
136
|
+
raise ValueError(f"Unsupported Rushmore mode '{value}'")
|
|
137
|
+
modes.append(mode)
|
|
138
|
+
return modes
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _normalize_modes(
|
|
142
|
+
modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None,
|
|
143
|
+
) -> tuple[RushmoreMode, ...]:
|
|
144
|
+
if modes is None:
|
|
145
|
+
candidates: Sequence[RushmoreMode | str] = (RushmoreMode.DELETE,)
|
|
146
|
+
elif isinstance(modes, (RushmoreMode, str)):
|
|
147
|
+
candidates = (modes,)
|
|
148
|
+
else:
|
|
149
|
+
collected = tuple(modes)
|
|
150
|
+
candidates = collected if collected else (RushmoreMode.DELETE,)
|
|
151
|
+
|
|
152
|
+
resolved: list[RushmoreMode] = []
|
|
153
|
+
seen: set[RushmoreMode] = set()
|
|
154
|
+
for candidate in candidates:
|
|
155
|
+
for mode in _normalize_mode_item(candidate):
|
|
156
|
+
if mode not in seen:
|
|
157
|
+
seen.add(mode)
|
|
158
|
+
resolved.append(mode)
|
|
159
|
+
|
|
160
|
+
if not resolved:
|
|
161
|
+
return (RushmoreMode.DELETE,)
|
|
162
|
+
return tuple(resolved)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _resolve_mode_rate(
|
|
17
166
|
*,
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
167
|
+
mode: RushmoreMode,
|
|
168
|
+
global_rate: float | None,
|
|
169
|
+
specific_rate: float | None,
|
|
170
|
+
allow_default: bool,
|
|
171
|
+
) -> float | None:
|
|
172
|
+
baseline = specific_rate if specific_rate is not None else global_rate
|
|
173
|
+
if baseline is None:
|
|
174
|
+
if not allow_default:
|
|
175
|
+
return None
|
|
176
|
+
baseline = RUSHMORE_DEFAULT_RATES[mode.value]
|
|
23
177
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
178
|
+
value = float(baseline)
|
|
179
|
+
value = max(0.0, value)
|
|
180
|
+
if mode is RushmoreMode.SWAP:
|
|
181
|
+
value = min(1.0, value)
|
|
182
|
+
return value
|
|
27
183
|
|
|
28
|
-
tokens = re.split(r"(\s+)", text) # Split but keep separators for later rejoin
|
|
29
184
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
185
|
+
def _resolve_rushmore_config(
|
|
186
|
+
*,
|
|
187
|
+
modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None,
|
|
188
|
+
rate: float | None,
|
|
189
|
+
delete_rate: float | None,
|
|
190
|
+
duplicate_rate: float | None,
|
|
191
|
+
swap_rate: float | None,
|
|
192
|
+
unweighted: bool,
|
|
193
|
+
delete_unweighted: bool | None,
|
|
194
|
+
duplicate_unweighted: bool | None,
|
|
195
|
+
allow_defaults: bool,
|
|
196
|
+
) -> RushmoreRuntimeConfig | None:
|
|
197
|
+
normalized_modes = _normalize_modes(modes)
|
|
198
|
+
global_rate = float(rate) if rate is not None else None
|
|
199
|
+
|
|
200
|
+
mode_specific_rates: dict[RushmoreMode, float | None] = {
|
|
201
|
+
RushmoreMode.DELETE: delete_rate,
|
|
202
|
+
RushmoreMode.DUPLICATE: duplicate_rate,
|
|
203
|
+
RushmoreMode.SWAP: swap_rate,
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
rates: dict[RushmoreMode, float] = {}
|
|
207
|
+
for mode in normalized_modes:
|
|
208
|
+
resolved = _resolve_mode_rate(
|
|
209
|
+
mode=mode,
|
|
210
|
+
global_rate=global_rate,
|
|
211
|
+
specific_rate=mode_specific_rates[mode],
|
|
212
|
+
allow_default=allow_defaults,
|
|
213
|
+
)
|
|
214
|
+
if resolved is None:
|
|
215
|
+
return None
|
|
216
|
+
rates[mode] = resolved
|
|
35
217
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
core_length = len(core) if core else len(word)
|
|
39
|
-
if core_length <= 0:
|
|
40
|
-
core_length = len(word.strip()) or len(word)
|
|
41
|
-
if core_length <= 0:
|
|
42
|
-
core_length = 1
|
|
43
|
-
weight = 1.0 if unweighted else 1.0 / core_length
|
|
44
|
-
candidate_data.append((i, weight))
|
|
45
|
-
|
|
46
|
-
if not candidate_data:
|
|
47
|
-
return text
|
|
218
|
+
delete_flag = bool(delete_unweighted if delete_unweighted is not None else unweighted)
|
|
219
|
+
duplicate_flag = bool(duplicate_unweighted if duplicate_unweighted is not None else unweighted)
|
|
48
220
|
|
|
49
|
-
|
|
50
|
-
|
|
221
|
+
return RushmoreRuntimeConfig(
|
|
222
|
+
modes=normalized_modes,
|
|
223
|
+
rates=rates,
|
|
224
|
+
delete_unweighted=delete_flag,
|
|
225
|
+
duplicate_unweighted=duplicate_flag,
|
|
51
226
|
)
|
|
52
|
-
if allowed_deletions <= 0:
|
|
53
|
-
return text
|
|
54
227
|
|
|
55
|
-
mean_weight = sum(weight for _, weight in candidate_data) / len(candidate_data)
|
|
56
228
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
229
|
+
def delete_random_words(
|
|
230
|
+
text: str,
|
|
231
|
+
rate: float | None = None,
|
|
232
|
+
seed: int | None = None,
|
|
233
|
+
rng: random.Random | None = None,
|
|
234
|
+
unweighted: bool = False,
|
|
235
|
+
) -> str:
|
|
236
|
+
"""Delete random words from the input text."""
|
|
237
|
+
effective_rate = RUSHMORE_DEFAULT_RATES["delete"] if rate is None else rate
|
|
61
238
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
else:
|
|
65
|
-
if mean_weight <= 0.0:
|
|
66
|
-
probability = effective_rate
|
|
67
|
-
else:
|
|
68
|
-
probability = min(1.0, effective_rate * (weight / mean_weight))
|
|
69
|
-
if rng.random() >= probability:
|
|
70
|
-
continue
|
|
239
|
+
clamped_rate = max(0.0, effective_rate)
|
|
240
|
+
unweighted_flag = bool(unweighted)
|
|
71
241
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
if match:
|
|
75
|
-
prefix, _, suffix = match.groups()
|
|
76
|
-
tokens[index] = f"{prefix.strip()}{suffix.strip()}"
|
|
77
|
-
else:
|
|
78
|
-
tokens[index] = ""
|
|
242
|
+
seed_value = resolve_seed(seed, rng)
|
|
243
|
+
return delete_random_words_rust(text, clamped_rate, unweighted_flag, seed_value)
|
|
79
244
|
|
|
80
|
-
deletions += 1
|
|
81
245
|
|
|
82
|
-
|
|
83
|
-
text
|
|
84
|
-
|
|
246
|
+
def reduplicate_words(
|
|
247
|
+
text: str,
|
|
248
|
+
rate: float | None = None,
|
|
249
|
+
seed: int | None = None,
|
|
250
|
+
rng: random.Random | None = None,
|
|
251
|
+
*,
|
|
252
|
+
unweighted: bool = False,
|
|
253
|
+
) -> str:
|
|
254
|
+
"""Randomly reduplicate words in the text."""
|
|
255
|
+
effective_rate = RUSHMORE_DEFAULT_RATES["duplicate"] if rate is None else rate
|
|
256
|
+
|
|
257
|
+
clamped_rate = max(0.0, effective_rate)
|
|
258
|
+
unweighted_flag = bool(unweighted)
|
|
85
259
|
|
|
86
|
-
|
|
260
|
+
seed_value = resolve_seed(seed, rng)
|
|
261
|
+
return reduplicate_words_rust(text, clamped_rate, unweighted_flag, seed_value)
|
|
87
262
|
|
|
88
263
|
|
|
89
|
-
def
|
|
264
|
+
def swap_adjacent_words(
|
|
90
265
|
text: str,
|
|
91
266
|
rate: float | None = None,
|
|
92
267
|
seed: int | None = None,
|
|
93
268
|
rng: random.Random | None = None,
|
|
94
|
-
*,
|
|
95
|
-
max_deletion_rate: float | None = None,
|
|
96
|
-
unweighted: bool = False,
|
|
97
269
|
) -> str:
|
|
98
|
-
"""
|
|
270
|
+
"""Swap adjacent word cores while preserving spacing and punctuation."""
|
|
271
|
+
effective_rate = RUSHMORE_DEFAULT_RATES["swap"] if rate is None else rate
|
|
272
|
+
clamped_rate = max(0.0, min(effective_rate, 1.0))
|
|
273
|
+
|
|
274
|
+
seed_value = resolve_seed(seed, rng)
|
|
275
|
+
return swap_adjacent_words_rust(text, clamped_rate, seed_value)
|
|
99
276
|
|
|
100
|
-
Uses the optional Rust implementation when available.
|
|
101
|
-
"""
|
|
102
277
|
|
|
103
|
-
|
|
278
|
+
def rushmore_attack(
|
|
279
|
+
text: str,
|
|
280
|
+
*,
|
|
281
|
+
modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None = None,
|
|
282
|
+
rate: float | None = None,
|
|
283
|
+
delete_rate: float | None = None,
|
|
284
|
+
duplicate_rate: float | None = None,
|
|
285
|
+
swap_rate: float | None = None,
|
|
286
|
+
unweighted: bool = False,
|
|
287
|
+
delete_unweighted: bool | None = None,
|
|
288
|
+
duplicate_unweighted: bool | None = None,
|
|
289
|
+
seed: int | None = None,
|
|
290
|
+
rng: random.Random | None = None,
|
|
291
|
+
) -> str:
|
|
292
|
+
"""Apply the configured Rushmore attack modes to ``text``."""
|
|
293
|
+
config = _resolve_rushmore_config(
|
|
294
|
+
modes=modes,
|
|
104
295
|
rate=rate,
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
296
|
+
delete_rate=delete_rate,
|
|
297
|
+
duplicate_rate=duplicate_rate,
|
|
298
|
+
swap_rate=swap_rate,
|
|
299
|
+
unweighted=unweighted,
|
|
300
|
+
delete_unweighted=delete_unweighted,
|
|
301
|
+
duplicate_unweighted=duplicate_unweighted,
|
|
302
|
+
allow_defaults=True,
|
|
108
303
|
)
|
|
304
|
+
if config is None:
|
|
305
|
+
return text
|
|
109
306
|
|
|
110
|
-
|
|
111
|
-
|
|
307
|
+
mode_rng = rng
|
|
308
|
+
if mode_rng is None and seed is not None:
|
|
309
|
+
mode_rng = random.Random(resolve_seed(seed, None))
|
|
112
310
|
|
|
113
|
-
|
|
114
|
-
|
|
311
|
+
result = text
|
|
312
|
+
for mode in config.modes:
|
|
313
|
+
if not config.has_mode(mode):
|
|
314
|
+
continue
|
|
115
315
|
|
|
116
|
-
|
|
117
|
-
|
|
316
|
+
rate_value = config.rates[mode]
|
|
317
|
+
if rate_value <= 0.0:
|
|
318
|
+
continue
|
|
118
319
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
320
|
+
if mode is RushmoreMode.DELETE:
|
|
321
|
+
result = delete_random_words(
|
|
322
|
+
result,
|
|
323
|
+
rate=rate_value,
|
|
324
|
+
rng=mode_rng,
|
|
325
|
+
unweighted=config.delete_unweighted,
|
|
326
|
+
)
|
|
327
|
+
elif mode is RushmoreMode.DUPLICATE:
|
|
328
|
+
result = reduplicate_words(
|
|
329
|
+
result,
|
|
330
|
+
rate=rate_value,
|
|
331
|
+
rng=mode_rng,
|
|
332
|
+
unweighted=config.duplicate_unweighted,
|
|
333
|
+
)
|
|
334
|
+
else:
|
|
335
|
+
result = swap_adjacent_words(
|
|
336
|
+
result,
|
|
337
|
+
rate=rate_value,
|
|
338
|
+
rng=mode_rng,
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
return result
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _rushmore_pipeline_descriptor(glitchling: Glitchling) -> dict[str, Any] | None:
|
|
345
|
+
config = _resolve_rushmore_config(
|
|
346
|
+
modes=glitchling.kwargs.get("modes"),
|
|
347
|
+
rate=glitchling.kwargs.get("rate"),
|
|
348
|
+
delete_rate=glitchling.kwargs.get("delete_rate"),
|
|
349
|
+
duplicate_rate=glitchling.kwargs.get("duplicate_rate"),
|
|
350
|
+
swap_rate=glitchling.kwargs.get("swap_rate"),
|
|
351
|
+
unweighted=glitchling.kwargs.get("unweighted", False),
|
|
352
|
+
delete_unweighted=glitchling.kwargs.get("delete_unweighted"),
|
|
353
|
+
duplicate_unweighted=glitchling.kwargs.get("duplicate_unweighted"),
|
|
354
|
+
allow_defaults=True,
|
|
124
355
|
)
|
|
356
|
+
if config is None:
|
|
357
|
+
return None
|
|
358
|
+
return config.to_pipeline_descriptor()
|
|
125
359
|
|
|
126
360
|
|
|
127
361
|
class Rushmore(Glitchling):
|
|
128
|
-
"""Glitchling that
|
|
362
|
+
"""Glitchling that bundles deletion, duplication, and swap attacks."""
|
|
363
|
+
|
|
364
|
+
flavor = (
|
|
365
|
+
"You shouldn't have waited for the last minute to write that paper, anon. "
|
|
366
|
+
"Sure hope everything is in the right place."
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
_param_aliases = {"mode": "modes"}
|
|
129
370
|
|
|
130
371
|
def __init__(
|
|
131
372
|
self,
|
|
132
373
|
*,
|
|
374
|
+
name: str = "Rushmore",
|
|
375
|
+
modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None = None,
|
|
133
376
|
rate: float | None = None,
|
|
134
|
-
|
|
377
|
+
delete_rate: float | None = None,
|
|
378
|
+
duplicate_rate: float | None = None,
|
|
379
|
+
swap_rate: float | None = None,
|
|
135
380
|
seed: int | None = None,
|
|
136
381
|
unweighted: bool = False,
|
|
382
|
+
delete_unweighted: bool | None = None,
|
|
383
|
+
duplicate_unweighted: bool | None = None,
|
|
137
384
|
) -> None:
|
|
138
|
-
|
|
139
|
-
effective_rate = resolve_rate(
|
|
140
|
-
rate=rate,
|
|
141
|
-
legacy_value=max_deletion_rate,
|
|
142
|
-
default=0.01,
|
|
143
|
-
legacy_name="max_deletion_rate",
|
|
144
|
-
)
|
|
385
|
+
normalized_modes = _normalize_modes(modes)
|
|
145
386
|
super().__init__(
|
|
146
|
-
name=
|
|
147
|
-
corruption_function=
|
|
387
|
+
name=name,
|
|
388
|
+
corruption_function=rushmore_attack,
|
|
148
389
|
scope=AttackWave.WORD,
|
|
149
390
|
seed=seed,
|
|
150
|
-
|
|
391
|
+
pipeline_operation=_rushmore_pipeline_descriptor,
|
|
392
|
+
modes=normalized_modes,
|
|
393
|
+
rate=rate,
|
|
394
|
+
delete_rate=delete_rate,
|
|
395
|
+
duplicate_rate=duplicate_rate,
|
|
396
|
+
swap_rate=swap_rate,
|
|
151
397
|
unweighted=unweighted,
|
|
398
|
+
delete_unweighted=delete_unweighted,
|
|
399
|
+
duplicate_unweighted=duplicate_unweighted,
|
|
152
400
|
)
|
|
153
401
|
|
|
154
|
-
def pipeline_operation(self) -> dict[str, Any] | None:
|
|
155
|
-
rate = self.kwargs.get("rate")
|
|
156
|
-
if rate is None:
|
|
157
|
-
rate = self.kwargs.get("max_deletion_rate")
|
|
158
|
-
if rate is None:
|
|
159
|
-
return None
|
|
160
|
-
unweighted = bool(self.kwargs.get("unweighted", False))
|
|
161
|
-
return {
|
|
162
|
-
"type": "delete",
|
|
163
|
-
"max_deletion_rate": float(rate),
|
|
164
|
-
"unweighted": unweighted,
|
|
165
|
-
}
|
|
166
|
-
|
|
167
402
|
|
|
168
403
|
rushmore = Rushmore()
|
|
169
404
|
|
|
170
405
|
|
|
171
|
-
__all__ = [
|
|
406
|
+
__all__ = [
|
|
407
|
+
"Rushmore",
|
|
408
|
+
"rushmore",
|
|
409
|
+
"RushmoreMode",
|
|
410
|
+
"rushmore_attack",
|
|
411
|
+
"delete_random_words",
|
|
412
|
+
"reduplicate_words",
|
|
413
|
+
"swap_adjacent_words",
|
|
414
|
+
]
|