glitchlings 0.10.2__cp312-cp312-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +99 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +147 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +493 -0
- glitchlings/attack/core_execution.py +367 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +218 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +227 -0
- glitchlings/auggie.py +284 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +19 -0
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +490 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +27 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +356 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +678 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +243 -0
- glitchlings/zoo/mim1c.py +148 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +97 -0
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +66 -0
- glitchlings/zoo/transforms.py +346 -0
- glitchlings/zoo/typogre.py +128 -0
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +93 -0
- glitchlings-0.10.2.dist-info/METADATA +337 -0
- glitchlings-0.10.2.dist-info/RECORD +83 -0
- glitchlings-0.10.2.dist-info/WHEEL +5 -0
- glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
- glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.10.2.dist-info/top_level.txt +1 -0
glitchlings/zoo/rng.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""RNG boundary layer for seed resolution.
|
|
2
|
+
|
|
3
|
+
This module provides the interface between RNG state and concrete random values.
|
|
4
|
+
All randomness in the glitchlings library flows through these functions.
|
|
5
|
+
|
|
6
|
+
Design Philosophy
|
|
7
|
+
-----------------
|
|
8
|
+
RNG management is an *impure* operation - it involves stateful objects
|
|
9
|
+
(random.Random) and non-deterministic behavior when no seed is provided.
|
|
10
|
+
This module provides the boundary layer that converts RNG state into
|
|
11
|
+
concrete values that can be passed to pure functions.
|
|
12
|
+
|
|
13
|
+
The pattern is:
|
|
14
|
+
1. User provides `seed: int | None` and/or `rng: random.Random | None`
|
|
15
|
+
2. Boundary layer resolves to a concrete `int` via `resolve_seed()`
|
|
16
|
+
3. Pure/Rust functions receive the concrete seed value
|
|
17
|
+
|
|
18
|
+
This separation means:
|
|
19
|
+
- Pure transformation code never touches RNG objects
|
|
20
|
+
- Tests can provide explicit seed values for reproducibility
|
|
21
|
+
- RNG state management is isolated to the boundary
|
|
22
|
+
|
|
23
|
+
See AGENTS.md "Functional Purity Architecture" for full details.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import random
|
|
29
|
+
from hashlib import blake2s
|
|
30
|
+
from typing import Protocol, runtime_checkable
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Constants
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
# Seeds are limited to 64 bits so they stay compatible with a Rust u64.
SEED_BIT_WIDTH = 64
# Mask with the low SEED_BIT_WIDTH bits set (0xFFFFFFFFFFFFFFFF).
SEED_MASK = 2**SEED_BIT_WIDTH - 1
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Protocols
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@runtime_checkable
class RandomBitsSource(Protocol):
    """Structural type for any object exposing a ``getrandbits`` method.

    The protocol is runtime-checkable, so ``isinstance`` can be used to test
    whether an object provides ``getrandbits``.
    """

    def getrandbits(self, k: int) -> int:
        """Produce a non-negative integer made of ``k`` random bits."""
        ...
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# Core Boundary Functions
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def resolve_seed(
    seed: int | None,
    rng: random.Random | None,
) -> int:
    """Collapse an optional seed and optional RNG into one concrete seed.

    Intended to be called once at the boundary layer; the returned integer
    is then handed to downstream pure/Rust functions.

    Args:
        seed: Explicit seed. When given it wins over ``rng`` and is masked
            down to ``SEED_BIT_WIDTH`` bits.
        rng: Generator to draw ``SEED_BIT_WIDTH`` random bits from when
            ``seed`` is None.

    Returns:
        A non-negative integer that fits in ``SEED_BIT_WIDTH`` bits.

    Note:
        With both arguments None the module-level ``random`` state is used,
        which is non-deterministic unless that global state was seeded.

    Examples:
        >>> resolve_seed(42, None)  # explicit seed
        42
        >>> resolve_seed(-1, None)  # masked into the unsigned 64-bit range
        18446744073709551615
        >>> rng = random.Random(123)
        >>> resolve_seed(None, rng)  # sample from RNG
        14522756016584210807
    """
    if seed is None:
        # No explicit seed: draw from the caller's generator when there is
        # one, otherwise from the module-level random state.
        if rng is None:
            return random.getrandbits(SEED_BIT_WIDTH)
        return rng.getrandbits(SEED_BIT_WIDTH)
    return int(seed) & SEED_MASK
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def resolve_seed_deterministic(
    seed: int | None,
    rng: random.Random | None,
) -> int:
    """Resolve a seed, refusing to fall back to global random state.

    Behaves like ``resolve_seed()`` except that the "neither argument
    supplied" case is an error instead of a draw from the module-level RNG.
    Use it where non-deterministic behaviour would be a bug.

    Args:
        seed: Explicit seed; takes precedence and is masked to
            ``SEED_BIT_WIDTH`` bits.
        rng: Generator to sample ``SEED_BIT_WIDTH`` bits from when ``seed``
            is None.

    Returns:
        A non-negative integer that fits in ``SEED_BIT_WIDTH`` bits.

    Raises:
        ValueError: If both ``seed`` and ``rng`` are None.
    """
    if seed is not None:
        return int(seed) & SEED_MASK
    if rng is None:
        raise ValueError("Either seed or rng must be provided for deterministic behavior")
    return rng.getrandbits(SEED_BIT_WIDTH)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
# Seed Derivation (Deterministic)
|
|
123
|
+
# ---------------------------------------------------------------------------
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def derive_seed(base_seed: int, *components: int | str) -> int:
    """Derive a child seed from a parent seed plus extra components.

    A pure function for hierarchical seed derivation (used by Gaggle to give
    each glitchling its own reproducible seed). The inputs are hashed with
    blake2s rather than the built-in ``hash()``, so the result does not
    depend on PYTHONHASHSEED and is stable across interpreter runs.

    The hashed payload is the encoded ``base_seed`` followed, for every
    component, by a ``b"\\x00"`` separator and the encoded component.
    Strings are encoded as UTF-8; integers as minimal-width big-endian bytes.

    Args:
        base_seed: The parent seed.
        *components: Additional integers or strings to mix in.

    Returns:
        The 8-byte blake2s digest interpreted as a big-endian unsigned int.

    Examples:
        >>> derive_seed(12345, 0)  # first child
        13704458811836263874
        >>> derive_seed(12345, 1)  # second child
        7874335407589182396
        >>> derive_seed(12345, "typogre")  # named child
        561509252352425601
    """

    def _int_bytes(number: int) -> bytes:
        # Zero has bit_length 0, so give it an explicit single byte.
        if number == 0:
            return b"\x00"
        if number < 0:
            # Smallest width whose signed two's-complement range holds
            # ``number``. NOTE(review): a negative value can encode to the
            # same bytes as a positive one (e.g. -128 and 128 are both
            # b"\x80"); kept as-is so derived seeds stay unchanged.
            width = ((number + 1).bit_length() + 8) // 8
            return number.to_bytes(width, "big", signed=True)
        magnitude = abs(number)
        width = (magnitude.bit_length() + 7) // 8
        return magnitude.to_bytes(width, "big", signed=False)

    pieces = [_int_bytes(base_seed)]
    for item in components:
        if isinstance(item, str):
            pieces.append(item.encode("utf-8"))
        else:
            pieces.append(_int_bytes(item))

    # Joining on the separator yields the same byte stream as feeding the
    # hasher "separator, component" pairs one at a time.
    digest = blake2s(b"\x00".join(pieces), digest_size=8).digest()
    return int.from_bytes(digest, "big")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
# Random Value Generation (Impure)
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def create_rng(seed: int) -> random.Random:
    """Build a fresh ``random.Random`` seeded with ``seed``.

    Useful for creating child RNG states for parallel operations. Where
    possible, prefer handing concrete seed values to functions instead.

    Args:
        seed: Seed for the new generator.

    Returns:
        A newly constructed ``random.Random`` instance.
    """
    generator = random.Random(seed)
    return generator
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def sample_random_float(rng: random.Random) -> float:
    """Draw one float from ``rng`` via ``rng.random()``.

    Args:
        rng: The generator to advance.

    Returns:
        A float in the half-open range [0.0, 1.0).
    """
    drawn = rng.random()
    return drawn
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def sample_random_int(rng: random.Random, *, low: int, high: int) -> int:
    """Draw an integer from the closed interval [low, high].

    Args:
        rng: The generator to advance.
        low: Smallest value that may be returned.
        high: Largest value that may be returned.

    Returns:
        An integer ``n`` with ``low <= n <= high``.
    """
    picked = rng.randint(low, high)
    return picked
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def sample_random_index(rng: random.Random, length: int) -> int:
    """Pick a valid index into a sequence of ``length`` items.

    Args:
        rng: The generator to advance.
        length: Number of items in the sequence.

    Returns:
        An integer in the half-open range [0, length).

    Raises:
        ValueError: If ``length`` is zero or negative.
    """
    # Reject empty sequences up front with a purpose-specific message.
    if length <= 0:
        message = "Cannot sample index from empty sequence"
        raise ValueError(message)
    index = rng.randrange(length)
    return index
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# Public API of this module, grouped by role.
__all__ = [
    "SEED_BIT_WIDTH", "SEED_MASK",  # constants
    "RandomBitsSource",  # protocols
    "resolve_seed", "resolve_seed_deterministic",  # boundary functions
    "derive_seed",  # derivation
    # RNG operations (impure)
    "create_rng", "sample_random_float", "sample_random_int", "sample_random_index",
]
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
import re
|
|
5
|
+
from collections.abc import Iterable, Sequence
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from enum import Enum, unique
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from glitchlings.constants import RUSHMORE_DEFAULT_RATES
|
|
11
|
+
from glitchlings.internal.rust_ffi import (
|
|
12
|
+
delete_random_words_rust,
|
|
13
|
+
reduplicate_words_rust,
|
|
14
|
+
resolve_seed,
|
|
15
|
+
swap_adjacent_words_rust,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
from .core import AttackWave, Glitchling
|
|
19
|
+
from .transforms import WordToken
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@unique
class RushmoreMode(Enum):
    """Attack behaviours that Rushmore can be asked to apply."""

    DELETE = "delete"
    DUPLICATE = "duplicate"
    SWAP = "swap"

    @classmethod
    def execution_order(cls) -> tuple["RushmoreMode", ...]:
        """Give the fixed application order: delete, then duplicate, then swap."""
        ordered = (cls.DELETE, cls.DUPLICATE, cls.SWAP)
        return ordered
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Accepted spellings for each mode, keyed by lower-case alias.
_MODE_ALIASES: dict[str, RushmoreMode] = {
    alias: member
    for member, aliases in (
        (RushmoreMode.DELETE, ("delete", "drop", "rushmore")),
        (RushmoreMode.DUPLICATE, ("duplicate", "reduplicate", "repeat")),
        (RushmoreMode.SWAP, ("swap", "adjacent")),
    )
    for alias in aliases
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True)
class RushmoreRuntimeConfig:
    """Resolved Rushmore settings shared by the Python and Rust paths."""

    # Modes to apply, in application order.
    modes: tuple[RushmoreMode, ...]
    # Resolved rate for each configured mode.
    rates: dict[RushmoreMode, float]
    # Flag forwarded as "unweighted" for the delete operation.
    delete_unweighted: bool
    # Flag forwarded as "unweighted" for the duplicate operation.
    duplicate_unweighted: bool

    def has_mode(self, mode: RushmoreMode) -> bool:
        """Report whether a rate is configured for ``mode``."""
        return mode in self.rates

    def to_pipeline_descriptor(self) -> dict[str, Any]:
        """Serialise this configuration into a pipeline descriptor dict.

        A single-mode configuration becomes a flat descriptor whose "type"
        is "delete", "reduplicate" or "swap_adjacent". Several modes become
        one "rushmore_combo" descriptor with a sub-dict per configured mode.

        Returns:
            The descriptor dictionary.

        Raises:
            RuntimeError: If no modes are configured, if the single
                configured mode has no rate, or if that mode is not one of
                the serialisable modes.
        """
        if not self.modes:
            raise RuntimeError("Rushmore configuration is missing attack modes")

        if len(self.modes) == 1:
            only = self.modes[0]
            only_rate = self.rates.get(only)
            if only_rate is None:
                raise RuntimeError(f"Rushmore mode {only!r} is missing a configured rate")
            if only is RushmoreMode.DELETE:
                return {"type": "delete", "rate": only_rate, "unweighted": self.delete_unweighted}
            if only is RushmoreMode.DUPLICATE:
                return {
                    "type": "reduplicate",
                    "rate": only_rate,
                    "unweighted": self.duplicate_unweighted,
                }
            if only is RushmoreMode.SWAP:
                return {"type": "swap_adjacent", "rate": only_rate}
            raise RuntimeError(f"Rushmore mode {only!r} is not serialisable")

        combo: dict[str, Any] = {
            "type": "rushmore_combo",
            "modes": [member.value for member in self.modes],
        }
        # Delete and duplicate carry an "unweighted" flag; swap does not.
        flagged_sections = (
            (RushmoreMode.DELETE, "delete", self.delete_unweighted),
            (RushmoreMode.DUPLICATE, "duplicate", self.duplicate_unweighted),
        )
        for member, key, flag in flagged_sections:
            if self.has_mode(member):
                combo[key] = {"rate": self.rates[member], "unweighted": flag}
        if self.has_mode(RushmoreMode.SWAP):
            combo["swap"] = {"rate": self.rates[RushmoreMode.SWAP]}
        return combo
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass(frozen=True)
class _WeightedWordToken:
    """Immutable internal pairing of a word token with its weight."""

    # The token the weighting metadata belongs to.
    token: WordToken
    # Weight associated with ``token``.
    weight: float
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _normalize_mode_item(value: RushmoreMode | str) -> list[RushmoreMode]:
    """Expand one mode specifier into the list of modes it names.

    Args:
        value: A ``RushmoreMode`` member, or text naming one or more modes.
            Text is stripped and lower-cased; "all", "any" and "full" select
            every mode in execution order; otherwise the text is split on
            "+", "," and whitespace and each piece is looked up in
            ``_MODE_ALIASES``.

    Returns:
        The named modes in the order given (duplicates are not removed
        here); an empty list when the text contains no mode names.

    Raises:
        ValueError: If any piece is not a known alias.
    """
    if isinstance(value, RushmoreMode):
        return [value]

    cleaned = str(value).strip().lower()
    if cleaned in {"all", "any", "full"}:
        return list(RushmoreMode.execution_order())

    found: list[RushmoreMode] = []
    for name in re.split(r"[+,\s]+", cleaned):
        # Splitting can yield empty pieces (blank input, leading or
        # trailing separators); those carry no mode name.
        if not name:
            continue
        member = _MODE_ALIASES.get(name)
        if member is None:
            raise ValueError(f"Unsupported Rushmore mode '{value}'")
        found.append(member)
    return found
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _normalize_modes(
    modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None,
) -> tuple[RushmoreMode, ...]:
    """Normalise any accepted ``modes`` argument into a tuple of modes.

    Args:
        modes: None, a single mode or mode string, or an iterable of them.

    Returns:
        The distinct modes in first-seen order. Falls back to
        ``(RushmoreMode.DELETE,)`` when ``modes`` is None, is an empty
        iterable, or names no modes at all.

    Raises:
        ValueError: Propagated from ``_normalize_mode_item`` for unknown
            mode names.
    """
    fallback = (RushmoreMode.DELETE,)

    candidates: Sequence[RushmoreMode | str]
    if modes is None:
        candidates = fallback
    elif isinstance(modes, (RushmoreMode, str)):
        # A lone value (including a string) is one specifier, not an
        # iterable of characters.
        candidates = (modes,)
    else:
        candidates = tuple(modes) or fallback

    # A dict keeps insertion order, giving order-preserving de-duplication.
    distinct: dict[RushmoreMode, None] = {}
    for candidate in candidates:
        for member in _normalize_mode_item(candidate):
            distinct.setdefault(member, None)

    return tuple(distinct) or fallback
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _resolve_mode_rate(
    *,
    mode: RushmoreMode,
    global_rate: float | None,
    specific_rate: float | None,
    allow_default: bool,
) -> float | None:
    """Choose and clamp the rate to use for a single mode.

    Precedence is the mode-specific rate, then the global rate, then (only
    when ``allow_default`` is true) the entry for the mode in
    ``RUSHMORE_DEFAULT_RATES``.

    Args:
        mode: The mode whose rate is being resolved.
        global_rate: Rate shared by all modes, if any.
        specific_rate: Rate given for this mode in particular, if any.
        allow_default: Whether the packaged default may be used as a last
            resort.

    Returns:
        The rate as a float clamped to be at least 0.0 (and at most 1.0 for
        ``RushmoreMode.SWAP``), or None when no rate is available and
        defaults are not allowed.
    """
    if specific_rate is not None:
        chosen = specific_rate
    elif global_rate is not None:
        chosen = global_rate
    elif allow_default:
        chosen = RUSHMORE_DEFAULT_RATES[mode.value]
    else:
        return None

    bounded = max(0.0, float(chosen))
    if mode is RushmoreMode.SWAP:
        # Only the swap rate has an upper bound.
        bounded = min(1.0, bounded)
    return bounded
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _resolve_rushmore_config(
    *,
    modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None,
    rate: float | None,
    delete_rate: float | None,
    duplicate_rate: float | None,
    swap_rate: float | None,
    unweighted: bool,
    delete_unweighted: bool | None,
    duplicate_unweighted: bool | None,
    allow_defaults: bool,
) -> RushmoreRuntimeConfig | None:
    """Resolve user-facing Rushmore options into a runtime configuration.

    Args:
        modes: Mode specifier(s); normalised with ``_normalize_modes``.
        rate: Rate applied to every mode that has no specific rate.
        delete_rate: Rate specific to the delete mode.
        duplicate_rate: Rate specific to the duplicate mode.
        swap_rate: Rate specific to the swap mode.
        unweighted: Default for both per-mode unweighted flags.
        delete_unweighted: Overrides ``unweighted`` for delete when not None.
        duplicate_unweighted: Overrides ``unweighted`` for duplicate when
            not None.
        allow_defaults: Whether packaged default rates may be used.

    Returns:
        The resolved configuration, or None if some selected mode ends up
        without a rate.
    """
    selected = _normalize_modes(modes)
    shared_rate = None if rate is None else float(rate)

    overrides: dict[RushmoreMode, float | None] = {
        RushmoreMode.DELETE: delete_rate,
        RushmoreMode.DUPLICATE: duplicate_rate,
        RushmoreMode.SWAP: swap_rate,
    }

    resolved_rates: dict[RushmoreMode, float] = {}
    for member in selected:
        member_rate = _resolve_mode_rate(
            mode=member,
            global_rate=shared_rate,
            specific_rate=overrides[member],
            allow_default=allow_defaults,
        )
        if member_rate is None:
            # One unresolved mode invalidates the whole configuration.
            return None
        resolved_rates[member] = member_rate

    def _effective_flag(override: bool | None) -> bool:
        # A per-mode flag wins over the shared ``unweighted`` default.
        return bool(unweighted if override is None else override)

    return RushmoreRuntimeConfig(
        modes=selected,
        rates=resolved_rates,
        delete_unweighted=_effective_flag(delete_unweighted),
        duplicate_unweighted=_effective_flag(duplicate_unweighted),
    )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def delete_random_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    unweighted: bool = False,
) -> str:
    """Delete random words from ``text`` using the Rust implementation.

    Args:
        text: Input text.
        rate: Deletion rate; ``RUSHMORE_DEFAULT_RATES["delete"]`` when None.
            Negative values are raised to 0.0.
        seed: Explicit seed passed to ``resolve_seed``.
        rng: Generator passed to ``resolve_seed``.
        unweighted: Coerced to bool and forwarded to the Rust function.

    Returns:
        The text returned by ``delete_random_words_rust``.
    """
    if rate is None:
        rate = RUSHMORE_DEFAULT_RATES["delete"]
    floor_rate = max(0.0, rate)
    flag = bool(unweighted)
    # Resolve the seed last, immediately before crossing into Rust.
    return delete_random_words_rust(text, floor_rate, flag, resolve_seed(seed, rng))
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def reduplicate_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    unweighted: bool = False,
) -> str:
    """Randomly reduplicate words in ``text`` using the Rust implementation.

    Args:
        text: Input text.
        rate: Reduplication rate; ``RUSHMORE_DEFAULT_RATES["duplicate"]``
            when None. Negative values are raised to 0.0.
        seed: Explicit seed passed to ``resolve_seed``.
        rng: Generator passed to ``resolve_seed``.
        unweighted: Coerced to bool and forwarded to the Rust function.

    Returns:
        The text returned by ``reduplicate_words_rust``.
    """
    if rate is None:
        rate = RUSHMORE_DEFAULT_RATES["duplicate"]
    floor_rate = max(0.0, rate)
    flag = bool(unweighted)
    # Resolve the seed last, immediately before crossing into Rust.
    return reduplicate_words_rust(text, floor_rate, flag, resolve_seed(seed, rng))
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def swap_adjacent_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Swap adjacent word cores while preserving spacing and punctuation.

    Args:
        text: Input text.
        rate: Swap rate; ``RUSHMORE_DEFAULT_RATES["swap"]`` when None. The
            value is clamped into [0.0, 1.0].
        seed: Explicit seed passed to ``resolve_seed``.
        rng: Generator passed to ``resolve_seed``.

    Returns:
        The text returned by ``swap_adjacent_words_rust``.
    """
    if rate is None:
        rate = RUSHMORE_DEFAULT_RATES["swap"]
    capped = min(rate, 1.0)
    bounded_rate = max(0.0, capped)
    # Resolve the seed last, immediately before crossing into Rust.
    return swap_adjacent_words_rust(text, bounded_rate, resolve_seed(seed, rng))
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def rushmore_attack(
    text: str,
    *,
    modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None = None,
    rate: float | None = None,
    delete_rate: float | None = None,
    duplicate_rate: float | None = None,
    swap_rate: float | None = None,
    unweighted: bool = False,
    delete_unweighted: bool | None = None,
    duplicate_unweighted: bool | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Apply the configured Rushmore attack modes to ``text``.

    Args:
        text: Input text.
        modes: Mode specifier(s); see ``_normalize_modes``.
        rate: Rate for every mode without a specific rate.
        delete_rate: Rate specific to deletion.
        duplicate_rate: Rate specific to duplication.
        swap_rate: Rate specific to swapping.
        unweighted: Default for the per-mode unweighted flags.
        delete_unweighted: Overrides ``unweighted`` for deletion.
        duplicate_unweighted: Overrides ``unweighted`` for duplication.
        seed: Seed used to build a generator when ``rng`` is not given.
        rng: Generator shared by all modes; takes precedence over ``seed``.

    Returns:
        The text after each configured mode with a positive rate has been
        applied in the order held by the resolved configuration.
    """
    config = _resolve_rushmore_config(
        modes=modes,
        rate=rate,
        delete_rate=delete_rate,
        duplicate_rate=duplicate_rate,
        swap_rate=swap_rate,
        unweighted=unweighted,
        delete_unweighted=delete_unweighted,
        duplicate_unweighted=duplicate_unweighted,
        allow_defaults=True,
    )
    if config is None:
        return text

    # One generator is shared across modes so each mode draws its own seed
    # from it; with neither rng nor seed given it stays None.
    shared_rng = rng
    if shared_rng is None and seed is not None:
        shared_rng = random.Random(resolve_seed(seed, None))

    mutated = text
    for member in config.modes:
        if not config.has_mode(member):
            continue
        strength = config.rates[member]
        if strength <= 0.0:
            # A zero rate makes the mode a no-op; skip it entirely.
            continue

        if member is RushmoreMode.DELETE:
            mutated = delete_random_words(
                mutated,
                rate=strength,
                rng=shared_rng,
                unweighted=config.delete_unweighted,
            )
            continue
        if member is RushmoreMode.DUPLICATE:
            mutated = reduplicate_words(
                mutated,
                rate=strength,
                rng=shared_rng,
                unweighted=config.duplicate_unweighted,
            )
            continue
        # Any remaining mode is handled as a swap.
        mutated = swap_adjacent_words(mutated, rate=strength, rng=shared_rng)

    return mutated
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _rushmore_pipeline_descriptor(glitchling: Glitchling) -> dict[str, Any] | None:
    """Build the pipeline descriptor for a Rushmore glitchling.

    Args:
        glitchling: Object whose ``kwargs`` mapping holds the Rushmore
            options ("modes", "rate", "delete_rate", ...).

    Returns:
        The descriptor produced by the resolved configuration, or None when
        no configuration could be resolved.
    """
    options = glitchling.kwargs
    config = _resolve_rushmore_config(
        modes=options.get("modes"),
        rate=options.get("rate"),
        delete_rate=options.get("delete_rate"),
        duplicate_rate=options.get("duplicate_rate"),
        swap_rate=options.get("swap_rate"),
        unweighted=options.get("unweighted", False),
        delete_unweighted=options.get("delete_unweighted"),
        duplicate_unweighted=options.get("duplicate_unweighted"),
        allow_defaults=True,
    )
    return None if config is None else config.to_pipeline_descriptor()
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
class Rushmore(Glitchling):
    """Glitchling combining word deletion, duplication and swap attacks."""

    flavor = (
        "You shouldn't have waited for the last minute to write that paper, anon. "
        "Sure hope everything is in the right place."
    )

    # "mode" is accepted as an alias for the "modes" parameter.
    _param_aliases = {"mode": "modes"}

    def __init__(
        self,
        *,
        name: str = "Rushmore",
        modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None = None,
        rate: float | None = None,
        delete_rate: float | None = None,
        duplicate_rate: float | None = None,
        swap_rate: float | None = None,
        seed: int | None = None,
        unweighted: bool = False,
        delete_unweighted: bool | None = None,
        duplicate_unweighted: bool | None = None,
        **kwargs: Any,
    ) -> None:
        """Configure the glitchling.

        Args:
            name: Name passed to the base class.
            modes: Mode specifier(s); stored in normalised tuple form.
            rate: Rate for every mode without a specific rate.
            delete_rate: Rate specific to deletion.
            duplicate_rate: Rate specific to duplication.
            swap_rate: Rate specific to swapping.
            seed: Seed passed to the base class.
            unweighted: Default for the per-mode unweighted flags.
            delete_unweighted: Overrides ``unweighted`` for deletion.
            duplicate_unweighted: Overrides ``unweighted`` for duplication.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        # Normalise once here so the stored "modes" value is a tuple.
        attack_options: dict[str, Any] = {
            "modes": _normalize_modes(modes),
            "rate": rate,
            "delete_rate": delete_rate,
            "duplicate_rate": duplicate_rate,
            "swap_rate": swap_rate,
            "unweighted": unweighted,
            "delete_unweighted": delete_unweighted,
            "duplicate_unweighted": duplicate_unweighted,
        }
        super().__init__(
            name=name,
            corruption_function=rushmore_attack,
            scope=AttackWave.WORD,
            seed=seed,
            pipeline_operation=_rushmore_pipeline_descriptor,
            **attack_options,
            **kwargs,
        )
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
# Module-level Rushmore instance built with every constructor default.
rushmore = Rushmore()
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
# Public API of this module.
__all__ = [
    "Rushmore", "rushmore", "RushmoreMode",  # glitchling class, instance, mode enum
    "rushmore_attack",  # combined entry point
    "delete_random_words", "reduplicate_words", "swap_adjacent_words",  # single operations
]
|