glitchlings 0.10.2__cp312-cp312-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +99 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +147 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +493 -0
- glitchlings/attack/core_execution.py +367 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +218 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +227 -0
- glitchlings/auggie.py +284 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +19 -0
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +490 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +27 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +356 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +678 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +243 -0
- glitchlings/zoo/mim1c.py +148 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +97 -0
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +66 -0
- glitchlings/zoo/transforms.py +346 -0
- glitchlings/zoo/typogre.py +128 -0
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +93 -0
- glitchlings-0.10.2.dist-info/METADATA +337 -0
- glitchlings-0.10.2.dist-info/RECORD +83 -0
- glitchlings-0.10.2.dist-info/WHEEL +5 -0
- glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
- glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.10.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
"""Boundary validation layer for glitchling parameters.
|
|
2
|
+
|
|
3
|
+
This module centralizes all input validation, type coercion, and defensive checks
|
|
4
|
+
for glitchling parameters. Functions here are called at module boundaries (CLI,
|
|
5
|
+
public API entry points, configuration loaders) to ensure that invalid data is
|
|
6
|
+
rejected early.
|
|
7
|
+
|
|
8
|
+
**Design Philosophy:**
|
|
9
|
+
|
|
10
|
+
All functions in this module are *pure* - they perform validation and coercion
|
|
11
|
+
based solely on their inputs, without side effects. They are intended to be
|
|
12
|
+
called once at the boundary where untrusted input enters the system. Core
|
|
13
|
+
transformation functions that call these validation helpers can then trust
|
|
14
|
+
their inputs without re-validating.
|
|
15
|
+
|
|
16
|
+
See AGENTS.md "Functional Purity Architecture" for full details.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import math
|
|
22
|
+
import re
|
|
23
|
+
from collections.abc import Collection, Iterable, Mapping, Sequence
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from typing import Literal, TypeVar, cast
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Rate Validation (universal)
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def clamp_rate(value: float, *, allow_nan: bool = False) -> float:
    """Clamp a rate value to [0.0, infinity), optionally preserving NaN.

    Args:
        value: The rate to clamp.
        allow_nan: If False (default), NaN values become 0.0. If True, NaN
            is returned unchanged.

    Returns:
        The clamped rate value.
    """
    if math.isnan(value):
        # NaN compares false against everything, so it is handled explicitly
        # instead of being fed to max().
        return value if allow_nan else 0.0
    return max(0.0, value)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def clamp_rate_unit(value: float, *, allow_nan: bool = False) -> float:
    """Clamp a rate value to [0.0, 1.0], optionally preserving NaN.

    Args:
        value: The rate to clamp.
        allow_nan: If False (default), NaN values become 0.0. If True, NaN
            is returned unchanged.

    Returns:
        The clamped rate value in range [0.0, 1.0] (or NaN when permitted).
    """
    if math.isnan(value):
        # NaN compares false against everything, so it is handled explicitly
        # instead of being fed to min()/max().
        return value if allow_nan else 0.0
    return max(0.0, min(1.0, value))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def resolve_rate(
    value: float | None,
    default: float,
    *,
    clamp: bool = True,
    unit_interval: bool = False,
) -> float:
    """Resolve a rate parameter, applying defaults and optional clamping.

    Args:
        value: The user-provided rate, or None for default.
        default: The default rate to use when value is None.
        clamp: Whether to clamp the result to non-negative. When False the
            effective rate is returned untouched (including negatives and NaN).
        unit_interval: If True, clamp to [0.0, 1.0] instead of [0.0, inf).
            Ignored when ``clamp`` is False.

    Returns:
        The resolved, optionally clamped rate.
    """
    effective = default if value is None else value
    if not clamp:
        return effective
    # Both helpers are called with their default allow_nan=False, so NaN
    # becomes 0.0 on the clamped path.
    if unit_interval:
        return clamp_rate_unit(effective)
    return clamp_rate(effective)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
# Mim1c Validation
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def normalise_mim1c_classes(
    value: object,
) -> tuple[str, ...] | Literal["all"] | None:
    """Normalize Mim1c homoglyph class specification.

    Args:
        value: User input - None, "all", a single class name, or an iterable.

    Returns:
        Normalized tuple of class names, literal "all", or None.

    Raises:
        TypeError: If value is not None, string, or iterable.
    """
    if value is None:
        return None
    if isinstance(value, str):
        # "all" is matched case-insensitively; any other string is treated as
        # one class name and kept verbatim (no case folding, no splitting).
        if value.lower() == "all":
            return "all"
        return (value,)
    if isinstance(value, Iterable):
        # Items are coerced with str() so mixed iterables still yield strings.
        return tuple(str(item) for item in value)
    raise TypeError("classes must be an iterable of strings or 'all'")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def normalise_mim1c_banned(value: object) -> tuple[str, ...] | None:
    """Normalize Mim1c banned character specification.

    Args:
        value: User input - None, a string of characters, or an iterable.

    Returns:
        Normalized tuple of banned characters, or None.

    Raises:
        TypeError: If value is not None, string, or iterable.
    """
    if value is None:
        return None
    if isinstance(value, str):
        # A plain string is split into its individual characters.
        return tuple(value)
    if isinstance(value, Iterable):
        # Items are coerced with str() and kept whole (not split further).
        return tuple(str(item) for item in value)
    raise TypeError("banned_characters must be an iterable of strings")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# Wherewolf Validation
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def normalise_homophone_group(group: Sequence[str]) -> tuple[str, ...]:
    """Return a tuple of lowercase homophones preserving original order.

    Empty entries are dropped, and words that collide after lowercasing are
    kept only at their first position.

    Args:
        group: Sequence of homophone words.

    Returns:
        De-duplicated tuple of lowercase words.
    """
    # dict.fromkeys keeps first-seen order while discarding duplicates.
    return tuple(dict.fromkeys(word.lower() for word in group if word))
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def build_homophone_lookup(
    groups: Iterable[Sequence[str]],
) -> Mapping[str, tuple[str, ...]]:
    """Return a mapping from word -> homophone group.

    Each group is normalised with ``normalise_homophone_group`` first. Groups
    with fewer than two distinct words after normalisation are skipped. When a
    word appears in several groups, the last group processed wins.

    Args:
        groups: Iterable of homophone word groups.

    Returns:
        Dictionary mapping each word to its normalized group.
    """
    lookup: dict[str, tuple[str, ...]] = {}
    for group in groups:
        normalised = normalise_homophone_group(group)
        if len(normalised) < 2:
            # A single word has nothing to be swapped with.
            continue
        for word in normalised:
            lookup[word] = normalised
    return lookup
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
# Rushmore Validation
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
|
|
181
|
+
# RushmoreMode is deliberately NOT imported here, to avoid a circular dependency at module level.
|
|
182
|
+
# The RushmoreMode enum is defined in rushmore.py but we need its values here
|
|
183
|
+
# for mode validation. We use string-based validation to avoid the import cycle.
|
|
184
|
+
|
|
185
|
+
# Maps every accepted (lowercase) mode spelling to its canonical mode name.
_RUSHMORE_MODE_ALIASES: dict[str, str] = {
    "delete": "delete",
    "drop": "delete",
    "rushmore": "delete",
    "duplicate": "duplicate",
    "reduplicate": "duplicate",
    "repeat": "duplicate",
    "swap": "swap",
    "adjacent": "swap",
}

# Canonical modes in the order returned for the "all" / "any" / "full" keywords.
_RUSHMORE_EXECUTION_ORDER: tuple[str, ...] = ("delete", "duplicate", "swap")


def normalize_rushmore_mode_item(value: str) -> list[str]:
    """Parse a single Rushmore mode specification into canonical mode names.

    The input is stringified, stripped, and lowercased before parsing.
    Compound expressions may separate modes with ``+``, ``,`` or whitespace.
    Duplicates are NOT removed at this level.

    Args:
        value: A mode name, alias, or compound expression like "delete+duplicate".

    Returns:
        List of canonical mode names ("delete", "duplicate", "swap"). Empty
        when the input is blank or consists only of separators.

    Raises:
        ValueError: If the mode name is not recognized.
    """
    text = str(value).strip().lower()
    if not text:
        return []

    # These keywords are only recognised as the entire specification, not as
    # one token inside a compound expression.
    if text in {"all", "any", "full"}:
        return list(_RUSHMORE_EXECUTION_ORDER)

    tokens = [token for token in re.split(r"[+,\s]+", text) if token]
    if not tokens:
        # Input such as "+," contains separators only.
        return []

    modes: list[str] = []
    for token in tokens:
        mode = _RUSHMORE_MODE_ALIASES.get(token)
        if mode is None:
            # The message reports the caller's original value, not the token.
            raise ValueError(f"Unsupported Rushmore mode '{value}'")
        modes.append(mode)
    return modes
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def normalize_rushmore_modes(
    modes: str | Iterable[str] | None,
    *,
    default: str = "delete",
) -> tuple[str, ...]:
    """Normalize Rushmore mode specification to canonical tuple.

    Args:
        modes: User input - None, single mode string, or iterable of modes.
        default: Default mode when input is None or empty.

    Returns:
        Tuple of unique canonical mode names in insertion order.

    Raises:
        ValueError: If any mode specification (including ``default`` when it
            is used) is not recognized by ``normalize_rushmore_mode_item``.
    """
    candidates: Sequence[str]
    if modes is None:
        candidates = (default,)
    elif isinstance(modes, str):
        candidates = (modes,)
    else:
        # Materialise once so one-shot iterators can be checked for emptiness.
        collected = tuple(modes)
        candidates = collected if collected else (default,)

    resolved: list[str] = []
    seen: set[str] = set()
    for candidate in candidates:
        for mode in normalize_rushmore_mode_item(candidate):
            if mode not in seen:
                seen.add(mode)
                resolved.append(mode)

    if resolved:
        return tuple(resolved)

    # Every candidate was blank. Canonicalise the default the same way the
    # ``modes is None`` path does, so an alias default (e.g. "drop") is never
    # returned un-normalised.
    fallback = tuple(dict.fromkeys(normalize_rushmore_mode_item(default)))
    # A blank default cannot be canonicalised; return it as given.
    return fallback if fallback else (default,)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
@dataclass(frozen=True)
class RushmoreRateConfig:
    """Resolved rate configuration for a single Rushmore mode."""

    # Mode name this configuration applies to.
    mode: str
    # Rate value associated with the mode.
    rate: float
    # Flag recording whether the rate is a default; False unless set explicitly.
    is_default: bool = False
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def resolve_rushmore_mode_rate(
    *,
    mode: str,
    global_rate: float | None,
    specific_rate: float | None,
    default_rates: Mapping[str, float],
    allow_default: bool,
) -> float | None:
    """Resolve the effective rate for a single Rushmore mode.

    Precedence is ``specific_rate``, then ``global_rate``, then (only when
    ``allow_default`` is true) ``default_rates[mode]``.

    Args:
        mode: The canonical mode name ("delete", "duplicate", "swap").
        global_rate: User-provided global rate, or None.
        specific_rate: User-provided mode-specific rate, or None.
        default_rates: Mapping of mode names to default rates.
        allow_default: Whether to fall back to defaults when no rate provided.

    Returns:
        The resolved rate, or None if no rate available and defaults disallowed.
        The rate is clamped to be non-negative, and additionally capped at 1.0
        for the "swap" mode. NaN resolves to 0.0.
    """
    baseline = specific_rate if specific_rate is not None else global_rate
    if baseline is None:
        if not allow_default:
            return None
        baseline = default_rates.get(mode)
        if baseline is None:
            return None

    value = float(baseline)
    if math.isnan(value):
        # Handle NaN explicitly; feeding it to max()/min() would make the
        # result depend on argument order.
        return 0.0
    value = max(0.0, value)
    if mode == "swap":
        # Only the swap rate has an upper bound.
        value = min(1.0, value)
    return value
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# ---------------------------------------------------------------------------
|
|
311
|
+
# Keyboard Layout Validation
|
|
312
|
+
# ---------------------------------------------------------------------------
|
|
313
|
+
|
|
314
|
+
# Generic type variable; no definition visible in this module references it.
T = TypeVar("T")
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def validate_keyboard_layout(
    keyboard: str,
    layouts: object,
    *,
    context: str = "keyboard layout",
) -> Mapping[str, Sequence[str]]:
    """Validate that a keyboard layout name exists and return its mapping.

    Args:
        keyboard: The layout name to look up.
        layouts: Object with layout names as attributes (e.g., KEYNEIGHBORS).
        context: Description for error messages.

    Returns:
        The keyboard neighbor mapping.

    Raises:
        RuntimeError: If the layout name is not found. Non-string names and
            names starting with an underscore are always treated as not found.
    """
    layout = None
    # The lookup uses getattr, so without this guard a name such as
    # "__class__" would "validate" and return an attribute that is not a layout.
    if isinstance(keyboard, str) and not keyboard.startswith("_"):
        layout = getattr(layouts, keyboard, None)
    if layout is None:
        raise RuntimeError(f"Unknown {context} '{keyboard}'")
    return cast(Mapping[str, Sequence[str]], layout)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def get_keyboard_layout_or_default(
    keyboard: str,
    layouts: object,
    *,
    default: Mapping[str, Sequence[str]] | None = None,
) -> Mapping[str, Sequence[str]] | None:
    """Look up a keyboard layout, returning None or default if not found.

    Args:
        keyboard: The layout name to look up.
        layouts: Object with layout names as attributes.
        default: Value to return if layout not found.

    Returns:
        The keyboard neighbor mapping, or default if not found. Non-string
        names and names starting with an underscore are always treated as
        not found.
    """
    # The lookup uses getattr, so private and dunder names are rejected up
    # front; otherwise "__class__" would return a non-layout attribute.
    if not isinstance(keyboard, str) or keyboard.startswith("_"):
        return default
    layout = getattr(layouts, keyboard, None)
    if layout is None:
        return default
    return cast(Mapping[str, Sequence[str]], layout)
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
# ---------------------------------------------------------------------------
|
|
365
|
+
# Zeedub Validation
|
|
366
|
+
# ---------------------------------------------------------------------------
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def normalize_zero_width_palette(
    characters: Sequence[str] | None,
    default: tuple[str, ...],
) -> tuple[str, ...]:
    """Normalize zero-width character palette, filtering empty entries.

    Args:
        characters: User-provided character sequence, or None for default.
        default: Default character palette.

    Returns:
        Tuple of non-empty characters. Empty entries are filtered from the
        default palette as well, so the result may be empty.
    """
    palette: Sequence[str] = tuple(characters) if characters is not None else default
    # Falsy entries (e.g. "") are dropped.
    return tuple(char for char in palette if char)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
# ---------------------------------------------------------------------------
|
|
387
|
+
# Redactyl Validation
|
|
388
|
+
# ---------------------------------------------------------------------------
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def normalize_replacement_char(
    replacement_char: str | None,
    default: str,
) -> str:
    """Normalize redaction replacement character.

    Args:
        replacement_char: User-provided character, or None for default.
        default: Default replacement character.

    Returns:
        The replacement character as a string. A non-None input is coerced
        with ``str()``; its length is not checked.
    """
    if replacement_char is None:
        return default
    return str(replacement_char)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# ---------------------------------------------------------------------------
|
|
408
|
+
# Boolean Flag Helpers
|
|
409
|
+
# ---------------------------------------------------------------------------
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def resolve_bool_flag(
    specific: bool | None,
    global_default: bool,
) -> bool:
    """Resolve a boolean flag with specific/global precedence.

    Args:
        specific: Specific override value, or None to use global.
        global_default: Global default when specific is None.

    Returns:
        The resolved boolean flag, always coerced to a real ``bool``.
    """
    # Only None defers to the global value; an explicit False is an override.
    chosen = global_default if specific is None else specific
    return bool(chosen)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
# ---------------------------------------------------------------------------
|
|
429
|
+
# Collection Helpers
|
|
430
|
+
# ---------------------------------------------------------------------------
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def normalize_string_collection(
    value: str | Collection[str] | None,
) -> tuple[str, ...] | None:
    """Normalize a string or collection of strings to a tuple.

    Args:
        value: Single string, collection of strings, or None.

    Returns:
        Tuple of strings, or None if input is None. A single string becomes a
        one-element tuple and is not split into characters.
    """
    if value is None:
        return None
    if isinstance(value, str):
        return (value,)
    return tuple(value)
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# Public API of this module, grouped by the glitchling each helper serves.
__all__ = [
    # Rate validation
    "clamp_rate",
    "clamp_rate_unit",
    "resolve_rate",
    # Mim1c
    "normalise_mim1c_classes",
    "normalise_mim1c_banned",
    # Wherewolf
    "normalise_homophone_group",
    "build_homophone_lookup",
    # Rushmore
    "normalize_rushmore_mode_item",
    "normalize_rushmore_modes",
    "resolve_rushmore_mode_rate",
    "RushmoreRateConfig",
    # Keyboard
    "validate_keyboard_layout",
    "get_keyboard_layout_or_default",
    # Zeedub
    "normalize_zero_width_palette",
    # Redactyl
    "normalize_replacement_char",
    # Flags and helpers
    "resolve_bool_flag",
    "normalize_string_collection",
]
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Homophone substitution glitchling implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
import random
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence
|
|
8
|
+
|
|
9
|
+
from glitchlings.assets import load_homophone_groups
|
|
10
|
+
from glitchlings.constants import DEFAULT_WHEREWOLF_RATE, DEFAULT_WHEREWOLF_WEIGHTING
|
|
11
|
+
from glitchlings.internal.rust_ffi import resolve_seed, wherewolf_homophones_rust
|
|
12
|
+
|
|
13
|
+
from .core import AttackOrder, AttackWave
|
|
14
|
+
from .core import Glitchling as _GlitchlingRuntime
|
|
15
|
+
|
|
16
|
+
# Homophone groups, loaded once at import time via load_homophone_groups().
_homophone_groups: tuple[tuple[str, ...], ...] = load_homophone_groups()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _normalise_group(group: Sequence[str]) -> tuple[str, ...]:
    """Return a tuple of lowercase homophones preserving original order.

    Empty entries are dropped, and words that collide after lowercasing are
    kept only at their first position.
    """
    # dict.fromkeys keeps first-seen order while discarding duplicates.
    return tuple(dict.fromkeys(word.lower() for word in group if word))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _build_lookup(groups: Iterable[Sequence[str]]) -> Mapping[str, tuple[str, ...]]:
    """Return a mapping from word -> homophone group.

    Each group is normalised with ``_normalise_group`` first. Groups with
    fewer than two distinct words after normalisation are skipped. When a word
    appears in several groups, the last group processed wins.
    """
    lookup: dict[str, tuple[str, ...]] = {}
    for group in groups:
        normalised = _normalise_group(group)
        if len(normalised) < 2:
            # A single word has nothing to be swapped with.
            continue
        for word in normalised:
            lookup[word] = normalised
    return lookup
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Word -> homophone-group lookup, built once at import time from the loaded groups.
_homophone_lookup = _build_lookup(_homophone_groups)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class _GlitchlingProtocol:
    """Typing stub describing the glitchling surface this module relies on.

    All methods have empty (``...``) bodies. NOTE(review): no code visible in
    this module references the class - confirm it is still needed.
    """

    # Keyword arguments recorded on the glitchling instance.
    kwargs: dict[str, Any]

    def __init__(self, *args: Any, **kwargs: Any) -> None: ...

    def reset_rng(self, seed: int | None = None) -> None: ...

    def pipeline_operation(self) -> dict[str, object] | None: ...
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Static type checkers see the real Glitchling class directly; at runtime the
# same class is bound through the _GlitchlingRuntime alias imported above.
if TYPE_CHECKING:
    from .core import Glitchling as _GlitchlingBase
else:
    _GlitchlingBase = _GlitchlingRuntime
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def substitute_homophones(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Replace words in ``text`` with curated homophones.

    Args:
        text: Input text to corrupt.
        rate: Substitution rate; ``None`` selects ``DEFAULT_WHEREWOLF_RATE``.
            The value is clamped to [0.0, 1.0] and NaN is treated as 0.0.
        seed: Optional seed, forwarded to ``resolve_seed``.
        rng: Optional random generator, forwarded to ``resolve_seed``.

    Returns:
        The string returned by ``wherewolf_homophones_rust``.
    """
    effective_rate = DEFAULT_WHEREWOLF_RATE if rate is None else rate

    # NaN compares false against everything, so it is mapped to 0.0 explicitly
    # before clamping.
    if math.isnan(effective_rate):
        clamped_rate = 0.0
    else:
        clamped_rate = max(0.0, min(1.0, effective_rate))

    return wherewolf_homophones_rust(
        text,
        clamped_rate,
        DEFAULT_WHEREWOLF_WEIGHTING,
        resolve_seed(seed, rng),
    )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class Wherewolf(_GlitchlingBase):
    """Glitchling that swaps words for curated homophones."""

    flavor = "Homophonic idiolectician. There leased favourite flavour? Orange."

    def __init__(
        self,
        *,
        rate: float | None = None,
        seed: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Configure the glitchling.

        Args:
            rate: Substitution rate; ``None`` selects ``DEFAULT_WHEREWOLF_RATE``.
            seed: Optional seed forwarded to the base class.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        # Resolve the default here so the base class always receives a
        # concrete rate.
        effective_rate = DEFAULT_WHEREWOLF_RATE if rate is None else rate
        super().__init__(
            name="Wherewolf",
            corruption_function=substitute_homophones,
            scope=AttackWave.WORD,
            order=AttackOrder.EARLY,
            seed=seed,
            pipeline_operation=_build_pipeline_descriptor,
            rate=effective_rate,
            **kwargs,
        )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _build_pipeline_descriptor(glitch: _GlitchlingBase) -> dict[str, object]:
    """Build the pipeline descriptor dictionary for a Wherewolf glitchling.

    Args:
        glitch: Glitchling whose ``kwargs`` hold the configured ``rate``.

    Returns:
        Dictionary with ``type``, ``rate`` and ``weighting`` keys. The rate
        falls back to ``DEFAULT_WHEREWOLF_RATE`` when it is missing or None.
    """
    rate_value = glitch.kwargs.get("rate")
    rate = DEFAULT_WHEREWOLF_RATE if rate_value is None else float(rate_value)
    return {
        "type": "wherewolf",
        "rate": rate,
        "weighting": DEFAULT_WHEREWOLF_WEIGHTING,
    }
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# Module-level instance built with default arguments; created at import time.
wherewolf = Wherewolf()
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Public API of this module.
__all__ = [
    "Wherewolf",
    "wherewolf",
    "substitute_homophones",
]
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from typing import Any, cast
|
|
6
|
+
|
|
7
|
+
from glitchlings.constants import DEFAULT_ZEEDUB_RATE, ZEEDUB_DEFAULT_ZERO_WIDTHS
|
|
8
|
+
from glitchlings.internal.rust_ffi import (
|
|
9
|
+
inject_zero_widths_rust,
|
|
10
|
+
resolve_seed,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from .core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
|
|
14
|
+
|
|
15
|
+
# Module-level alias of the shared default palette; the code visible here uses
# ZEEDUB_DEFAULT_ZERO_WIDTHS directly rather than this alias.
_DEFAULT_ZERO_WIDTH_CHARACTERS: tuple[str, ...] = ZEEDUB_DEFAULT_ZERO_WIDTHS
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def insert_zero_widths(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    characters: Sequence[str] | None = None,
) -> str:
    """Inject zero-width characters between non-space character pairs.

    Args:
        text: Input text to corrupt.
        rate: Injection rate; ``None`` selects ``DEFAULT_ZEEDUB_RATE``. Rates
            that are zero, negative, or NaN leave the text unchanged. There is
            no upper clamp.
        seed: Optional seed, forwarded to ``resolve_seed``.
        rng: Optional random generator, forwarded to ``resolve_seed``.
        characters: Palette of characters to inject; ``None`` selects
            ``ZEEDUB_DEFAULT_ZERO_WIDTHS``. Empty entries are ignored.

    Returns:
        ``text`` unchanged when there is nothing to do, otherwise the string
        returned by ``inject_zero_widths_rust``.
    """
    effective_rate = DEFAULT_ZEEDUB_RATE if rate is None else rate

    palette: Sequence[str] = (
        tuple(characters) if characters is not None else ZEEDUB_DEFAULT_ZERO_WIDTHS
    )

    cleaned_palette = tuple(char for char in palette if char)
    if not cleaned_palette or not text:
        return text

    # ``not rate > 0.0`` is true for zero, negative, and NaN rates, so none of
    # them reach the native call; positive rates pass through unmodified.
    if not effective_rate > 0.0:
        return text

    seed_value = resolve_seed(seed, rng)
    return inject_zero_widths_rust(text, effective_rate, list(cleaned_palette), seed_value)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class Zeedub(Glitchling):
    """Glitchling that plants zero-width glyphs inside words."""

    flavor = "I'm invoking my right to remain silent."

    def __init__(
        self,
        *,
        rate: float | None = None,
        seed: int | None = None,
        characters: Sequence[str] | None = None,
        **kwargs: Any,
    ) -> None:
        """Configure the glitchling.

        Args:
            rate: Injection rate; ``None`` selects ``DEFAULT_ZEEDUB_RATE``.
            seed: Optional seed forwarded to the base class.
            characters: Optional palette of characters to inject; stored as a
                tuple, or ``None`` to use the default palette.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        effective_rate = DEFAULT_ZEEDUB_RATE if rate is None else rate
        super().__init__(
            name="Zeedub",
            corruption_function=insert_zero_widths,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.LAST,
            seed=seed,
            rate=effective_rate,
            # Snapshot the palette so later mutation of the caller's sequence
            # cannot change this instance.
            characters=tuple(characters) if characters is not None else None,
            **kwargs,
        )

    def pipeline_operation(self) -> PipelineOperationPayload:
        """Build the pipeline descriptor for this glitchling.

        Returns:
            Payload with ``type``, ``rate`` and ``characters`` keys. The rate
            falls back to ``DEFAULT_ZEEDUB_RATE`` when it is missing or None,
            and the palette falls back to ``ZEEDUB_DEFAULT_ZERO_WIDTHS`` when
            no characters are stored. Empty palette entries are dropped.
        """
        raw_rate = self.kwargs.get("rate")
        # A stored None means "use the default"; float(None) would raise
        # TypeError.
        rate = float(DEFAULT_ZEEDUB_RATE if raw_rate is None else raw_rate)

        raw_characters = self.kwargs.get("characters")
        if raw_characters is None:
            palette = tuple(ZEEDUB_DEFAULT_ZERO_WIDTHS)
        else:
            palette = tuple(str(char) for char in raw_characters if char)

        return cast(
            PipelineOperationPayload,
            {
                "type": "zwj",
                "rate": rate,
                "characters": list(palette),
            },
        )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# Module-level instance built with default arguments; created at import time.
zeedub = Zeedub()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# Public API of this module.
__all__ = ["Zeedub", "zeedub", "insert_zero_widths"]
|