glitchlings-0.10.2-cp312-cp312-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- glitchlings/__init__.py +99 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +147 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +493 -0
- glitchlings/attack/core_execution.py +367 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +218 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +227 -0
- glitchlings/auggie.py +284 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +19 -0
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +490 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +27 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +356 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +678 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +243 -0
- glitchlings/zoo/mim1c.py +148 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +97 -0
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +66 -0
- glitchlings/zoo/transforms.py +346 -0
- glitchlings/zoo/typogre.py +128 -0
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +93 -0
- glitchlings-0.10.2.dist-info/METADATA +337 -0
- glitchlings-0.10.2.dist-info/RECORD +83 -0
- glitchlings-0.10.2.dist-info/WHEEL +5 -0
- glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
- glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.10.2.dist-info/top_level.txt +1 -0
glitchlings/zoo/scannequin.py
@@ -0,0 +1,66 @@
+import random
+from typing import Any, cast
+
+from glitchlings.constants import DEFAULT_SCANNEQUIN_RATE
+from glitchlings.internal.rust_ffi import ocr_artifacts_rust, resolve_seed
+
+from .core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
+
+
+def ocr_artifacts(
+    text: str,
+    rate: float | None = None,
+    seed: int | None = None,
+    rng: random.Random | None = None,
+) -> str:
+    """Introduce OCR-like artifacts into text.
+
+    Uses the Rust implementation for performance and determinism.
+    """
+    if not text:
+        return text
+
+    effective_rate = DEFAULT_SCANNEQUIN_RATE if rate is None else rate
+
+    clamped_rate = max(0.0, effective_rate)
+
+    return ocr_artifacts_rust(text, clamped_rate, resolve_seed(seed, rng))
+
+
+class Scannequin(Glitchling):
+    """Glitchling that simulates OCR artifacts using common confusions."""
+
+    flavor = "Isn't it weird how the word 'bed' looks like a bed?"
+
+    def __init__(
+        self,
+        *,
+        rate: float | None = None,
+        seed: int | None = None,
+        **kwargs: Any,
+    ) -> None:
+        effective_rate = DEFAULT_SCANNEQUIN_RATE if rate is None else rate
+        super().__init__(
+            name="Scannequin",
+            corruption_function=ocr_artifacts,
+            scope=AttackWave.CHARACTER,
+            order=AttackOrder.LATE,
+            seed=seed,
+            rate=effective_rate,
+            **kwargs,
+        )
+
+    def pipeline_operation(self) -> PipelineOperationPayload:
+        rate_value = self.kwargs.get("rate", DEFAULT_SCANNEQUIN_RATE)
+        rate = DEFAULT_SCANNEQUIN_RATE if rate_value is None else float(rate_value)
+
+        return cast(
+            PipelineOperationPayload,
+            {"type": "ocr", "rate": rate},
+        )
+
+
+scannequin = Scannequin()
+
+
+__all__ = ["Scannequin", "scannequin", "ocr_artifacts"]
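A minimal usage sketch of the additions in this hunk (illustrative only: it assumes the wheel's bundled Rust extension is importable so ocr_artifacts_rust resolves, and that the Glitchling base class in zoo/core.py, not shown in this diff, stores keyword arguments on self.kwargs as pipeline_operation implies):

from glitchlings.zoo.scannequin import Scannequin, ocr_artifacts

# Free-function form: rate=None falls back to DEFAULT_SCANNEQUIN_RATE,
# and a fixed seed makes the corruption deterministic.
noisy = ocr_artifacts("The quick brown fox.", rate=0.1, seed=42)

# Class form: the same keyword arguments are stored on the instance and
# forwarded to ocr_artifacts by the Glitchling base class.
scanner = Scannequin(rate=0.1, seed=42)
assert scanner.pipeline_operation() == {"type": "ocr", "rate": 0.1}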
glitchlings/zoo/transforms.py
@@ -0,0 +1,346 @@
+"""Pure text transformation functions.
+
+This module contains text manipulation functions that are:
+- **Pure**: Output depends only on inputs, no side effects
+- **Deterministic**: Same inputs always produce same outputs
+- **Self-contained**: No RNG, no Rust FFI, no config loading
+
+These functions receive pre-validated inputs from boundary layers
+(see validation.py) and trust that inputs are already checked.
+Core transformation code should NOT re-validate parameters.
+
+Design Philosophy
+-----------------
+This module implements the innermost layer of the purity architecture:
+
+    CLI/API → validation.py → transforms.py → Rust FFI
+    (boundary)   (boundary)     (pure core)    (impure)
+
+Functions here should:
+- Accept concrete types (not Optional unless semantically required)
+- Not log, print, or mutate external state
+- Not import impure modules (internal.rust, config loaders, etc.)
+- Document any preconditions callers must satisfy
+
+See AGENTS.md "Functional Purity Architecture" for full details.
+"""
+
+from __future__ import annotations
+
+import re
+from collections.abc import Iterable, Mapping, Sequence
+from dataclasses import dataclass
+from typing import TypeVar, cast
+
+# Re-export from util.keyboards for backwards compatibility
+# The actual implementation lives in util.keyboards to avoid circular imports
+from ..util.keyboards import KeyNeighborMap, build_keyboard_neighbor_map
+
+# ---------------------------------------------------------------------------
+# Text Tokenization
+# ---------------------------------------------------------------------------
+
+_WORD_SPLIT_PATTERN = re.compile(r"(\s+)")
+_TOKEN_EDGES_PATTERN = re.compile(r"^(\W*)(.*?)(\W*)$", re.DOTALL)
+
+
+def split_preserving_whitespace(text: str) -> list[str]:
+    """Split text while keeping whitespace tokens for stable reconstruction.
+
+    Returns alternating [word, whitespace, word, whitespace, ...] tokens.
+    Joining the result reconstructs the original text exactly.
+
+    Args:
+        text: Input text to tokenize.
+
+    Returns:
+        List of tokens alternating between non-whitespace and whitespace.
+
+    Example:
+        >>> split_preserving_whitespace("hello world")
+        ['hello', ' ', 'world']
+    """
+    return _WORD_SPLIT_PATTERN.split(text)
+
+
+def split_token_edges(token: str) -> tuple[str, str, str]:
+    """Decompose a token into leading punctuation, core, and trailing punctuation.
+
+    Args:
+        token: A non-whitespace token.
+
+    Returns:
+        Tuple of (prefix, core, suffix) where:
+        - prefix: leading non-word characters
+        - core: central word characters
+        - suffix: trailing non-word characters
+
+    Example:
+        >>> split_token_edges('"Hello!"')
+        ('"', 'Hello', '!"')
+    """
+    match = cast(re.Match[str], _TOKEN_EDGES_PATTERN.match(token))
+    prefix, core, suffix = match.groups()
+    return prefix, core, suffix
+
+
+def compute_core_length(token: str) -> int:
+    """Compute the effective length of a token's core for weighting heuristics.
+
+    Used by weighted sampling algorithms to prioritize longer words.
+    Always returns at least 1 to avoid zero-weight issues.
+
+    Args:
+        token: A non-whitespace token.
+
+    Returns:
+        Positive integer representing the token's effective length.
+    """
+    _, core, _ = split_token_edges(token)
+    if core:
+        return len(core)
+    stripped = token.strip()
+    if stripped:
+        return len(stripped)
+    if token:
+        return len(token)
+    return 1
+
+
+@dataclass(frozen=True)
+class WordToken:
+    """Metadata describing a non-whitespace token from text tokenization.
+
+    Attributes:
+        index: Position in the parent token sequence.
+        prefix: Leading non-word characters (punctuation).
+        core: Central word characters.
+        suffix: Trailing non-word characters (punctuation).
+        core_length: Effective length for weighting (always >= 1).
+    """
+
+    index: int
+    prefix: str
+    core: str
+    suffix: str
+    core_length: int
+
+    @property
+    def has_core(self) -> bool:
+        """Return True when the token contains at least one core character."""
+        return bool(self.core)
+
+
+def collect_word_tokens(
+    tokens: Sequence[str],
+    *,
+    skip_first_word: bool = False,
+) -> list[WordToken]:
+    """Extract structured metadata for non-whitespace tokens.
+
+    Args:
+        tokens: Token sequence from split_preserving_whitespace.
+        skip_first_word: If True, exclude the first content token
+            (useful for preserving leading words in delete operations).
+
+    Returns:
+        List of WordToken instances for each non-whitespace token.
+    """
+    start = 2 if skip_first_word else 0
+    collected: list[WordToken] = []
+
+    for index in range(start, len(tokens), 2):
+        token = tokens[index]
+        if not token or token.isspace():
+            continue
+
+        prefix, core, suffix = split_token_edges(token)
+        core_length = compute_core_length(token)
+
+        collected.append(
+            WordToken(
+                index=index,
+                prefix=prefix,
+                core=core,
+                suffix=suffix,
+                core_length=core_length,
+            )
+        )
+
+    return collected
+
+
+def reassemble_tokens(tokens: Sequence[str]) -> str:
+    """Join tokens back into text, preserving original structure.
+
+    Args:
+        tokens: Token sequence (typically modified from split_preserving_whitespace).
+
+    Returns:
+        Reassembled text string.
+    """
+    return "".join(tokens)
+
+
+# ---------------------------------------------------------------------------
+# Keyboard Layout Processing
+# ---------------------------------------------------------------------------
+
+# KeyNeighborMap and build_keyboard_neighbor_map are imported at module level
+# and re-exported for backwards compatibility. See module imports above.
+
+
+# ---------------------------------------------------------------------------
+# String Difference Computation
+# ---------------------------------------------------------------------------
+
+
+def compute_string_diffs(
+    original: str,
+    modified: str,
+) -> list[list[tuple[str, str, str]]]:
+    """Compare two strings and return grouped adjacent change operations.
+
+    Uses difflib's SequenceMatcher to identify changes between strings.
+    Consecutive changes are grouped together; equal regions are skipped.
+
+    Args:
+        original: The original string.
+        modified: The modified string.
+
+    Returns:
+        List of change groups. Each group is a list of (tag, old_text, new_text)
+        tuples where tag is 'replace', 'delete', or 'insert'.
+
+    Example:
+        >>> compute_string_diffs("hello world", "helo worlds")
+        [[('delete', 'l', '')], [('insert', '', 's')]]
+    """
+    import difflib
+
+    sm = difflib.SequenceMatcher(None, original, modified)
+    ops: list[list[tuple[str, str, str]]] = []
+    buffer: list[tuple[str, str, str]] = []
+
+    for tag, i1, i2, j1, j2 in sm.get_opcodes():
+        if tag == "equal":
+            if buffer:
+                ops.append(buffer)
+                buffer = []
+            continue
+        buffer.append((tag, original[i1:i2], modified[j1:j2]))
+
+    if buffer:
+        ops.append(buffer)
+
+    return ops
+
+
+# ---------------------------------------------------------------------------
+# Sequence Operations
+# ---------------------------------------------------------------------------
+
+T = TypeVar("T")
+
+
+def stable_deduplicate(items: Iterable[T]) -> list[T]:
+    """Remove duplicates while preserving original order.
+
+    Args:
+        items: Iterable of hashable items.
+
+    Returns:
+        List with duplicates removed, first occurrence preserved.
+
+    Example:
+        >>> stable_deduplicate([3, 1, 4, 1, 5, 9, 2, 6, 5])
+        [3, 1, 4, 5, 9, 2, 6]
+    """
+    seen: set[T] = set()
+    result: list[T] = []
+    for item in items:
+        if item not in seen:
+            seen.add(item)
+            result.append(item)
+    return result
+
+
+def interleave_lists(
+    primary: Sequence[T],
+    secondary: Sequence[T],
+    *,
+    secondary_first: bool = False,
+) -> list[T]:
+    """Interleave two sequences, appending leftover items from the longer one.
+
+    Args:
+        primary: First sequence.
+        secondary: Second sequence.
+        secondary_first: If True, start with secondary element.
+
+    Returns:
+        Interleaved list [p0, s0, p1, s1, ...] or [s0, p0, s1, p1, ...].
+    """
+    result: list[T] = []
+    max_len = max(len(primary), len(secondary))
+
+    for i in range(max_len):
+        if secondary_first:
+            if i < len(secondary):
+                result.append(secondary[i])
+            if i < len(primary):
+                result.append(primary[i])
+        else:
+            if i < len(primary):
+                result.append(primary[i])
+            if i < len(secondary):
+                result.append(secondary[i])
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Mapping Helpers
+# ---------------------------------------------------------------------------
+
+
+def invert_mapping(
+    mapping: Mapping[str, Sequence[str]],
+) -> dict[str, str]:
+    """Invert a one-to-many mapping into a many-to-one lookup.
+
+    Given {key: [val1, val2]}, returns {val1: key, val2: key}.
+    Later keys overwrite earlier ones if values collide.
+
+    Args:
+        mapping: Dictionary mapping keys to sequences of values.
+
+    Returns:
+        Inverted dictionary mapping each value to its key.
+    """
+    inverted: dict[str, str] = {}
+    for key, values in mapping.items():
+        for value in values:
+            inverted[value] = key
+    return inverted
+
+
+__all__ = [
+    # Tokenization
+    "split_preserving_whitespace",
+    "split_token_edges",
+    "compute_core_length",
+    "WordToken",
+    "collect_word_tokens",
+    "reassemble_tokens",
+    # Keyboard
+    "KeyNeighborMap",
+    "build_keyboard_neighbor_map",
+    # Diffs
+    "compute_string_diffs",
+    # Sequences
+    "stable_deduplicate",
+    "interleave_lists",
+    # Mappings
+    "invert_mapping",
+]
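A short sketch exercising the pure helpers from this hunk (illustrative; these calls involve no Rust FFI, so the expected values follow directly from the function bodies above):

from glitchlings.zoo.transforms import (
    collect_word_tokens,
    compute_string_diffs,
    invert_mapping,
    reassemble_tokens,
    split_preserving_whitespace,
    stable_deduplicate,
)

# Round trip: whitespace runs become their own tokens, so joining is lossless.
tokens = split_preserving_whitespace('"Hello!"  world')
assert tokens == ['"Hello!"', '  ', 'world']
assert reassemble_tokens(tokens) == '"Hello!"  world'

# Word metadata: punctuation is peeled off into prefix/suffix.
first = collect_word_tokens(tokens)[0]
assert (first.prefix, first.core, first.suffix) == ('"', 'Hello', '!"')

# Grouped diffs: one group per run of adjacent non-equal opcodes.
assert compute_string_diffs("hello world", "helo worlds") == [
    [("delete", "l", "")],
    [("insert", "", "s")],
]

# Order-preserving dedup and one-to-many inversion.
assert stable_deduplicate([3, 1, 4, 1, 5]) == [3, 1, 4, 5]
assert invert_mapping({"a": ["x", "y"]}) == {"x": "a", "y": "a"}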
glitchlings/zoo/typogre.py
@@ -0,0 +1,128 @@
+from __future__ import annotations
+
+import random
+from collections.abc import Mapping, Sequence
+from typing import Any, cast
+
+from glitchlings.constants import DEFAULT_TYPOGRE_KEYBOARD, DEFAULT_TYPOGRE_RATE
+from glitchlings.internal.rust_ffi import fatfinger_rust, resolve_seed
+
+from ..util import KEYNEIGHBORS, SHIFT_MAPS
+from .core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
+
+
+def _resolve_slip_exit_rate(
+    shift_slip_rate: float,
+    shift_slip_exit_rate: float | None,
+) -> float:
+    """Derive the slip exit rate, defaulting to a burst-friendly value."""
+
+    if shift_slip_exit_rate is not None:
+        return max(0.0, shift_slip_exit_rate)
+    return max(0.0, shift_slip_rate * 0.5)
+
+
+def fatfinger(
+    text: str,
+    rate: float | None = None,
+    keyboard: str = DEFAULT_TYPOGRE_KEYBOARD,
+    layout: Mapping[str, Sequence[str]] | None = None,
+    seed: int | None = None,
+    rng: random.Random | None = None,
+    *,
+    shift_slip_rate: float = 0.0,
+    shift_slip_exit_rate: float | None = None,
+    shift_map: Mapping[str, str] | None = None,
+) -> str:
+    """Introduce character-level "fat finger" edits with a Rust fast path."""
+    effective_rate = DEFAULT_TYPOGRE_RATE if rate is None else rate
+
+    if not text:
+        return ""
+
+    layout_mapping = layout if layout is not None else getattr(KEYNEIGHBORS, keyboard)
+    slip_rate = max(0.0, shift_slip_rate)
+    slip_exit_rate = _resolve_slip_exit_rate(slip_rate, shift_slip_exit_rate)
+    slip_map = shift_map if shift_map is not None else getattr(SHIFT_MAPS, keyboard, None)
+
+    clamped_rate = max(0.0, effective_rate)
+    if slip_rate == 0.0 and clamped_rate == 0.0:
+        return text
+
+    return fatfinger_rust(
+        text,
+        clamped_rate,
+        layout_mapping,
+        resolve_seed(seed, rng),
+        shift_slip_rate=slip_rate,
+        shift_slip_exit_rate=slip_exit_rate,
+        shift_map=slip_map,
+    )
+
+
+class Typogre(Glitchling):
+    """Glitchling that introduces deterministic keyboard-typing errors."""
+
+    flavor = "What a nice word, would be a shame if something happened to it..."
+
+    def __init__(
+        self,
+        *,
+        rate: float | None = None,
+        keyboard: str = DEFAULT_TYPOGRE_KEYBOARD,
+        shift_slip_rate: float = 0.0,
+        shift_slip_exit_rate: float | None = None,
+        seed: int | None = None,
+        **kwargs: Any,
+    ) -> None:
+        effective_rate = DEFAULT_TYPOGRE_RATE if rate is None else rate
+        super().__init__(
+            name="Typogre",
+            corruption_function=fatfinger,
+            scope=AttackWave.CHARACTER,
+            order=AttackOrder.EARLY,
+            seed=seed,
+            rate=effective_rate,
+            keyboard=keyboard,
+            shift_slip_rate=max(0.0, shift_slip_rate),
+            shift_slip_exit_rate=shift_slip_exit_rate,
+            **kwargs,
+        )
+
+    def pipeline_operation(self) -> PipelineOperationPayload:
+        rate_value = self.kwargs.get("rate")
+        rate = DEFAULT_TYPOGRE_RATE if rate_value is None else float(rate_value)
+        keyboard = self.kwargs.get("keyboard", DEFAULT_TYPOGRE_KEYBOARD)
+        layout = getattr(KEYNEIGHBORS, str(keyboard), None)
+        if layout is None:
+            message = f"Unknown keyboard layout '{keyboard}' for Typogre pipeline"
+            raise RuntimeError(message)
+
+        serialized_layout = {key: list(value) for key, value in layout.items()}
+        shift_slip_rate = float(self.kwargs.get("shift_slip_rate", 0.0) or 0.0)
+        shift_slip_exit_rate = self.kwargs.get("shift_slip_exit_rate")
+        resolved_exit_rate = _resolve_slip_exit_rate(shift_slip_rate, shift_slip_exit_rate)
+        shift_map = getattr(SHIFT_MAPS, str(keyboard), None)
+        if shift_slip_rate > 0.0 and shift_map is None:
+            message = f"Unknown shift map layout '{keyboard}' for Typogre pipeline"
+            raise RuntimeError(message)
+        serialized_shift_map = dict(shift_map) if shift_map is not None else None
+
+        return cast(
+            PipelineOperationPayload,
+            {
+                "type": "typo",
+                "rate": float(rate),
+                "keyboard": str(keyboard),
+                "layout": serialized_layout,
+                "shift_slip_rate": shift_slip_rate,
+                "shift_slip_exit_rate": float(resolved_exit_rate),
+                "shift_map": serialized_shift_map,
+            },
+        )
+
+
+typogre = Typogre()
+
+
+__all__ = ["Typogre", "typogre", "fatfinger"]
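A minimal usage sketch for this hunk (illustrative only: it assumes the Rust extension backing fatfinger_rust is importable and that the default keyboard named by DEFAULT_TYPOGRE_KEYBOARD exists as an attribute on KEYNEIGHBORS; the layout tables themselves live in util/keyboards.py, which this diff does not show):

from glitchlings.zoo.typogre import Typogre, fatfinger

# Free-function form: a fixed seed keeps the edits reproducible, and
# shift_slip_rate > 0 enables bursts of shifted characters whose exit
# rate defaults to half the entry rate (see _resolve_slip_exit_rate).
mangled = fatfinger("hello world", rate=0.05, seed=7, shift_slip_rate=0.02)

# Class form: pipeline_operation() serializes the resolved layout and
# shift map so a downstream pipeline can replay the same corruption.
payload = Typogre(rate=0.05, seed=7).pipeline_operation()
assert payload["type"] == "typo"
assert payload["shift_slip_exit_rate"] == 0.0  # slip rate defaulted to 0.0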