glitchlings 1.0.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +101 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_corruption_engine/__init__.py +12 -0
- glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +184 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +819 -0
- glitchlings/attack/core_execution.py +378 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +211 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +338 -0
- glitchlings/attack/tokenizer_metrics.py +373 -0
- glitchlings/auggie.py +285 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +39 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +139 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +21 -0
- glitchlings/dlc/_shared.py +300 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/langchain.py +147 -0
- glitchlings/dlc/nemo.py +283 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +599 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +41 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +508 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +852 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +291 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +301 -0
- glitchlings/zoo/mim1c.py +269 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +99 -0
- glitchlings/zoo/pedant/forms.py +50 -0
- glitchlings/zoo/pedant/stones.py +83 -0
- glitchlings/zoo/redactyl.py +94 -0
- glitchlings/zoo/rng.py +280 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +370 -0
- glitchlings/zoo/transforms.py +331 -0
- glitchlings/zoo/typogre.py +194 -0
- glitchlings/zoo/validation.py +643 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +165 -0
- glitchlings-1.0.0.dist-info/METADATA +404 -0
- glitchlings-1.0.0.dist-info/RECORD +86 -0
- glitchlings-1.0.0.dist-info/WHEEL +5 -0
- glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
- glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
- glitchlings-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
"""Keyboard layout neighbor maps for typo simulation.
|
|
2
|
+
|
|
3
|
+
This module centralizes keyboard layout data that was previously stored
|
|
4
|
+
directly in :mod:`glitchlings.util.__init__`. It defines adjacency maps
|
|
5
|
+
for various keyboard layouts used by typo-generating glitchlings.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Iterable
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"KeyboardLayouts",
|
|
14
|
+
"KeyNeighbors",
|
|
15
|
+
"KEYNEIGHBORS",
|
|
16
|
+
"ShiftMap",
|
|
17
|
+
"ShiftMaps",
|
|
18
|
+
"SHIFT_MAPS",
|
|
19
|
+
"KeyNeighborMap",
|
|
20
|
+
"build_keyboard_neighbor_map",
|
|
21
|
+
# Pre-serialized accessors for pipeline use
|
|
22
|
+
"get_serialized_layout",
|
|
23
|
+
"get_serialized_shift_map",
|
|
24
|
+
# Motor coordination types
|
|
25
|
+
"FingerAssignment",
|
|
26
|
+
"FINGER_MAP",
|
|
27
|
+
"MOTOR_WEIGHTS",
|
|
28
|
+
"classify_transition",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
# Type alias for keyboard neighbor maps
|
|
32
|
+
KeyNeighborMap = dict[str, list[str]]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def build_keyboard_neighbor_map(rows: Iterable[str]) -> KeyNeighborMap:
    """Derive 8-neighbour adjacency lists from keyboard layout rows.

    Every character is placed on an integer grid at ``(column, row)`` where
    the column is its index inside the row string. Two keys are neighbours
    when their grid cells touch horizontally, vertically or diagonally.
    Spaces mark empty cells and characters are normalized to lowercase.

    If the same (lowercased) character occurs at more than one grid
    position, the neighbour list computed for the last position visited
    replaces the earlier one.

    Args:
        rows: Iterable of strings representing keyboard rows, with
            characters positioned to reflect their physical layout.

    Returns:
        Dictionary mapping each lowercase character to its adjacent
        characters, ordered row by row (above, same row, below) and left
        to right within a row, without duplicates.

    Example:
        >>> rows = ["qwerty", " asdfg"]  # second row shifted right by one
        >>> neighbors = build_keyboard_neighbor_map(rows)
        >>> neighbors['s']  # 's' is in column 2, directly below 'e'
        ['w', 'e', 'r', 'a', 'd']
    """
    grid: dict[tuple[int, int], str] = {
        (x, y): char.lower()
        for y, row in enumerate(rows)
        for x, char in enumerate(row)
        if char != " "
    }

    # Scan order (row above, same row, row below; left to right) fixes the
    # order of each neighbour list, so keep dy as the outer loop.
    offsets = [
        (dx, dy)
        for dy in (-1, 0, 1)
        for dx in (-1, 0, 1)
        if (dx, dy) != (0, 0)
    ]

    neighbors: KeyNeighborMap = {}
    for (x, y), char in grid.items():
        adjacent = (grid.get((x + dx, y + dy)) for dx, dy in offsets)
        # dict.fromkeys keeps first-seen order while dropping repeats, which
        # keeps the output deterministic.
        neighbors[char] = list(dict.fromkeys(key for key in adjacent if key is not None))

    return neighbors
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# Layout name -> neighbor map for that layout.
KeyboardLayouts = dict[str, KeyNeighborMap]
# Unshifted key -> character produced when the key is typed with Shift.
ShiftMap = dict[str, str]
# Layout name -> shift map for that layout.
ShiftMaps = dict[str, ShiftMap]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# Registry of neighbor maps keyed by layout name. It starts with one
# explicitly listed layout; the remaining layouts are added further down via
# _register_layout(). Each ``[*"..."]`` unpacks the string into a list of
# one-character neighbors. Several bottom-row keys list " " (space) as a
# neighbor, and top-row letters list adjacent digits.
# NOTE(review): the " " in the list for "q" looks out of place next to the
# bottom-row entries - confirm it is intentional.
_KEYNEIGHBORS: KeyboardLayouts = {
    "CURATOR_QWERTY": {
        "a": [*"qwsz"],
        "b": [*"vghn "],
        "c": [*"xdfv "],
        "d": [*"serfcx"],
        "e": [*"wsdrf34"],
        "f": [*"drtgvc"],
        "g": [*"ftyhbv"],
        "h": [*"gyujnb"],
        "i": [*"ujko89"],
        "j": [*"huikmn"],
        "k": [*"jilom,"],
        "l": [*"kop;.,"],
        "m": [*"njk, "],
        "n": [*"bhjm "],
        "o": [*"iklp90"],
        "p": [*"o0-[;l"],
        "q": [*"was 12"],
        "r": [*"edft45"],
        "s": [*"awedxz"],
        "t": [*"r56ygf"],
        "u": [*"y78ijh"],
        "v": [*"cfgb "],
        "w": [*"q23esa"],
        "x": [*"zsdc "],
        "y": [*"t67uhg"],
        "z": [*"asx"],
    }
}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _register_layout(name: str, rows: Iterable[str]) -> None:
    """Build the neighbor map for ``rows`` and store it under ``name``.

    An existing entry with the same name is replaced.
    """
    layout = build_keyboard_neighbor_map(rows)
    _KEYNEIGHBORS[name] = layout
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Row-based layouts. Each call passes the rows from top to bottom; the column
# index of a character inside its row string decides which keys count as
# adjacent, so the leading spaces are significant.
# NOTE(review): every lower row here is padded with exactly one space -
# confirm this matches the intended physical stagger of each row.
# NOTE(review): "\\" occurs in two DVORAK rows and "´" in two SPANISH_QWERTY
# rows; the neighbor map keeps a single list per character.
_register_layout(
    "DVORAK",
    (
        "`1234567890[]\\",
        " ',.pyfgcrl/=\\",
        " aoeuidhtns-",
        " ;qjkxbmwvz",
    ),
)

_register_layout(
    "COLEMAK",
    (
        "`1234567890-=",
        " qwfpgjluy;[]\\",
        " arstdhneio'",
        " zxcvbkm,./",
    ),
)

_register_layout(
    "QWERTY",
    (
        "`1234567890-=",
        " qwertyuiop[]\\",
        " asdfghjkl;'",
        " zxcvbnm,./",
    ),
)

_register_layout(
    "AZERTY",
    (
        "²&é\"'(-è_çà)=",
        " azertyuiop^$",
        " qsdfghjklmù*",
        " <wxcvbn,;:!",
    ),
)

_register_layout(
    "QWERTZ",
    (
        "^1234567890ß´",
        " qwertzuiopü+",
        " asdfghjklöä#",
        " yxcvbnm,.-",
    ),
)

_register_layout(
    "SPANISH_QWERTY",
    (
        "º1234567890'¡",
        " qwertyuiop´+",
        " asdfghjklñ´",
        " <zxcvbnm,.-",
    ),
)

_register_layout(
    "SWEDISH_QWERTY",
    (
        "§1234567890+´",
        " qwertyuiopå¨",
        " asdfghjklöä'",
        " <zxcvbnm,.-",
    ),
)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class KeyNeighbors:
    """Expose every registered keyboard layout as an instance attribute."""

    def __init__(self) -> None:
        # Attributes are bound once, from the layouts registered at the time
        # the instance is created.
        for layout_name in _KEYNEIGHBORS:
            setattr(self, layout_name, _KEYNEIGHBORS[layout_name])

    def get(self, name: str) -> KeyNeighborMap | None:
        """Look up a layout by name in the registry; ``None`` when absent."""
        try:
            return _KEYNEIGHBORS[name]
        except KeyError:
            return None
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# Module-level accessor instance; layouts are reachable as attributes
# (e.g. ``KEYNEIGHBORS.QWERTY``) or by name through ``KEYNEIGHBORS.get()``.
KEYNEIGHBORS: KeyNeighbors = KeyNeighbors()
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# Layouts copied once at import time so pipeline callers do not rebuild a
# dict per call. Every neighbor collection is materialized as a fresh list
# (lists rather than arbitrary iterables, for the Rust FFI).
_SERIALIZED_LAYOUTS: dict[str, dict[str, list[str]]] = {
    layout_name: {key: list(adjacent) for key, adjacent in neighbor_map.items()}
    for layout_name, neighbor_map in _KEYNEIGHBORS.items()
}
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def get_serialized_layout(name: str) -> dict[str, list[str]] | None:
    """Return the pre-serialized neighbor map for ``name``, or ``None``.

    The returned dict is the cached object itself, not a copy, so callers
    must treat it as read-only.
    """
    if name in _SERIALIZED_LAYOUTS:
        return _SERIALIZED_LAYOUTS[name]
    return None
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _uppercase_keys(layout: str) -> ShiftMap:
    """Map each alphabetic key of ``layout`` to its ``str.upper()`` form.

    Unknown layout names yield an empty mapping.
    """
    keys = _KEYNEIGHBORS.get(layout, {})
    return {key: key.upper() for key in keys if key.isalpha()}
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _with_letters(base: ShiftMap, layout: str) -> ShiftMap:
    """Combine explicit shift symbols with auto-generated letter shifts.

    Args:
        base: Explicit key -> shifted-character entries for the layout.
        layout: Layout name whose alphabetic keys get an uppercase entry.

    Returns:
        A new dict holding every entry of ``base`` plus an uppercase entry
        for each alphabetic layout key that ``base`` does not already cover.
        ``base`` itself is not modified.
    """
    mapping = dict(base)
    # Explicit entries must win. Some keys are alphabetic per str.isalpha()
    # yet have a dedicated shift symbol in ``base`` (AZERTY "é" -> "2",
    # QWERTZ "ß" -> "?"). Letting the generated uppercase overwrite them
    # would produce "É" or, for "ß", the two-character string "SS".
    for key, shifted in _uppercase_keys(layout).items():
        mapping.setdefault(key, shifted)
    return mapping
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _qwerty_symbols() -> ShiftMap:
|
|
236
|
+
return {
|
|
237
|
+
"`": "~",
|
|
238
|
+
"1": "!",
|
|
239
|
+
"2": "@",
|
|
240
|
+
"3": "#",
|
|
241
|
+
"4": "$",
|
|
242
|
+
"5": "%",
|
|
243
|
+
"6": "^",
|
|
244
|
+
"7": "&",
|
|
245
|
+
"8": "*",
|
|
246
|
+
"9": "(",
|
|
247
|
+
"0": ")",
|
|
248
|
+
"-": "_",
|
|
249
|
+
"=": "+",
|
|
250
|
+
"[": "{",
|
|
251
|
+
"]": "}",
|
|
252
|
+
"\\": "|",
|
|
253
|
+
";": ":",
|
|
254
|
+
"'": '"',
|
|
255
|
+
",": "<",
|
|
256
|
+
".": ">",
|
|
257
|
+
"/": "?",
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _azerty_symbols() -> ShiftMap:
|
|
262
|
+
return {
|
|
263
|
+
"&": "1",
|
|
264
|
+
"\u00e9": "2",
|
|
265
|
+
'"': "3",
|
|
266
|
+
"'": "4",
|
|
267
|
+
"(": "5",
|
|
268
|
+
"-": "6",
|
|
269
|
+
"\u00e8": "7",
|
|
270
|
+
"_": "8",
|
|
271
|
+
"\u00e7": "9",
|
|
272
|
+
"\u00e0": "0",
|
|
273
|
+
")": "\u00b0",
|
|
274
|
+
"=": "+",
|
|
275
|
+
"^": "\u00a8",
|
|
276
|
+
"$": "\u00a3",
|
|
277
|
+
"*": "\u00b5",
|
|
278
|
+
"\u00f9": "%",
|
|
279
|
+
"<": ">",
|
|
280
|
+
",": "?",
|
|
281
|
+
";": ".",
|
|
282
|
+
":": "/",
|
|
283
|
+
"!": "\u00a7",
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _qwertz_symbols() -> ShiftMap:
|
|
288
|
+
return {
|
|
289
|
+
"^": "\u00b0",
|
|
290
|
+
"1": "!",
|
|
291
|
+
"2": '"',
|
|
292
|
+
"3": "\u00a7",
|
|
293
|
+
"4": "$",
|
|
294
|
+
"5": "%",
|
|
295
|
+
"6": "&",
|
|
296
|
+
"7": "/",
|
|
297
|
+
"8": "(",
|
|
298
|
+
"9": ")",
|
|
299
|
+
"0": "=",
|
|
300
|
+
"\u00df": "?",
|
|
301
|
+
"\u00b4": "`",
|
|
302
|
+
"+": "*",
|
|
303
|
+
"#": "'",
|
|
304
|
+
"-": "_",
|
|
305
|
+
",": ";",
|
|
306
|
+
".": ":",
|
|
307
|
+
"\u00e4": "\u00c4",
|
|
308
|
+
"\u00f6": "\u00d6",
|
|
309
|
+
"\u00fc": "\u00dc",
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _spanish_symbols() -> ShiftMap:
|
|
314
|
+
return {
|
|
315
|
+
"\u00ba": "\u00aa",
|
|
316
|
+
"1": "!",
|
|
317
|
+
"2": '"',
|
|
318
|
+
"3": "\u00b7",
|
|
319
|
+
"4": "$",
|
|
320
|
+
"5": "%",
|
|
321
|
+
"6": "&",
|
|
322
|
+
"7": "/",
|
|
323
|
+
"8": "(",
|
|
324
|
+
"9": ")",
|
|
325
|
+
"0": "=",
|
|
326
|
+
"'": "?",
|
|
327
|
+
"\u00a1": "\u00bf",
|
|
328
|
+
"+": "*",
|
|
329
|
+
"\u00b4": "\u00a8",
|
|
330
|
+
"-": "_",
|
|
331
|
+
",": ";",
|
|
332
|
+
".": ":",
|
|
333
|
+
"<": ">",
|
|
334
|
+
"\u00f1": "\u00d1",
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def _swedish_symbols() -> ShiftMap:
|
|
339
|
+
return {
|
|
340
|
+
"\u00a7": "\u00bd",
|
|
341
|
+
"1": "!",
|
|
342
|
+
"2": '"',
|
|
343
|
+
"3": "#",
|
|
344
|
+
"4": "\u00a4",
|
|
345
|
+
"5": "%",
|
|
346
|
+
"6": "&",
|
|
347
|
+
"7": "/",
|
|
348
|
+
"8": "(",
|
|
349
|
+
"9": ")",
|
|
350
|
+
"0": "=",
|
|
351
|
+
"+": "?",
|
|
352
|
+
"\u00b4": "\u00a8",
|
|
353
|
+
"-": "_",
|
|
354
|
+
",": ";",
|
|
355
|
+
".": ":",
|
|
356
|
+
"<": ">",
|
|
357
|
+
"\u00e5": "\u00c5",
|
|
358
|
+
"\u00e4": "\u00c4",
|
|
359
|
+
"\u00f6": "\u00d6",
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
# Shift map per layout name, built once at import time. Each entry merges a
# layout-specific symbol table with uppercase entries for that layout's
# alphabetic keys. CURATOR_QWERTY, QWERTY, COLEMAK and DVORAK all start from
# the same QWERTY symbol table; the other layouts have their own.
_SHIFT_MAPS: ShiftMaps = {
    "CURATOR_QWERTY": _with_letters(_qwerty_symbols(), "CURATOR_QWERTY"),
    "QWERTY": _with_letters(_qwerty_symbols(), "QWERTY"),
    "COLEMAK": _with_letters(_qwerty_symbols(), "COLEMAK"),
    "DVORAK": _with_letters(_qwerty_symbols(), "DVORAK"),
    "AZERTY": _with_letters(_azerty_symbols(), "AZERTY"),
    "QWERTZ": _with_letters(_qwertz_symbols(), "QWERTZ"),
    "SPANISH_QWERTY": _with_letters(_spanish_symbols(), "SPANISH_QWERTY"),
    "SWEDISH_QWERTY": _with_letters(_swedish_symbols(), "SWEDISH_QWERTY"),
}
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
class ShiftMapsAccessor:
    """Expose every per-layout shift map as an instance attribute."""

    def __init__(self) -> None:
        # Attributes are bound once, from the shift maps present at the time
        # the instance is created.
        for layout_name in _SHIFT_MAPS:
            setattr(self, layout_name, _SHIFT_MAPS[layout_name])

    def get(self, name: str) -> ShiftMap | None:
        """Look up a shift map by layout name; ``None`` when absent."""
        try:
            return _SHIFT_MAPS[name]
        except KeyError:
            return None
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
# Module-level accessor instance; shift maps are reachable as attributes
# (e.g. ``SHIFT_MAPS.QWERTY``) or by name through ``SHIFT_MAPS.get()``.
SHIFT_MAPS: ShiftMapsAccessor = ShiftMapsAccessor()
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def get_serialized_shift_map(name: str) -> dict[str, str] | None:
    """Return the shift map for ``name`` for pipeline use, or ``None``.

    The returned dict is the cached object itself, not a copy, so callers
    must treat it as read-only.
    """
    if name in _SHIFT_MAPS:
        return _SHIFT_MAPS[name]
    return None
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
# ---------------------------------------------------------------------------
|
|
399
|
+
# Motor Coordination Types
|
|
400
|
+
# ---------------------------------------------------------------------------
|
|
401
|
+
# Based on the Aalto 136M Keystrokes dataset
|
|
402
|
+
# Dhakal et al. (2018). Observations on Typing from 136 Million Keystrokes. CHI '18.
|
|
403
|
+
# https://doi.org/10.1145/3173574.3174220
|
|
404
|
+
|
|
405
|
+
# Finger assignment: (hand, finger)
|
|
406
|
+
# hand: 0=left, 1=right, 2=thumb/space
|
|
407
|
+
# finger: 0=pinky, 1=ring, 2=middle, 3=index, 4=thumb
|
|
408
|
+
FingerAssignment = tuple[int, int]
|
|
409
|
+
|
|
410
|
+
# fmt: off
|
|
411
|
+
# Character -> (hand, finger) lookup used by classify_transition(). Both the
# unshifted and the shifted character of a key are listed, with the same
# assignment. Characters not listed here have no assignment.
FINGER_MAP: dict[str, FingerAssignment] = {
    # Left pinky (hand=0, finger=0)
    '`': (0, 0), '1': (0, 0), 'q': (0, 0), 'a': (0, 0), 'z': (0, 0),
    '~': (0, 0), '!': (0, 0), 'Q': (0, 0), 'A': (0, 0), 'Z': (0, 0),
    # Left ring (hand=0, finger=1)
    '2': (0, 1), 'w': (0, 1), 's': (0, 1), 'x': (0, 1),
    '@': (0, 1), 'W': (0, 1), 'S': (0, 1), 'X': (0, 1),
    # Left middle (hand=0, finger=2)
    '3': (0, 2), 'e': (0, 2), 'd': (0, 2), 'c': (0, 2),
    '#': (0, 2), 'E': (0, 2), 'D': (0, 2), 'C': (0, 2),
    # Left index - two columns (hand=0, finger=3)
    '4': (0, 3), 'r': (0, 3), 'f': (0, 3), 'v': (0, 3),
    '5': (0, 3), 't': (0, 3), 'g': (0, 3), 'b': (0, 3),
    '$': (0, 3), 'R': (0, 3), 'F': (0, 3), 'V': (0, 3),
    '%': (0, 3), 'T': (0, 3), 'G': (0, 3), 'B': (0, 3),
    # Right index - two columns (hand=1, finger=3)
    '6': (1, 3), 'y': (1, 3), 'h': (1, 3), 'n': (1, 3),
    '7': (1, 3), 'u': (1, 3), 'j': (1, 3), 'm': (1, 3),
    '^': (1, 3), 'Y': (1, 3), 'H': (1, 3), 'N': (1, 3),
    '&': (1, 3), 'U': (1, 3), 'J': (1, 3), 'M': (1, 3),
    # Right middle (hand=1, finger=2)
    '8': (1, 2), 'i': (1, 2), 'k': (1, 2), ',': (1, 2),
    '*': (1, 2), 'I': (1, 2), 'K': (1, 2), '<': (1, 2),
    # Right ring (hand=1, finger=1)
    '9': (1, 1), 'o': (1, 1), 'l': (1, 1), '.': (1, 1),
    '(': (1, 1), 'O': (1, 1), 'L': (1, 1), '>': (1, 1),
    # Right pinky (hand=1, finger=0)
    '0': (1, 0), 'p': (1, 0), ';': (1, 0), '/': (1, 0),
    '-': (1, 0), '[': (1, 0), "'": (1, 0),
    ')': (1, 0), 'P': (1, 0), ':': (1, 0), '?': (1, 0),
    '_': (1, 0), '{': (1, 0), '"': (1, 0),
    '=': (1, 0), ']': (1, 0), '\\': (1, 0),
    '+': (1, 0), '}': (1, 0), '|': (1, 0),
    # Space - thumb (hand=2, finger=4)
    ' ': (2, 4),
}
|
|
447
|
+
# fmt: on
|
|
448
|
+
|
|
449
|
+
# Motor coordination weights derived from Aalto 136M Keystrokes dataset
|
|
450
|
+
# Keys: transition type -> weight multiplier
|
|
451
|
+
# Values normalized so cross_hand = 1.0 (baseline)
|
|
452
|
+
# Profile name -> {transition type -> weight multiplier}.
# NOTE: only "same_finger", "same_hand" and "cross_hand" have weights here;
# classify_transition() can also return "space" and "unknown", which have no
# entry in any profile.
MOTOR_WEIGHTS: dict[str, dict[str, float]] = {
    # "Wet ink" - uncorrected errors (errors that survive to final output)
    # Same-finger errors are caught/corrected, cross-hand errors slip through
    "wet_ink": {
        "same_finger": 0.858,
        "same_hand": 0.965,
        "cross_hand": 1.0,
    },
    # "Hastily edited" - raw error distribution before correction
    # Same-finger errors occur most often but are easy to detect
    "hastily_edited": {
        "same_finger": 3.031,
        "same_hand": 1.101,
        "cross_hand": 1.0,
    },
    # Uniform weighting - all transitions equal (original behavior)
    "uniform": {
        "same_finger": 1.0,
        "same_hand": 1.0,
        "cross_hand": 1.0,
    },
}
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def classify_transition(prev_char: str, curr_char: str) -> str:
    """Name the kind of finger movement between two consecutive characters.

    Args:
        prev_char: The character typed first.
        curr_char: The character typed next.

    Returns:
        ``'unknown'`` when either character has no finger assignment,
        ``'space'`` when either one is assigned to the thumb hand,
        ``'cross_hand'`` when the hands differ, ``'same_finger'`` when hand
        and finger both match, and ``'same_hand'`` otherwise.
    """
    before = FINGER_MAP.get(prev_char)
    after = FINGER_MAP.get(curr_char)
    if before is None or after is None:
        return "unknown"

    (hand_a, finger_a), (hand_b, finger_b) = before, after

    # Hand code 2 marks the thumb/space bar, which gets its own category.
    if 2 in (hand_a, hand_b):
        return "space"

    if hand_a != hand_b:
        return "cross_hand"

    # Same hand from here on: only the finger decides.
    return "same_finger" if finger_a == finger_b else "same_hand"
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Shared transcript type helpers used across attack and DLC modules."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Literal, Sequence, TypeGuard, Union
|
|
6
|
+
|
|
7
|
+
# One chat turn: a string-keyed dict. The helpers in this module read the
# "content" and "role" keys.
TranscriptTurn = dict[str, Any]
# A conversation: a list of turns.
Transcript = list[TranscriptTurn]
|
|
9
|
+
|
|
10
|
+
# Type alias for transcript target specifications.
|
|
11
|
+
# - "last": corrupt only the last turn (default behavior)
|
|
12
|
+
# - "all": corrupt all turns
|
|
13
|
+
# - "assistant": corrupt only turns with role="assistant"
|
|
14
|
+
# - "user": corrupt only turns with role="user"
|
|
15
|
+
# - int: corrupt a specific index (negative indexing supported)
|
|
16
|
+
# - Sequence[int]: corrupt specific indices
|
|
17
|
+
TranscriptTarget = Union[Literal["last", "all", "assistant", "user"], int, Sequence[int]]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def is_transcript(
|
|
21
|
+
value: Any,
|
|
22
|
+
*,
|
|
23
|
+
allow_empty: bool = True,
|
|
24
|
+
require_all_content: bool = False,
|
|
25
|
+
) -> TypeGuard[Transcript]:
|
|
26
|
+
"""Return True when ``value`` appears to be a chat transcript mapping list."""
|
|
27
|
+
if not isinstance(value, list):
|
|
28
|
+
return False
|
|
29
|
+
|
|
30
|
+
if not value:
|
|
31
|
+
return allow_empty
|
|
32
|
+
|
|
33
|
+
if not all(isinstance(turn, dict) for turn in value):
|
|
34
|
+
return False
|
|
35
|
+
|
|
36
|
+
if require_all_content:
|
|
37
|
+
return all("content" in turn for turn in value)
|
|
38
|
+
|
|
39
|
+
return "content" in value[-1]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def resolve_transcript_indices(
    transcript: Transcript,
    target: TranscriptTarget,
) -> list[int]:
    """Turn a transcript target specification into concrete turn indices.

    Args:
        transcript: The transcript the indices refer to.
        target: ``"last"``, ``"all"``, ``"assistant"``, ``"user"``, a single
            index, or a sequence of indices. Negative indices count from
            the end.

    Returns:
        Valid indices into ``transcript``. Role filters and ``"all"`` return
        them in transcript order; a sequence target is de-duplicated and
        sorted ascending. An empty transcript always yields ``[]``.

    Raises:
        ValueError: If the target is not a recognised specification, a
            sequence item is not an int, or an index is out of bounds.
    """
    if not transcript:
        return []

    size = len(transcript)

    if target == "last":
        return [size - 1]
    if target == "all":
        return [*range(size)]
    if target == "assistant" or target == "user":
        return [pos for pos, turn in enumerate(transcript) if turn.get("role") == target]

    if isinstance(target, int):
        return [_normalize_transcript_index(target, size)]

    # Strings are sequences too, but an unrecognised string is never a
    # valid index list.
    if isinstance(target, str) or not isinstance(target, Sequence):
        raise ValueError(
            f"Invalid transcript target: {target!r}. "
            "Expected 'last', 'all', 'assistant', 'user', int, or sequence of ints."
        )

    resolved: set[int] = set()
    for raw in target:
        if not isinstance(raw, int):
            raise ValueError(f"Transcript indices must be integers, got {type(raw).__name__}")
        resolved.add(_normalize_transcript_index(raw, size))
    return sorted(resolved)


def _normalize_transcript_index(raw: int, size: int) -> int:
    """Convert a possibly negative index to a non-negative one, bounds-checked."""
    position = raw + size if raw < 0 else raw
    if position < 0 or position >= size:
        raise ValueError(f"Transcript index {raw} out of bounds for length {size}")
    return position
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
__all__ = [
|
|
103
|
+
"Transcript",
|
|
104
|
+
"TranscriptTarget",
|
|
105
|
+
"TranscriptTurn",
|
|
106
|
+
"is_transcript",
|
|
107
|
+
"resolve_transcript_indices",
|
|
108
|
+
]
|