glitchlings 1.0.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. glitchlings/__init__.py +101 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_corruption_engine/__init__.py +12 -0
  4. glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +184 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +819 -0
  21. glitchlings/attack/core_execution.py +378 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +211 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +338 -0
  27. glitchlings/attack/tokenizer_metrics.py +373 -0
  28. glitchlings/auggie.py +285 -0
  29. glitchlings/compat/__init__.py +9 -0
  30. glitchlings/compat/loaders.py +355 -0
  31. glitchlings/compat/types.py +41 -0
  32. glitchlings/conf/__init__.py +39 -0
  33. glitchlings/conf/loaders.py +331 -0
  34. glitchlings/conf/schema.py +156 -0
  35. glitchlings/conf/types.py +72 -0
  36. glitchlings/config.toml +2 -0
  37. glitchlings/constants.py +139 -0
  38. glitchlings/dev/__init__.py +3 -0
  39. glitchlings/dev/docs.py +45 -0
  40. glitchlings/dlc/__init__.py +21 -0
  41. glitchlings/dlc/_shared.py +300 -0
  42. glitchlings/dlc/gutenberg.py +400 -0
  43. glitchlings/dlc/huggingface.py +68 -0
  44. glitchlings/dlc/langchain.py +147 -0
  45. glitchlings/dlc/nemo.py +283 -0
  46. glitchlings/dlc/prime.py +215 -0
  47. glitchlings/dlc/pytorch.py +98 -0
  48. glitchlings/dlc/pytorch_lightning.py +173 -0
  49. glitchlings/internal/__init__.py +16 -0
  50. glitchlings/internal/rust.py +159 -0
  51. glitchlings/internal/rust_ffi.py +599 -0
  52. glitchlings/main.py +426 -0
  53. glitchlings/protocols.py +91 -0
  54. glitchlings/runtime_config.py +24 -0
  55. glitchlings/util/__init__.py +41 -0
  56. glitchlings/util/adapters.py +65 -0
  57. glitchlings/util/keyboards.py +508 -0
  58. glitchlings/util/transcripts.py +108 -0
  59. glitchlings/zoo/__init__.py +161 -0
  60. glitchlings/zoo/assets/__init__.py +29 -0
  61. glitchlings/zoo/core.py +852 -0
  62. glitchlings/zoo/core_execution.py +154 -0
  63. glitchlings/zoo/core_planning.py +451 -0
  64. glitchlings/zoo/corrupt_dispatch.py +291 -0
  65. glitchlings/zoo/hokey.py +139 -0
  66. glitchlings/zoo/jargoyle.py +301 -0
  67. glitchlings/zoo/mim1c.py +269 -0
  68. glitchlings/zoo/pedant/__init__.py +109 -0
  69. glitchlings/zoo/pedant/core.py +99 -0
  70. glitchlings/zoo/pedant/forms.py +50 -0
  71. glitchlings/zoo/pedant/stones.py +83 -0
  72. glitchlings/zoo/redactyl.py +94 -0
  73. glitchlings/zoo/rng.py +280 -0
  74. glitchlings/zoo/rushmore.py +416 -0
  75. glitchlings/zoo/scannequin.py +370 -0
  76. glitchlings/zoo/transforms.py +331 -0
  77. glitchlings/zoo/typogre.py +194 -0
  78. glitchlings/zoo/validation.py +643 -0
  79. glitchlings/zoo/wherewolf.py +120 -0
  80. glitchlings/zoo/zeedub.py +165 -0
  81. glitchlings-1.0.0.dist-info/METADATA +404 -0
  82. glitchlings-1.0.0.dist-info/RECORD +86 -0
  83. glitchlings-1.0.0.dist-info/WHEEL +5 -0
  84. glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
  85. glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
  86. glitchlings-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,508 @@
1
+ """Keyboard layout neighbor maps for typo simulation.
2
+
3
+ This module centralizes keyboard layout data that was previously stored
4
+ directly in :mod:`glitchlings.util.__init__`. It defines adjacency maps
5
+ for various keyboard layouts used by typo-generating glitchlings.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Iterable
11
+
12
+ __all__ = [
13
+ "KeyboardLayouts",
14
+ "KeyNeighbors",
15
+ "KEYNEIGHBORS",
16
+ "ShiftMap",
17
+ "ShiftMaps",
18
+ "SHIFT_MAPS",
19
+ "KeyNeighborMap",
20
+ "build_keyboard_neighbor_map",
21
+ # Pre-serialized accessors for pipeline use
22
+ "get_serialized_layout",
23
+ "get_serialized_shift_map",
24
+ # Motor coordination types
25
+ "FingerAssignment",
26
+ "FINGER_MAP",
27
+ "MOTOR_WEIGHTS",
28
+ "classify_transition",
29
+ ]
30
+
31
+ # Type alias for keyboard neighbor maps
32
+ KeyNeighborMap = dict[str, list[str]]
33
+
34
+
35
def build_keyboard_neighbor_map(rows: Iterable[str]) -> KeyNeighborMap:
    """Derive 8-neighbour adjacency lists from keyboard layout rows.

    Every character is placed on a grid at ``(column, row)`` using its index
    within its row string. Spaces mark empty positions and are never keys.
    Characters are lowercased before being stored.

    Args:
        rows: Iterable of strings representing keyboard rows, with
            characters positioned to reflect their physical layout.

    Returns:
        Dictionary mapping each lowercase character to the characters found
        in the eight surrounding grid cells. Neighbours are listed in scan
        order (row above, same row, row below; left to right within a row)
        with duplicates removed.

    Note:
        If the same character occupies more than one grid position, the
        entry for the position processed last replaces the earlier one.

    Example:
        >>> rows = ["qwerty", " asdfg"]  # 'a' offset by 1
        >>> neighbors = build_keyboard_neighbor_map(rows)
        >>> neighbors['s']  # 's' is in column 2, directly below 'e'
        ['w', 'e', 'r', 'a', 'd']
    """
    # Map each occupied (x, y) cell to its lowercased character.
    grid: dict[tuple[int, int], str] = {
        (x, y): char.lower()
        for y, row in enumerate(rows)
        for x, char in enumerate(row)
        if char != " "
    }

    # The eight surrounding offsets, in the order neighbours are reported.
    offsets = [
        (dx, dy)
        for dy in (-1, 0, 1)
        for dx in (-1, 0, 1)
        if (dx, dy) != (0, 0)
    ]

    neighbors: KeyNeighborMap = {}
    for (x, y), char in grid.items():
        adjacent = (grid.get((x + dx, y + dy)) for dx, dy in offsets)
        # dict.fromkeys keeps first-seen order while dropping duplicates,
        # which keeps the output deterministic.
        neighbors[char] = list(
            dict.fromkeys(found for found in adjacent if found is not None)
        )

    return neighbors
77
+
78
+
79
+ KeyboardLayouts = dict[str, KeyNeighborMap]
80
+ ShiftMap = dict[str, str]
81
+ ShiftMaps = dict[str, ShiftMap]
82
+
83
+
84
+ _KEYNEIGHBORS: KeyboardLayouts = {
85
+ "CURATOR_QWERTY": {
86
+ "a": [*"qwsz"],
87
+ "b": [*"vghn "],
88
+ "c": [*"xdfv "],
89
+ "d": [*"serfcx"],
90
+ "e": [*"wsdrf34"],
91
+ "f": [*"drtgvc"],
92
+ "g": [*"ftyhbv"],
93
+ "h": [*"gyujnb"],
94
+ "i": [*"ujko89"],
95
+ "j": [*"huikmn"],
96
+ "k": [*"jilom,"],
97
+ "l": [*"kop;.,"],
98
+ "m": [*"njk, "],
99
+ "n": [*"bhjm "],
100
+ "o": [*"iklp90"],
101
+ "p": [*"o0-[;l"],
102
+ "q": [*"was 12"],
103
+ "r": [*"edft45"],
104
+ "s": [*"awedxz"],
105
+ "t": [*"r56ygf"],
106
+ "u": [*"y78ijh"],
107
+ "v": [*"cfgb "],
108
+ "w": [*"q23esa"],
109
+ "x": [*"zsdc "],
110
+ "y": [*"t67uhg"],
111
+ "z": [*"asx"],
112
+ }
113
+ }
114
+
115
+
116
def _register_layout(name: str, rows: Iterable[str]) -> None:
    """Build the neighbor map for ``rows`` and store it under ``name``.

    An existing entry in ``_KEYNEIGHBORS`` with the same name is replaced.
    """
    neighbor_map = build_keyboard_neighbor_map(rows)
    _KEYNEIGHBORS[name] = neighbor_map
118
+
119
+
120
+ _register_layout(
121
+ "DVORAK",
122
+ (
123
+ "`1234567890[]\\",
124
+ " ',.pyfgcrl/=\\",
125
+ " aoeuidhtns-",
126
+ " ;qjkxbmwvz",
127
+ ),
128
+ )
129
+
130
+ _register_layout(
131
+ "COLEMAK",
132
+ (
133
+ "`1234567890-=",
134
+ " qwfpgjluy;[]\\",
135
+ " arstdhneio'",
136
+ " zxcvbkm,./",
137
+ ),
138
+ )
139
+
140
+ _register_layout(
141
+ "QWERTY",
142
+ (
143
+ "`1234567890-=",
144
+ " qwertyuiop[]\\",
145
+ " asdfghjkl;'",
146
+ " zxcvbnm,./",
147
+ ),
148
+ )
149
+
150
+ _register_layout(
151
+ "AZERTY",
152
+ (
153
+ "²&é\"'(-è_çà)=",
154
+ " azertyuiop^$",
155
+ " qsdfghjklmù*",
156
+ " <wxcvbn,;:!",
157
+ ),
158
+ )
159
+
160
+ _register_layout(
161
+ "QWERTZ",
162
+ (
163
+ "^1234567890ß´",
164
+ " qwertzuiopü+",
165
+ " asdfghjklöä#",
166
+ " yxcvbnm,.-",
167
+ ),
168
+ )
169
+
170
+ _register_layout(
171
+ "SPANISH_QWERTY",
172
+ (
173
+ "º1234567890'¡",
174
+ " qwertyuiop´+",
175
+ " asdfghjklñ´",
176
+ " <zxcvbnm,.-",
177
+ ),
178
+ )
179
+
180
+ _register_layout(
181
+ "SWEDISH_QWERTY",
182
+ (
183
+ "§1234567890+´",
184
+ " qwertyuiopå¨",
185
+ " asdfghjklöä'",
186
+ " <zxcvbnm,.-",
187
+ ),
188
+ )
189
+
190
+
191
class KeyNeighbors:
    """Namespace exposing each registered keyboard layout as an attribute.

    Attributes are created from the layouts present in ``_KEYNEIGHBORS`` at
    the moment the instance is constructed; :meth:`get` always consults the
    registry itself.
    """

    def __init__(self) -> None:
        # One instance attribute per layout name, bound to the same map
        # object that lives in the registry (no copy is made).
        vars(self).update(_KEYNEIGHBORS)

    def get(self, name: str) -> KeyNeighborMap | None:
        """Look up a layout by name; ``None`` when no such layout exists."""
        layout = _KEYNEIGHBORS.get(name)
        return layout
201
+
202
+
203
+ KEYNEIGHBORS: KeyNeighbors = KeyNeighbors()
204
+
205
+
206
+ # Pre-serialized layouts for pipeline use (avoids per-call dict comprehension)
207
+ # Format: {key: list(neighbors)} - lists instead of iterables for Rust FFI
208
# Built once at import time: every layout's neighbor lists are copied into
# fresh list objects so the pipeline receives plain dict-of-list data.
_SERIALIZED_LAYOUTS: dict[str, dict[str, list[str]]] = dict(
    (layout_name, {key: [*adjacent] for key, adjacent in neighbor_map.items()})
    for layout_name, neighbor_map in _KEYNEIGHBORS.items()
)
211
+
212
+
213
def get_serialized_layout(name: str) -> dict[str, list[str]] | None:
    """Return the pre-serialized neighbor map for ``name``.

    The cached object itself is handed back (not a copy), so callers must
    treat it as read-only. ``None`` is returned for an unknown layout name.
    """
    cached = _SERIALIZED_LAYOUTS.get(name)
    return cached
219
+
220
+
221
def _uppercase_keys(layout: str) -> ShiftMap:
    """Map every alphabetic key of ``layout`` to its ``str.upper()`` form.

    Non-alphabetic keys are skipped. An unknown layout name yields an
    empty mapping.
    """
    neighbor_map = _KEYNEIGHBORS.get(layout, {})
    return {key: key.upper() for key in neighbor_map if key.isalpha()}
227
+
228
+
229
def _with_letters(base: ShiftMap, layout: str) -> ShiftMap:
    """Combine a layout's explicit shift map with derived letter mappings.

    Entries already present in ``base`` are authoritative. The upper-case
    mappings derived from the layout's keys only fill in keys that ``base``
    does not define. This matters because ``str.isalpha()`` is true for
    keys such as ``é``, ``ç``, ``ß`` and ``º``: blindly applying the derived
    mapping would replace AZERTY ``é -> 2`` with ``É`` and QWERTZ
    ``ß -> ?`` with the two-character string ``SS``.

    Args:
        base: Explicit unshifted-to-shifted mapping for the layout.
        layout: Name of the layout whose alphabetic keys are upper-cased.

    Returns:
        A new dictionary; ``base`` is not modified. Keys from ``base`` come
        first in their original order, followed by the added letter keys.
    """
    mapping = dict(base)
    for key, shifted in _uppercase_keys(layout).items():
        # setdefault keeps any explicit entry and only adds missing letters.
        mapping.setdefault(key, shifted)
    return mapping
233
+
234
+
235
def _qwerty_symbols() -> ShiftMap:
    """Return a fresh unshifted-to-shifted map for the QWERTY symbol keys.

    The two strings below are aligned position by position: the character
    at index ``i`` of ``unshifted`` maps to index ``i`` of ``shifted``.
    """
    unshifted = "`1234567890-=[]\\;',./"
    shifted = "~!@#$%^&*()_+{}|:\"<>?"
    return dict(zip(unshifted, shifted))
259
+
260
+
261
def _azerty_symbols() -> ShiftMap:
    """Return a fresh unshifted-to-shifted map for the AZERTY symbol keys."""
    pairs = (
        # Top row: the unshifted symbol produces a digit when shifted.
        ("&", "1"),
        ("\u00e9", "2"),
        ('"', "3"),
        ("'", "4"),
        ("(", "5"),
        ("-", "6"),
        ("\u00e8", "7"),
        ("_", "8"),
        ("\u00e7", "9"),
        ("\u00e0", "0"),
        (")", "\u00b0"),
        ("=", "+"),
        # Remaining punctuation keys.
        ("^", "\u00a8"),
        ("$", "\u00a3"),
        ("*", "\u00b5"),
        ("\u00f9", "%"),
        ("<", ">"),
        (",", "?"),
        (";", "."),
        (":", "/"),
        ("!", "\u00a7"),
    )
    return dict(pairs)
285
+
286
+
287
def _qwertz_symbols() -> ShiftMap:
    """Return a fresh unshifted-to-shifted map for the QWERTZ symbol keys.

    Besides punctuation, the map carries explicit upper-case forms for the
    umlaut letters.
    """
    # Number row, aligned position by position.
    number_row = dict(zip("^1234567890\u00df\u00b4", "\u00b0!\"\u00a7$%&/()=?`"))
    return {
        **number_row,
        "+": "*",
        "#": "'",
        "-": "_",
        ",": ";",
        ".": ":",
        "\u00e4": "\u00c4",
        "\u00f6": "\u00d6",
        "\u00fc": "\u00dc",
    }
311
+
312
+
313
def _spanish_symbols() -> ShiftMap:
    """Return a fresh unshifted-to-shifted map for the Spanish QWERTY keys.

    Includes an explicit upper-case entry for the letter ``ñ``.
    """
    unshifted = (
        "\u00ba", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
        "'", "\u00a1", "+", "\u00b4", "-", ",", ".", "<", "\u00f1",
    )
    shifted = (
        "\u00aa", "!", '"', "\u00b7", "$", "%", "&", "/", "(", ")", "=",
        "?", "\u00bf", "*", "\u00a8", "_", ";", ":", ">", "\u00d1",
    )
    # Both tuples are aligned position by position.
    return dict(zip(unshifted, shifted))
336
+
337
+
338
def _swedish_symbols() -> ShiftMap:
    """Return a fresh unshifted-to-shifted map for the Swedish QWERTY keys.

    Includes explicit upper-case entries for ``å``, ``ä`` and ``ö``.
    """
    mapping: ShiftMap = {}
    # Number row, aligned position by position.
    mapping.update(zip("\u00a71234567890+\u00b4", "\u00bd!\"#\u00a4%&/()=?\u00a8"))
    # Punctuation keys.
    mapping.update(zip("-,.<", "_;:>"))
    # Letters outside a-z.
    mapping.update(zip("\u00e5\u00e4\u00f6", "\u00c5\u00c4\u00d6"))
    return mapping
361
+
362
+
363
# Per-layout shift maps: each layout combines a symbol table with the
# upper-case letters derived from its own keys. A fresh symbol table is
# built for every layout, so no dictionary is shared between entries.
_SHIFT_MAPS: ShiftMaps = {
    layout_name: _with_letters(symbol_table(), layout_name)
    for layout_name, symbol_table in (
        ("CURATOR_QWERTY", _qwerty_symbols),
        ("QWERTY", _qwerty_symbols),
        ("COLEMAK", _qwerty_symbols),
        ("DVORAK", _qwerty_symbols),
        ("AZERTY", _azerty_symbols),
        ("QWERTZ", _qwertz_symbols),
        ("SPANISH_QWERTY", _spanish_symbols),
        ("SWEDISH_QWERTY", _swedish_symbols),
    )
}
373
+
374
+
375
class ShiftMapsAccessor:
    """Namespace exposing each layout's shift map as an attribute.

    Attributes are created from the entries present in ``_SHIFT_MAPS`` when
    the instance is constructed; :meth:`get` reads the registry directly.
    """

    def __init__(self) -> None:
        # One instance attribute per layout name, bound to the registry's
        # own dictionary (no copy is made).
        self.__dict__.update(_SHIFT_MAPS)

    def get(self, name: str) -> ShiftMap | None:
        """Look up a shift map by layout name; ``None`` when unknown."""
        shift_map = _SHIFT_MAPS.get(name)
        return shift_map
385
+
386
+
387
+ SHIFT_MAPS: ShiftMapsAccessor = ShiftMapsAccessor()
388
+
389
+
390
def get_serialized_shift_map(name: str) -> dict[str, str] | None:
    """Return the shift map registered for layout ``name``.

    The stored dictionary itself is handed back (not a copy), so callers
    must treat it as read-only. ``None`` is returned for an unknown name.
    """
    cached = _SHIFT_MAPS.get(name)
    return cached
396
+
397
+
398
+ # ---------------------------------------------------------------------------
399
+ # Motor Coordination Types
400
+ # ---------------------------------------------------------------------------
401
+ # Based on the Aalto 136M Keystrokes dataset
402
+ # Dhakal et al. (2018). Observations on Typing from 136 Million Keystrokes. CHI '18.
403
+ # https://doi.org/10.1145/3173574.3174220
404
+
405
+ # Finger assignment: (hand, finger)
406
+ # hand: 0=left, 1=right, 2=thumb/space
407
+ # finger: 0=pinky, 1=ring, 2=middle, 3=index, 4=thumb
408
+ FingerAssignment = tuple[int, int]
409
+
410
+ # fmt: off
411
+ FINGER_MAP: dict[str, FingerAssignment] = {
412
+ # Left pinky (hand=0, finger=0)
413
+ '`': (0, 0), '1': (0, 0), 'q': (0, 0), 'a': (0, 0), 'z': (0, 0),
414
+ '~': (0, 0), '!': (0, 0), 'Q': (0, 0), 'A': (0, 0), 'Z': (0, 0),
415
+ # Left ring (hand=0, finger=1)
416
+ '2': (0, 1), 'w': (0, 1), 's': (0, 1), 'x': (0, 1),
417
+ '@': (0, 1), 'W': (0, 1), 'S': (0, 1), 'X': (0, 1),
418
+ # Left middle (hand=0, finger=2)
419
+ '3': (0, 2), 'e': (0, 2), 'd': (0, 2), 'c': (0, 2),
420
+ '#': (0, 2), 'E': (0, 2), 'D': (0, 2), 'C': (0, 2),
421
+ # Left index - two columns (hand=0, finger=3)
422
+ '4': (0, 3), 'r': (0, 3), 'f': (0, 3), 'v': (0, 3),
423
+ '5': (0, 3), 't': (0, 3), 'g': (0, 3), 'b': (0, 3),
424
+ '$': (0, 3), 'R': (0, 3), 'F': (0, 3), 'V': (0, 3),
425
+ '%': (0, 3), 'T': (0, 3), 'G': (0, 3), 'B': (0, 3),
426
+ # Right index - two columns (hand=1, finger=3)
427
+ '6': (1, 3), 'y': (1, 3), 'h': (1, 3), 'n': (1, 3),
428
+ '7': (1, 3), 'u': (1, 3), 'j': (1, 3), 'm': (1, 3),
429
+ '^': (1, 3), 'Y': (1, 3), 'H': (1, 3), 'N': (1, 3),
430
+ '&': (1, 3), 'U': (1, 3), 'J': (1, 3), 'M': (1, 3),
431
+ # Right middle (hand=1, finger=2)
432
+ '8': (1, 2), 'i': (1, 2), 'k': (1, 2), ',': (1, 2),
433
+ '*': (1, 2), 'I': (1, 2), 'K': (1, 2), '<': (1, 2),
434
+ # Right ring (hand=1, finger=1)
435
+ '9': (1, 1), 'o': (1, 1), 'l': (1, 1), '.': (1, 1),
436
+ '(': (1, 1), 'O': (1, 1), 'L': (1, 1), '>': (1, 1),
437
+ # Right pinky (hand=1, finger=0)
438
+ '0': (1, 0), 'p': (1, 0), ';': (1, 0), '/': (1, 0),
439
+ '-': (1, 0), '[': (1, 0), "'": (1, 0),
440
+ ')': (1, 0), 'P': (1, 0), ':': (1, 0), '?': (1, 0),
441
+ '_': (1, 0), '{': (1, 0), '"': (1, 0),
442
+ '=': (1, 0), ']': (1, 0), '\\': (1, 0),
443
+ '+': (1, 0), '}': (1, 0), '|': (1, 0),
444
+ # Space - thumb (hand=2, finger=4)
445
+ ' ': (2, 4),
446
+ }
447
+ # fmt: on
448
+
449
+ # Motor coordination weights derived from Aalto 136M Keystrokes dataset
450
+ # Keys: transition type -> weight multiplier
451
+ # Values normalized so cross_hand = 1.0 (baseline)
452
+ MOTOR_WEIGHTS: dict[str, dict[str, float]] = {
453
+ # "Wet ink" - uncorrected errors (errors that survive to final output)
454
+ # Same-finger errors are caught/corrected, cross-hand errors slip through
455
+ "wet_ink": {
456
+ "same_finger": 0.858,
457
+ "same_hand": 0.965,
458
+ "cross_hand": 1.0,
459
+ },
460
+ # "Hastily edited" - raw error distribution before correction
461
+ # Same-finger errors occur most often but are easy to detect
462
+ "hastily_edited": {
463
+ "same_finger": 3.031,
464
+ "same_hand": 1.101,
465
+ "cross_hand": 1.0,
466
+ },
467
+ # Uniform weighting - all transitions equal (original behavior)
468
+ "uniform": {
469
+ "same_finger": 1.0,
470
+ "same_hand": 1.0,
471
+ "cross_hand": 1.0,
472
+ },
473
+ }
474
+
475
+
476
def classify_transition(prev_char: str, curr_char: str) -> str:
    """Name the kind of finger movement between two consecutive characters.

    Both characters are looked up in ``FINGER_MAP`` exactly as given.

    Args:
        prev_char: The previous character typed.
        curr_char: The current character being typed.

    Returns:
        ``'unknown'`` if either character has no finger assignment;
        ``'space'`` if either one is assigned to hand code 2 (the thumb);
        ``'cross_hand'`` if the hands differ; ``'same_finger'`` if hand and
        finger both match; otherwise ``'same_hand'``.
    """
    before = FINGER_MAP.get(prev_char)
    after = FINGER_MAP.get(curr_char)
    if before is None or after is None:
        return "unknown"

    (hand_before, finger_before), (hand_after, finger_after) = before, after

    # The thumb check precedes the hand comparison so that any transition
    # involving the space bar gets its own category.
    if 2 in (hand_before, hand_after):
        return "space"

    if hand_before != hand_after:
        return "cross_hand"

    # Same hand from here on: only the finger decides.
    return "same_finger" if finger_before == finger_after else "same_hand"
@@ -0,0 +1,108 @@
1
+ """Shared transcript type helpers used across attack and DLC modules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Literal, Sequence, TypeGuard, Union
6
+
7
+ TranscriptTurn = dict[str, Any]
8
+ Transcript = list[TranscriptTurn]
9
+
10
+ # Type alias for transcript target specifications.
11
+ # - "last": corrupt only the last turn (default behavior)
12
+ # - "all": corrupt all turns
13
+ # - "assistant": corrupt only turns with role="assistant"
14
+ # - "user": corrupt only turns with role="user"
15
+ # - int: corrupt a specific index (negative indexing supported)
16
+ # - Sequence[int]: corrupt specific indices
17
+ TranscriptTarget = Union[Literal["last", "all", "assistant", "user"], int, Sequence[int]]
18
+
19
+
20
+ def is_transcript(
21
+ value: Any,
22
+ *,
23
+ allow_empty: bool = True,
24
+ require_all_content: bool = False,
25
+ ) -> TypeGuard[Transcript]:
26
+ """Return True when ``value`` appears to be a chat transcript mapping list."""
27
+ if not isinstance(value, list):
28
+ return False
29
+
30
+ if not value:
31
+ return allow_empty
32
+
33
+ if not all(isinstance(turn, dict) for turn in value):
34
+ return False
35
+
36
+ if require_all_content:
37
+ return all("content" in turn for turn in value)
38
+
39
+ return "content" in value[-1]
40
+
41
+
42
def _normalize_transcript_index(index: int, length: int) -> int:
    """Convert a possibly negative index to its position in ``range(length)``.

    Raises:
        ValueError: If the index falls outside the transcript bounds.
    """
    normalized = index if index >= 0 else length + index
    if not 0 <= normalized < length:
        raise ValueError(f"Transcript index {index} out of bounds for length {length}")
    return normalized


def resolve_transcript_indices(
    transcript: Transcript,
    target: TranscriptTarget,
) -> list[int]:
    """Resolve a transcript target specification to concrete indices.

    Args:
        transcript: The transcript to resolve indices for.
        target: Which turns to select: ``"last"``, ``"all"``,
            ``"assistant"``, ``"user"``, a single integer index, or a
            sequence of integer indices. Negative indices count from the end.

    Returns:
        A list of valid indices into the transcript, sorted in ascending
        order and without duplicates. An empty transcript always yields an
        empty list, and ``target`` is not validated in that case.

    Raises:
        ValueError: If the target specification is invalid or references
            indices outside the transcript bounds. Booleans are rejected
            even though ``bool`` is a subclass of ``int``, so a stray
            ``True`` cannot silently select turn 1.
    """
    if not transcript:
        return []

    length = len(transcript)

    if target == "last":
        return [length - 1]

    if target == "all":
        return list(range(length))

    if target == "assistant" or target == "user":
        # Role targets select every turn whose "role" equals the target.
        return [i for i, turn in enumerate(transcript) if turn.get("role") == target]

    if isinstance(target, int) and not isinstance(target, bool):
        return [_normalize_transcript_index(target, length)]

    # Handle a sequence of indices (strings are sequences but never valid).
    if isinstance(target, Sequence) and not isinstance(target, str):
        indices: set[int] = set()
        for idx in target:
            if isinstance(idx, bool) or not isinstance(idx, int):
                raise ValueError(f"Transcript indices must be integers, got {type(idx).__name__}")
            indices.add(_normalize_transcript_index(idx, length))
        # The set removed duplicates; sort for a stable ascending result.
        return sorted(indices)

    raise ValueError(
        f"Invalid transcript target: {target!r}. "
        "Expected 'last', 'all', 'assistant', 'user', int, or sequence of ints."
    )
100
+
101
+
102
+ __all__ = [
103
+ "Transcript",
104
+ "TranscriptTarget",
105
+ "TranscriptTurn",
106
+ "is_transcript",
107
+ "resolve_transcript_indices",
108
+ ]