glitchlings 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zoo/scannequin.py ADDED
@@ -0,0 +1,124 @@
1
+ import re
2
+ import random
3
+ from .core import Glitchling, AttackWave, AttackOrder
4
+
5
+
6
+ def ocr_artifacts(
7
+ text: str,
8
+ error_rate: float = 0.02,
9
+ seed: int | None = None,
10
+ rng: random.Random | None = None,
11
+ ) -> str:
12
+ """Introduce OCR-like artifacts into text.
13
+
14
+ Parameters
15
+ - text: Input text to corrupt.
16
+ - error_rate: Max proportion of eligible confusion matches to replace (default 0.02).
17
+ - seed: Optional seed if `rng` not provided.
18
+ - rng: Optional RNG; overrides seed.
19
+
20
+ Notes
21
+ - Uses a curated set of common OCR confusions (rn↔m, cl↔d, O↔0, l/I/1, etc.).
22
+ - Collects all non-overlapping candidate spans in reading order, then samples
23
+ a subset deterministically with the provided RNG.
24
+ - Replacements can change length (e.g., m→rn), so edits are applied from left
25
+ to right using precomputed spans to avoid index drift.
26
+ """
27
+ if not text:
28
+ return text
29
+
30
+ if rng is None:
31
+ rng = random.Random(seed)
32
+
33
+ # map: source -> list of possible replacements
34
+ # Keep patterns small and specific; longer patterns first avoid overmatching
35
+ confusion_table: list[tuple[str, list[str]]] = [
36
+ ("li", ["h"]),
37
+ ("h", ["li"]),
38
+ ("rn", ["m"]),
39
+ ("m", ["rn"]),
40
+ ("cl", ["d"]),
41
+ ("d", ["cl"]),
42
+ ("I", ["l"]),
43
+ ("l", ["I", "1"]),
44
+ ("1", ["l", "I"]),
45
+ ("0", ["O"]),
46
+ ("O", ["0"]),
47
+ ("B", ["8"]),
48
+ ("8", ["B"]),
49
+ ("S", ["5"]),
50
+ ("5", ["S"]),
51
+ ("Z", ["2"]),
52
+ ("2", ["Z"]),
53
+ ("G", ["6"]),
54
+ ("6", ["G"]),
55
+ ("“", ['"']),
56
+ ("”", ['"']),
57
+ ("‘", ["'"]),
58
+ ("’", ["'"]),
59
+ ("—", ["-"]), # em dash -> hyphen
60
+ ("–", ["-"]), # en dash -> hyphen
61
+ ]
62
+
63
+ # Build candidate matches as (start, end, choices)
64
+ candidates: list[tuple[int, int, list[str]]] = []
65
+
66
+ # To avoid double-counting overlapping patterns (like 'l' inside 'li'),
67
+ # we will scan longer patterns first by sorting by len(src) desc.
68
+ for src, choices in sorted(confusion_table, key=lambda p: -len(p[0])):
69
+ pattern = re.escape(src)
70
+ for m in re.finditer(pattern, text):
71
+ start, end = m.span()
72
+ candidates.append((start, end, choices))
73
+
74
+ if not candidates:
75
+ return text
76
+
77
+ # Decide how many to replace
78
+ k = int(len(candidates) * error_rate)
79
+ if k <= 0:
80
+ return text
81
+
82
+ # Shuffle deterministically and select non-overlapping k spans
83
+ rng.shuffle(candidates)
84
+ chosen: list[tuple[int, int, str]] = []
85
+ occupied: list[tuple[int, int]] = []
86
+
87
+ def overlaps(a: tuple[int, int], b: tuple[int, int]) -> bool:
88
+ return not (a[1] <= b[0] or b[1] <= a[0])
89
+
90
+ for start, end, choices in candidates:
91
+ if len(chosen) >= k:
92
+ break
93
+ span = (start, end)
94
+ if any(overlaps(span, occ) for occ in occupied):
95
+ continue
96
+ replacement = rng.choice(choices)
97
+ chosen.append((start, end, replacement))
98
+ occupied.append(span)
99
+
100
+ if not chosen:
101
+ return text
102
+
103
+ # Apply edits from left to right
104
+ chosen.sort(key=lambda t: t[0])
105
+ out_parts = []
106
+ cursor = 0
107
+ for start, end, rep in chosen:
108
+ if cursor < start:
109
+ out_parts.append(text[cursor:start])
110
+ out_parts.append(rep)
111
+ cursor = end
112
+ if cursor < len(text):
113
+ out_parts.append(text[cursor:])
114
+
115
+ return "".join(out_parts)
116
+
117
+
118
# Module-level Scannequin glitchling: wraps ocr_artifacts as its corruption
# function, tagged with character scope and late ordering.
# NOTE(review): how Glitchling uses scope/order and the extra error_rate
# keyword is defined in .core, which is not visible here — presumably
# error_rate is forwarded to ocr_artifacts; confirm against core.Glitchling.
scannequin = Glitchling(
    name="Scannequin",
    corruption_function=ocr_artifacts,
    scope=AttackWave.CHARACTER,
    order=AttackOrder.LATE,
    error_rate=0.02,
)
zoo/typogre.py ADDED
@@ -0,0 +1,224 @@
1
+ from .core import Glitchling, AttackWave, AttackOrder
2
+ from ..util import KEYNEIGHBORS
3
+ import random
4
+ import re
5
+ from typing import Literal, Optional
6
+
7
+ # Removed dependency on external 'typo' library for deterministic control.
8
+
9
+
10
def unichar(text: str, rng: random.Random) -> str:
    """Collapse one random doubled letter (like 'ee' in 'seed') to a single one.

    Parameters
    - text: Input string.
    - rng: Random source used to pick which doubled letter to collapse.

    Returns the text with one pair reduced to a single letter, or the text
    unchanged when no pair qualifies.

    Notes
    - Only letters count: the pair is matched with ``[^\\W\\d_]`` (a word
      character that is neither a digit nor an underscore), so doubled
      spaces, punctuation and digits are left alone.
    - The pair must be followed by another word character, so a doubled
      letter at the very end of a word ('tree') is not a candidate.
    - The backreference is case-sensitive: 'Ee' is not a doubled letter.
    """
    pairs = list(re.finditer(r"(([^\W\d_])\2)(?=\w)", text))
    if not pairs:
        return text
    picked = rng.choice(pairs)
    start, end = picked.span(1)
    # Keep the first letter of the pair and drop the second.
    return text[:start] + text[start] + text[end:]
20
+
21
+
22
def subs(text: str, index: int, rng: random.Random, key_neighbors=None) -> str:
    """Replace the character at ``index`` with one of its keyboard neighbours.

    Parameters
    - text: Input string.
    - index: Position of the character to substitute.
    - rng: Random source used to pick the neighbour.
    - key_neighbors: Mapping queried with ``.get(char, [])`` for the list of
      neighbours; defaults to ``KEYNEIGHBORS.CURATOR_QWERTY``.

    Returns the edited string. The text is returned unchanged when ``index``
    is outside ``0 <= index < len(text)`` or the character has no neighbours
    in the mapping.
    """
    if key_neighbors is None:
        key_neighbors = KEYNEIGHBORS.CURATOR_QWERTY
    # Same guard as indel(): without it an index past the end raises
    # IndexError and a negative index yields a corrupted result, because the
    # text[index + 1:] slice wraps around to the start of the string.
    if index < 0 or index >= len(text):
        return text
    neighbors = key_neighbors.get(text[index], [])
    if not neighbors:
        return text
    return text[:index] + rng.choice(neighbors) + text[index + 1 :]
31
+
32
+
33
def indel(
    text: str,
    index: int,
    op: Literal["delete", "insert", "swap"],
    rng: random.Random,
    key_neighbors=None,
):
    """Apply a single delete, swap or insert edit at ``index``.

    Parameters
    - text: Input string.
    - index: Position the edit is anchored on; out-of-range values leave the
      text unchanged.
    - op: "delete" removes the character, "swap" exchanges it with the next
      one (no-op on the last character); any other value inserts.
    - rng: Random source used to pick the inserted character.
    - key_neighbors: Mapping queried with ``.get(char, [])`` for neighbours;
      defaults to ``KEYNEIGHBORS.CURATOR_QWERTY``.

    Returns the edited string.
    """
    layout = KEYNEIGHBORS.CURATOR_QWERTY if key_neighbors is None else key_neighbors

    if not 0 <= index < len(text):
        return text

    before = text[:index]
    current = text[index]

    if op == "delete":
        return before + text[index + 1 :]

    if op == "swap":
        is_last = index >= len(text) - 1
        if is_last:
            return text
        return before + text[index + 1] + current + text[index + 2 :]

    # Insert: put a neighbour of the current character in front of it; when
    # the layout lists none, the character itself is duplicated.
    options = layout.get(current, [])
    if not options:
        options = [current]
    return before + rng.choice(options) + text[index:]
55
+
56
+
57
def repeated_char(text: str, rng: random.Random) -> str:
    """Double one randomly chosen non-whitespace character ('cat' -> 'caat').

    Returns the text unchanged when it holds no non-whitespace character.
    """
    visible = [pos for pos, ch in enumerate(text) if not ch.isspace()]
    if visible:
        pos = rng.choice(visible)
        # Everything up to and including the pick, then the pick onward again.
        return text[: pos + 1] + text[pos:]
    return text
64
+
65
+
66
def random_space(text: str, rng: random.Random) -> str:
    """Insert one space at a random interior boundary between two characters.

    Texts shorter than two characters have no interior boundary and are
    returned unchanged.
    """
    length = len(text)
    if length < 2:
        return text
    cut = rng.randrange(1, length)
    return " ".join((text[:cut], text[cut:]))
72
+
73
+
74
def skipped_space(text: str, rng: random.Random) -> str:
    """Delete one randomly chosen space character (a missed space-bar press).

    Only the plain space ' ' counts; other whitespace is never removed.
    Returns the text unchanged when it contains no space.
    """
    spaces = [pos for pos, ch in enumerate(text) if ch == " "]
    if not spaces:
        return text
    drop = rng.choice(spaces)
    return text[:drop] + text[drop + 1 :]
82
+
83
+
84
def _is_word_char(c: str) -> bool:
    """Return True when ``c`` is an underscore or alphanumeric."""
    return c == "_" or c.isalnum()
86
+
87
+
88
def _eligible_idx(s: str, i: int, preserve_first_last: bool) -> bool:
    """Tell whether position ``i`` of ``s`` may be edited.

    The position must be in range and hold a word character. With
    ``preserve_first_last`` set, both immediate neighbours must be word
    characters as well, i.e. the position is strictly inside a word.
    """
    if not 0 <= i < len(s) or not _is_word_char(s[i]):
        return False
    if preserve_first_last:
        has_left = i > 0 and _is_word_char(s[i - 1])
        has_right = i + 1 < len(s) and _is_word_char(s[i + 1])
        return has_left and has_right
    return True
100
+
101
+
102
def _draw_eligible_index(
    rng: random.Random, s: str, preserve_first_last: bool, max_tries: int = 16
) -> Optional[int]:
    """Pick an eligible index of ``s``, or None when there is none.

    Makes up to ``max_tries`` uniform random draws first. If every draw
    misses, it draws one more starting point and walks the whole string once
    from there, wrapping around at the end.
    """
    size = len(s)
    if not size:
        return None

    for _ in range(max_tries):
        guess = rng.randrange(size)
        if _eligible_idx(s, guess, preserve_first_last):
            return guess

    # Rare path: one full wraparound sweep from a random origin.
    origin = rng.randrange(size)
    for step in range(size):
        probe = (origin + step) % size
        if _eligible_idx(s, probe, preserve_first_last):
            return probe
    return None
124
+
125
+
126
+ def fatfinger(
127
+ text: str,
128
+ max_change_rate: float = 0.02,
129
+ preserve_first_last: bool = False,
130
+ keyboard: str = "CURATOR_QWERTY",
131
+ seed: int | None = None,
132
+ rng: random.Random | None = None,
133
+ ) -> str:
134
+ """Introduce character-level "fat finger" edits.
135
+
136
+ Parameters
137
+ - text: Input string to corrupt.
138
+ - max_change_rate: Max proportion of characters to edit (default 0.02).
139
+ - preserve_first_last: If True, avoid modifying first/last character of words (default False).
140
+ - keyboard: Name of keyboard neighbor map from util.KEYNEIGHBORS to use (default "CURATOR_QWERTY").
141
+ - seed: Optional seed used if `rng` is not provided; creates a dedicated Random.
142
+ - rng: Optional random.Random to use; if provided, overrides `seed`.
143
+
144
+ Notes
145
+ - Chooses indices lazily from the current text after each edit to keep offsets valid.
146
+ - Uses the glitchling's own RNG for determinism when run via Gaggle/summon.
147
+ """
148
+ if rng is None:
149
+ rng = random.Random(seed)
150
+ if not text:
151
+ return ""
152
+
153
+ s = text
154
+ max_changes = max(1, int(len(s) * max_change_rate))
155
+
156
+ # Prebind for speed
157
+ layout = getattr(KEYNEIGHBORS, keyboard)
158
+ choose = rng.choice
159
+
160
+ # Actions that require a specific index vs. "global" actions
161
+ positional_actions = ("char_swap", "missing_char", "extra_char", "nearby_char")
162
+ global_actions = ("skipped_space", "random_space", "unichar", "repeated_char")
163
+ all_actions = positional_actions + global_actions
164
+
165
+ # Pre-draw action types (cheap); pick indices lazily on each step
166
+ actions_drawn = [choose(all_actions) for _ in range(max_changes)]
167
+
168
+ for action in actions_drawn:
169
+ if action in positional_actions:
170
+ idx = _draw_eligible_index(rng, s, preserve_first_last)
171
+ if idx is None:
172
+ continue # nothing eligible; skip
173
+
174
+ if action == "char_swap":
175
+ # Try swapping to the right; if not possible, optionally try left
176
+ j = idx + 1
177
+ if j < len(s) and (
178
+ not preserve_first_last or _eligible_idx(s, j, True)
179
+ ):
180
+ s = s[:idx] + s[j] + s[idx] + s[j + 1 :]
181
+ else:
182
+ j = idx - 1
183
+ if j >= 0 and (
184
+ not preserve_first_last or _eligible_idx(s, j, True)
185
+ ):
186
+ s = s[:j] + s[idx] + s[j] + s[idx + 1 :]
187
+ # else: give up this action
188
+
189
+ elif action == "missing_char":
190
+ s = s[:idx] + s[idx + 1 :]
191
+
192
+ elif action == "extra_char":
193
+ ch = s[idx]
194
+ neighbors = layout.get(ch.lower(), []) or [ch]
195
+ ins = choose(neighbors) or ch
196
+ s = s[:idx] + ins + s[idx:]
197
+
198
+ elif action == "nearby_char":
199
+ ch = s[idx]
200
+ neighbors = layout.get(ch.lower(), [])
201
+ if neighbors:
202
+ rep = choose(neighbors)
203
+ s = s[:idx] + rep + s[idx + 1 :]
204
+
205
+ else:
206
+ # "Global" actions that internally pick their own positions
207
+ if action == "skipped_space":
208
+ s = skipped_space(s, rng)
209
+ elif action == "random_space":
210
+ s = random_space(s, rng)
211
+ elif action == "unichar":
212
+ s = unichar(s, rng)
213
+ elif action == "repeated_char":
214
+ s = repeated_char(s, rng)
215
+
216
+ return s
217
+
218
+
219
# Module-level Typogre glitchling: wraps fatfinger as its corruption
# function, tagged with character scope and early ordering.
# NOTE(review): how Glitchling interprets scope/order is defined in .core,
# which is not visible here — confirm against core.Glitchling.
typogre = Glitchling(
    name="Typogre",
    corruption_function=fatfinger,
    scope=AttackWave.CHARACTER,
    order=AttackOrder.EARLY,
)