glitchlings 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .github/workflows/publish.yml +42 -0
- .github/workflows/testpypi.yml +38 -0
- .gitignore +12 -0
- LICENSE +21 -0
- MONSTER_MANUAL.md +272 -0
- PKG-INFO +244 -0
- README.md +192 -0
- RELEASE.md +47 -0
- __init__.py +73 -0
- dlc/__init__.py +0 -0
- dlc/prime.py +50 -0
- glitchlings-0.1.0.dist-info/METADATA +244 -0
- glitchlings-0.1.0.dist-info/RECORD +28 -0
- glitchlings-0.1.0.dist-info/WHEEL +4 -0
- glitchlings-0.1.0.dist-info/entry_points.txt +2 -0
- glitchlings-0.1.0.dist-info/licenses/LICENSE +21 -0
- main.py +6 -0
- pyproject.toml +74 -0
- util/__init__.py +73 -0
- zoo/__init__.py +50 -0
- zoo/core.py +136 -0
- zoo/jargoyle.py +89 -0
- zoo/mim1c.py +62 -0
- zoo/redactyl.py +73 -0
- zoo/reduple.py +54 -0
- zoo/rushmore.py +53 -0
- zoo/scannequin.py +124 -0
- zoo/typogre.py +224 -0
zoo/scannequin.py
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
import re
|
2
|
+
import random
|
3
|
+
from .core import Glitchling, AttackWave, AttackOrder
|
4
|
+
|
5
|
+
|
6
|
+
def ocr_artifacts(
|
7
|
+
text: str,
|
8
|
+
error_rate: float = 0.02,
|
9
|
+
seed: int | None = None,
|
10
|
+
rng: random.Random | None = None,
|
11
|
+
) -> str:
|
12
|
+
"""Introduce OCR-like artifacts into text.
|
13
|
+
|
14
|
+
Parameters
|
15
|
+
- text: Input text to corrupt.
|
16
|
+
- error_rate: Max proportion of eligible confusion matches to replace (default 0.02).
|
17
|
+
- seed: Optional seed if `rng` not provided.
|
18
|
+
- rng: Optional RNG; overrides seed.
|
19
|
+
|
20
|
+
Notes
|
21
|
+
- Uses a curated set of common OCR confusions (rn↔m, cl↔d, O↔0, l/I/1, etc.).
|
22
|
+
- Collects all non-overlapping candidate spans in reading order, then samples
|
23
|
+
a subset deterministically with the provided RNG.
|
24
|
+
- Replacements can change length (e.g., m→rn), so edits are applied from left
|
25
|
+
to right using precomputed spans to avoid index drift.
|
26
|
+
"""
|
27
|
+
if not text:
|
28
|
+
return text
|
29
|
+
|
30
|
+
if rng is None:
|
31
|
+
rng = random.Random(seed)
|
32
|
+
|
33
|
+
# map: source -> list of possible replacements
|
34
|
+
# Keep patterns small and specific; longer patterns first avoid overmatching
|
35
|
+
confusion_table: list[tuple[str, list[str]]] = [
|
36
|
+
("li", ["h"]),
|
37
|
+
("h", ["li"]),
|
38
|
+
("rn", ["m"]),
|
39
|
+
("m", ["rn"]),
|
40
|
+
("cl", ["d"]),
|
41
|
+
("d", ["cl"]),
|
42
|
+
("I", ["l"]),
|
43
|
+
("l", ["I", "1"]),
|
44
|
+
("1", ["l", "I"]),
|
45
|
+
("0", ["O"]),
|
46
|
+
("O", ["0"]),
|
47
|
+
("B", ["8"]),
|
48
|
+
("8", ["B"]),
|
49
|
+
("S", ["5"]),
|
50
|
+
("5", ["S"]),
|
51
|
+
("Z", ["2"]),
|
52
|
+
("2", ["Z"]),
|
53
|
+
("G", ["6"]),
|
54
|
+
("6", ["G"]),
|
55
|
+
("“", ['"']),
|
56
|
+
("”", ['"']),
|
57
|
+
("‘", ["'"]),
|
58
|
+
("’", ["'"]),
|
59
|
+
("—", ["-"]), # em dash -> hyphen
|
60
|
+
("–", ["-"]), # en dash -> hyphen
|
61
|
+
]
|
62
|
+
|
63
|
+
# Build candidate matches as (start, end, choices)
|
64
|
+
candidates: list[tuple[int, int, list[str]]] = []
|
65
|
+
|
66
|
+
# To avoid double-counting overlapping patterns (like 'l' inside 'li'),
|
67
|
+
# we will scan longer patterns first by sorting by len(src) desc.
|
68
|
+
for src, choices in sorted(confusion_table, key=lambda p: -len(p[0])):
|
69
|
+
pattern = re.escape(src)
|
70
|
+
for m in re.finditer(pattern, text):
|
71
|
+
start, end = m.span()
|
72
|
+
candidates.append((start, end, choices))
|
73
|
+
|
74
|
+
if not candidates:
|
75
|
+
return text
|
76
|
+
|
77
|
+
# Decide how many to replace
|
78
|
+
k = int(len(candidates) * error_rate)
|
79
|
+
if k <= 0:
|
80
|
+
return text
|
81
|
+
|
82
|
+
# Shuffle deterministically and select non-overlapping k spans
|
83
|
+
rng.shuffle(candidates)
|
84
|
+
chosen: list[tuple[int, int, str]] = []
|
85
|
+
occupied: list[tuple[int, int]] = []
|
86
|
+
|
87
|
+
def overlaps(a: tuple[int, int], b: tuple[int, int]) -> bool:
|
88
|
+
return not (a[1] <= b[0] or b[1] <= a[0])
|
89
|
+
|
90
|
+
for start, end, choices in candidates:
|
91
|
+
if len(chosen) >= k:
|
92
|
+
break
|
93
|
+
span = (start, end)
|
94
|
+
if any(overlaps(span, occ) for occ in occupied):
|
95
|
+
continue
|
96
|
+
replacement = rng.choice(choices)
|
97
|
+
chosen.append((start, end, replacement))
|
98
|
+
occupied.append(span)
|
99
|
+
|
100
|
+
if not chosen:
|
101
|
+
return text
|
102
|
+
|
103
|
+
# Apply edits from left to right
|
104
|
+
chosen.sort(key=lambda t: t[0])
|
105
|
+
out_parts = []
|
106
|
+
cursor = 0
|
107
|
+
for start, end, rep in chosen:
|
108
|
+
if cursor < start:
|
109
|
+
out_parts.append(text[cursor:start])
|
110
|
+
out_parts.append(rep)
|
111
|
+
cursor = end
|
112
|
+
if cursor < len(text):
|
113
|
+
out_parts.append(text[cursor:])
|
114
|
+
|
115
|
+
return "".join(out_parts)
|
116
|
+
|
117
|
+
|
118
|
+
# Module-level Scannequin instance: wraps `ocr_artifacts` as a Glitchling with
# CHARACTER scope and LATE order. The extra `error_rate` keyword is presumably
# forwarded to the corruption function -- confirm against core.Glitchling.
scannequin = Glitchling(
    name="Scannequin",
    corruption_function=ocr_artifacts,
    scope=AttackWave.CHARACTER,
    order=AttackOrder.LATE,
    error_rate=0.02,
)
|
zoo/typogre.py
ADDED
@@ -0,0 +1,224 @@
|
|
1
|
+
from .core import Glitchling, AttackWave, AttackOrder
|
2
|
+
from ..util import KEYNEIGHBORS
|
3
|
+
import random
|
4
|
+
import re
|
5
|
+
from typing import Literal, Optional
|
6
|
+
|
7
|
+
# Removed dependency on external 'typo' library for deterministic control.
|
8
|
+
|
9
|
+
|
10
|
+
def unichar(text: str, rng: random.Random) -> str:
    """Collapse one random doubled word character (like 'ee' in 'seed') to a single occurrence.

    Parameters
    - text: Input string.
    - rng: Random source used to pick which doubled pair to collapse.

    Returns
    - `text` with one doubled pair reduced to a single character, or `text`
      unchanged when no eligible pair exists.

    Notes
    - Only pairs of word characters that are followed by another word character
      are eligible, so doubled spaces or punctuation ("a  b", "..x") and a pair
      at the very end of a word ("see") are left alone.
    """
    # Group 1 is the doubled pair, group 2 the repeated character. Requiring
    # \w for the character itself (not just for the lookahead) keeps
    # whitespace and punctuation runs from matching.
    matches = list(re.finditer(r"((\w)\2)(?=\w)", text))
    if not matches:
        return text
    m = rng.choice(matches)
    start, end = m.span(1)
    # Replace the doubled pair with a single char
    return text[: start + 1] + text[end:]
|
20
|
+
|
21
|
+
|
22
|
+
def subs(text: str, index: int, rng: random.Random, key_neighbors=None) -> str:
    """Substitute the character at `index` with one of its keyboard neighbors.

    Parameters
    - text: Input string.
    - index: Position of the character to replace.
    - rng: Random source used to pick the neighbor.
    - key_neighbors: Mapping from a character to a sequence of neighbor
      characters; defaults to KEYNEIGHBORS.CURATOR_QWERTY.

    Returns
    - `text` with the character replaced, or `text` unchanged when `index` is
      out of range or the character has no neighbors in the map.
    """
    if key_neighbors is None:
        key_neighbors = getattr(KEYNEIGHBORS, "CURATOR_QWERTY")
    # Same guard as `indel`: a negative index would otherwise slice incorrectly
    # (text[:-1] + c + text[0:]) and a too-large one would raise IndexError.
    if index < 0 or index >= len(text):
        return text
    char = text[index]
    neighbors = key_neighbors.get(char, [])
    if not neighbors:
        return text
    new_char = rng.choice(neighbors)
    return text[:index] + new_char + text[index + 1 :]
|
31
|
+
|
32
|
+
|
33
|
+
def indel(
    text: str,
    index: int,
    op: Literal["delete", "insert", "swap"],
    rng: random.Random,
    key_neighbors=None,
):
    """Apply a single delete, swap or insert edit at `index`.

    - "delete" removes the character at `index`.
    - "swap" exchanges it with the following character (no-op on the last one).
    - Any other `op` inserts a keyboard neighbor of the character in front of
      it, or a copy of the character when the map lists no neighbors.

    `text` is returned unchanged when `index` is out of range. `key_neighbors`
    defaults to KEYNEIGHBORS.CURATOR_QWERTY.
    """
    if key_neighbors is None:
        key_neighbors = getattr(KEYNEIGHBORS, "CURATOR_QWERTY")

    length = len(text)
    if not 0 <= index < length:
        return text

    # Split once around the target character; every edit is a recombination.
    head = text[:index]
    here = text[index]
    tail = text[index + 1 :]

    if op == "delete":
        return head + tail

    if op == "swap":
        if index + 1 >= length:
            return text
        return head + tail[0] + here + tail[1:]

    # insert (choose neighbor of this char) - if none, just duplicate char
    options = key_neighbors.get(here, []) or [here]
    return head + rng.choice(options) + here + tail
|
55
|
+
|
56
|
+
|
57
|
+
def repeated_char(text: str, rng: random.Random) -> str:
    """Duplicate one randomly chosen non-whitespace character (e.g. 'cat' -> 'caat').

    Returns `text` unchanged when it has no non-whitespace character.
    """
    candidates = [pos for pos, ch in enumerate(text) if not ch.isspace()]
    if candidates:
        pos = rng.choice(candidates)
        # Keep everything up to and including `pos`, then resume from `pos`.
        return text[: pos + 1] + text[pos:]
    return text
|
64
|
+
|
65
|
+
|
66
|
+
def random_space(text: str, rng: random.Random) -> str:
    """Insert one space at a random interior boundary of `text`.

    The space never lands before the first or after the last character;
    strings shorter than two characters are returned unchanged.
    """
    size = len(text)
    if size >= 2:
        cut = rng.randrange(1, size)
        return " ".join((text[:cut], text[cut:]))
    return text
|
72
|
+
|
73
|
+
|
74
|
+
def skipped_space(text: str, rng: random.Random) -> str:
    """Delete one randomly chosen space character (simulates a missed space press).

    Only the plain ' ' character counts; `text` is returned unchanged when it
    contains none.
    """
    spaces = [pos for pos, ch in enumerate(text) if ch == " "]
    if not spaces:
        return text
    drop = rng.choice(spaces)
    return text[:drop] + text[drop + 1 :]
|
82
|
+
|
83
|
+
|
84
|
+
def _is_word_char(c: str) -> bool:
    """Return True for an underscore or an alphanumeric string."""
    return c == "_" or c.isalnum()
|
86
|
+
|
87
|
+
|
88
|
+
def _eligible_idx(s: str, i: int, preserve_first_last: bool) -> bool:
    """Tell whether position `i` of `s` may be edited.

    The position must be in range and hold a word character. When
    `preserve_first_last` is set, both neighbors must also be word characters,
    i.e. the position is strictly inside a word.
    """
    in_range = 0 <= i < len(s)
    if not (in_range and _is_word_char(s[i])):
        return False
    if preserve_first_last:
        # interior-of-word only
        has_left = i > 0 and _is_word_char(s[i - 1])
        has_right = i + 1 < len(s) and _is_word_char(s[i + 1])
        return has_left and has_right
    return True
|
100
|
+
|
101
|
+
|
102
|
+
def _draw_eligible_index(
    rng: random.Random, s: str, preserve_first_last: bool, max_tries: int = 16
) -> Optional[int]:
    """Pick an eligible index of `s`, or None when there is none.

    Makes up to `max_tries` uniform random probes first; if all miss, scans
    every position once, starting at a random offset and wrapping around.
    """
    size = len(s)
    if not size:
        return None

    for _ in range(max_tries):
        guess = rng.randrange(size)
        if _eligible_idx(s, guess, preserve_first_last):
            return guess

    # Fallback: one full circular pass from a random origin (rare path)
    origin = rng.randrange(size)
    for step in range(size):
        probe = (origin + step) % size
        if _eligible_idx(s, probe, preserve_first_last):
            return probe
    return None
|
124
|
+
|
125
|
+
|
126
|
+
def fatfinger(
    text: str,
    max_change_rate: float = 0.02,
    preserve_first_last: bool = False,
    keyboard: str = "CURATOR_QWERTY",
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Introduce character-level "fat finger" edits.

    Parameters
    - text: Input string to corrupt.
    - max_change_rate: Proportion of characters used to size the edit budget (default 0.02).
    - preserve_first_last: If True, positional edits only target the interior of words (default False).
    - keyboard: Attribute name of the neighbor map on util.KEYNEIGHBORS (default "CURATOR_QWERTY").
    - seed: Optional seed used if `rng` is not provided; creates a dedicated Random.
    - rng: Optional random.Random to use; if provided, overrides `seed`.

    Notes
    - The budget is `max(1, int(len(text) * max_change_rate))`, so non-empty
      text always gets at least one attempted edit.
    - Edit types are drawn up front; positions are drawn from the current
      string right before each edit so offsets stay valid.
    - An edit is skipped silently when no eligible position or neighbor exists.
    - Empty input yields "".
    """
    if rng is None:
        rng = random.Random(seed)
    if not text:
        return ""

    s = text
    budget = max(1, int(len(s) * max_change_rate))

    layout = getattr(KEYNEIGHBORS, keyboard)
    choose = rng.choice

    # Edits that need an index drawn here vs. helpers that pick their own spot.
    positional_actions = ("char_swap", "missing_char", "extra_char", "nearby_char")
    global_handlers = {
        "skipped_space": skipped_space,
        "random_space": random_space,
        "unichar": unichar,
        "repeated_char": repeated_char,
    }
    all_actions = positional_actions + tuple(global_handlers)

    planned = [choose(all_actions) for _ in range(budget)]

    for action in planned:
        handler = global_handlers.get(action)
        if handler is not None:
            s = handler(s, rng)
            continue

        idx = _draw_eligible_index(rng, s, preserve_first_last)
        if idx is None:
            continue  # nothing eligible; skip

        if action == "missing_char":
            s = s[:idx] + s[idx + 1 :]

        elif action == "char_swap":
            # Prefer the right-hand neighbor, fall back to the left one;
            # give up on this edit if neither partner is acceptable.
            for j in (idx + 1, idx - 1):
                if 0 <= j < len(s) and (
                    not preserve_first_last or _eligible_idx(s, j, True)
                ):
                    lo, hi = min(idx, j), max(idx, j)
                    s = s[:lo] + s[hi] + s[lo] + s[hi + 1 :]
                    break

        else:
            ch = s[idx]
            neighbors = layout.get(ch.lower(), [])
            if action == "extra_char":
                # No neighbors known: duplicate the character itself.
                ins = choose(neighbors or [ch]) or ch
                s = s[:idx] + ins + s[idx:]
            elif neighbors:  # nearby_char
                s = s[:idx] + choose(neighbors) + s[idx + 1 :]

    return s
|
217
|
+
|
218
|
+
|
219
|
+
# Module-level Typogre instance: wraps `fatfinger` as a Glitchling with
# CHARACTER scope and EARLY order.
typogre = Glitchling(
    name="Typogre",
    corruption_function=fatfinger,
    scope=AttackWave.CHARACTER,
    order=AttackOrder.EARLY,
)
|