glitchlings 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +42 -0
- glitchlings/__main__.py +9 -0
- {dlc → glitchlings/dlc}/prime.py +52 -50
- glitchlings/main.py +238 -0
- glitchlings/util/__init__.py +151 -0
- {zoo → glitchlings/zoo}/__init__.py +57 -50
- glitchlings/zoo/core.py +230 -0
- glitchlings/zoo/jargoyle.py +225 -0
- {zoo → glitchlings/zoo}/mim1c.py +79 -62
- {zoo → glitchlings/zoo}/redactyl.py +128 -73
- {zoo → glitchlings/zoo}/reduple.py +100 -54
- glitchlings/zoo/rushmore.py +97 -0
- {zoo → glitchlings/zoo}/scannequin.py +166 -124
- glitchlings/zoo/typogre.py +184 -0
- {glitchlings-0.1.1.dist-info → glitchlings-0.1.3.dist-info}/METADATA +49 -23
- glitchlings-0.1.3.dist-info/RECORD +20 -0
- {glitchlings-0.1.1.dist-info → glitchlings-0.1.3.dist-info}/licenses/LICENSE +201 -201
- .github/workflows/publish.yml +0 -42
- .gitignore +0 -14
- LICENSE +0 -201
- MONSTER_MANUAL.md +0 -272
- PKG-INFO +0 -429
- README.md +0 -196
- __init__.py +0 -73
- glitchlings-0.1.1.dist-info/RECORD +0 -26
- main.py +0 -6
- pyproject.toml +0 -79
- util/__init__.py +0 -73
- zoo/core.py +0 -136
- zoo/jargoyle.py +0 -89
- zoo/rushmore.py +0 -53
- zoo/typogre.py +0 -224
- {dlc → glitchlings/dlc}/__init__.py +0 -0
- {glitchlings-0.1.1.dist-info → glitchlings-0.1.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.1.1.dist-info → glitchlings-0.1.3.dist-info}/entry_points.txt +0 -0
@@ -1,124 +1,166 @@
|
|
1
|
-
import re
|
2
|
-
import random
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
-
|
19
|
-
|
20
|
-
|
21
|
-
-
|
22
|
-
-
|
23
|
-
|
24
|
-
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
("
|
40
|
-
("
|
41
|
-
("
|
42
|
-
("
|
43
|
-
("
|
44
|
-
("
|
45
|
-
("
|
46
|
-
("
|
47
|
-
("
|
48
|
-
("
|
49
|
-
("
|
50
|
-
("
|
51
|
-
("
|
52
|
-
("
|
53
|
-
("
|
54
|
-
("
|
55
|
-
("
|
56
|
-
("
|
57
|
-
("
|
58
|
-
("
|
59
|
-
("
|
60
|
-
("
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
#
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
cursor
|
112
|
-
|
113
|
-
out_parts.append(
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
error_rate=0.02,
|
124
|
-
|
1
|
+
import re
|
2
|
+
import random
|
3
|
+
|
4
|
+
from .core import Glitchling, AttackWave, AttackOrder
|
5
|
+
|
6
|
+
try:
|
7
|
+
from glitchlings._zoo_rust import ocr_artifacts as _ocr_artifacts_rust
|
8
|
+
except ImportError: # pragma: no cover - compiled extension not present
|
9
|
+
_ocr_artifacts_rust = None
|
10
|
+
|
11
|
+
|
12
|
+
def _python_ocr_artifacts(
    text: str,
    *,
    error_rate: float,
    rng: random.Random,
) -> str:
    """Introduce OCR-like artifacts into text (pure-Python implementation).

    Parameters
    - text: Input text to corrupt; empty text is returned unchanged.
    - error_rate: Proportion of candidate confusion matches to replace. The
      edit budget is ``int(len(candidates) * error_rate)``, so a low rate on a
      short text rounds down to zero edits.
    - rng: Random number generator driving every random decision. Required:
      this function takes no seed, the caller supplies the generator.

    Returns
    - The corrupted text, or ``text`` unchanged when no edit is made.

    Notes
    - Uses a curated set of common OCR confusions (rn↔m, cl↔d, O↔0, l/I/1, etc.).
    - Every match of every pattern is a candidate, so candidates may overlap
      (e.g. the 'l' inside 'li') and overlapping matches all count toward the
      edit budget. Overlaps are resolved while sampling: a span is skipped if
      it collides with a span that was already chosen.
    - Replacements can change length (e.g., m→rn), so edits are applied from left
      to right using precomputed spans to avoid index drift.
    """
    if not text:
        return text

    # (source, possible replacements) pairs; patterns are kept small and specific.
    confusion_table: list[tuple[str, list[str]]] = [
        ("li", ["h"]),
        ("h", ["li"]),
        ("rn", ["m"]),
        ("m", ["rn"]),
        ("cl", ["d"]),
        ("d", ["cl"]),
        ("I", ["l"]),
        ("l", ["I", "1"]),
        ("1", ["l", "I"]),
        ("0", ["O"]),
        ("O", ["0"]),
        ("B", ["8"]),
        ("8", ["B"]),
        ("S", ["5"]),
        ("5", ["S"]),
        ("Z", ["2"]),
        ("2", ["Z"]),
        ("G", ["6"]),
        ("6", ["G"]),
        ("“", ['"']),
        ("”", ['"']),
        ("‘", ["'"]),
        ("’", ["'"]),
        ("—", ["-"]),  # em dash -> hyphen
        ("–", ["-"]),  # en dash -> hyphen
    ]

    # Build candidate matches as (start, end, choices).
    candidates: list[tuple[int, int, list[str]]] = []

    # Longer patterns are scanned first (the sort is stable, so table order
    # breaks ties). This fixes the pre-shuffle order of the candidate list,
    # which must not change or seeded output would change with it.
    for src, choices in sorted(confusion_table, key=lambda p: -len(p[0])):
        pattern = re.escape(src)
        for m in re.finditer(pattern, text):
            start, end = m.span()
            candidates.append((start, end, choices))

    if not candidates:
        return text

    # Decide how many to replace; int() truncates toward zero.
    k = int(len(candidates) * error_rate)
    if k <= 0:
        return text

    # Shuffle with the caller's RNG and greedily take up to k spans that do
    # not overlap a span already taken.
    rng.shuffle(candidates)
    chosen: list[tuple[int, int, str]] = []
    occupied: list[tuple[int, int]] = []

    def overlaps(a: tuple[int, int], b: tuple[int, int]) -> bool:
        # Half-open spans are disjoint when one ends at or before the other starts.
        return not (a[1] <= b[0] or b[1] <= a[0])

    for start, end, choices in candidates:
        if len(chosen) >= k:
            break
        span = (start, end)
        if any(overlaps(span, occ) for occ in occupied):
            continue
        # The replacement is drawn only for accepted spans, so rejected
        # candidates consume no randomness.
        replacement = rng.choice(choices)
        chosen.append((start, end, replacement))
        occupied.append(span)

    if not chosen:
        return text

    # Apply edits from left to right, copying the untouched text between spans.
    chosen.sort(key=lambda t: t[0])
    out_parts = []
    cursor = 0
    for start, end, rep in chosen:
        if cursor < start:
            out_parts.append(text[cursor:start])
        out_parts.append(rep)
        cursor = end
    if cursor < len(text):
        out_parts.append(text[cursor:])

    return "".join(out_parts)
|
119
|
+
|
120
|
+
|
121
|
+
def ocr_artifacts(
    text: str,
    error_rate: float = 0.02,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Introduce OCR-like artifacts into text.

    Dispatches to the compiled Rust implementation when it was importable,
    otherwise to the pure-Python fallback.

    Parameters
    - text: Input text to corrupt; falsy text is returned as-is.
    - error_rate: Proportion of candidate matches to replace (default 0.02).
    - seed: Seed for a fresh ``random.Random`` when ``rng`` is not given.
    - rng: Random number generator to use; when provided, ``seed`` is ignored.
    """
    if not text:
        return text

    generator = random.Random(seed) if rng is None else rng

    if _ocr_artifacts_rust is None:
        return _python_ocr_artifacts(text, error_rate=error_rate, rng=generator)

    # The Rust entry point is called with positional arguments.
    return _ocr_artifacts_rust(text, error_rate, generator)
|
142
|
+
|
143
|
+
|
144
|
+
class Scannequin(Glitchling):
    """Glitchling that simulates OCR artifacts using common confusions."""

    def __init__(self, *, error_rate: float = 0.02, seed: int | None = None) -> None:
        """Register ``ocr_artifacts`` as this glitchling's corruption function.

        Parameters
        - error_rate: Forwarded to the base constructor as a keyword argument
          (default 0.02); presumably passed on to ``ocr_artifacts`` — confirm
          against ``Glitchling``.
        - seed: Optional seed forwarded to the base constructor.
        """
        super().__init__(
            name="Scannequin",
            corruption_function=ocr_artifacts,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.LATE,
            seed=seed,
            error_rate=error_rate,
        )
|
161
|
+
|
162
|
+
|
163
|
+
scannequin = Scannequin()
|
164
|
+
|
165
|
+
|
166
|
+
__all__ = ["Scannequin", "scannequin"]
|
@@ -0,0 +1,184 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import random
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
from .core import Glitchling, AttackWave, AttackOrder
|
7
|
+
from ..util import KEYNEIGHBORS
|
8
|
+
|
9
|
+
try:
|
10
|
+
from glitchlings._typogre_rust import fatfinger as _fatfinger_rust
|
11
|
+
except ImportError: # pragma: no cover - compiled extension not present
|
12
|
+
_fatfinger_rust = None
|
13
|
+
|
14
|
+
|
15
|
+
def _python_unichar(text: str, rng: random.Random) -> str:
    """Collapse one randomly chosen doubled character to a single occurrence.

    A pair (like 'ee' in 'seed') qualifies only when it is immediately
    followed by a word character, so a pair at the very end of a word is left
    alone. Text without a qualifying pair is returned unchanged.
    """
    import re

    pair_spans = [hit.span(1) for hit in re.finditer(r"((.)\2)(?=\w)", text)]
    if not pair_spans:
        return text
    first, after = rng.choice(pair_spans)
    # Keep the first character of the pair and drop the second.
    return text[: first + 1] + text[after:]
|
24
|
+
|
25
|
+
|
26
|
+
def _python_skipped_space(text: str, rng: random.Random) -> str:
    """Remove one randomly chosen space character from ``text``.

    Only the plain space ``" "`` is considered. Text without any space is
    returned unchanged.
    """
    gaps = [pos for pos, ch in enumerate(text) if ch == " "]
    if not gaps:
        return text
    drop = rng.choice(gaps)
    return text[:drop] + text[drop + 1 :]
|
34
|
+
|
35
|
+
|
36
|
+
def _python_random_space(text: str, rng: random.Random) -> str:
    """Insert a single space at a random interior position of ``text``.

    The space is never placed before the first or after the last character.
    Text shorter than two characters is returned unchanged.
    """
    if len(text) >= 2:
        cut = rng.randrange(1, len(text))
        return f"{text[:cut]} {text[cut:]}"
    return text
|
41
|
+
|
42
|
+
|
43
|
+
def _python_repeated_char(text: str, rng: random.Random) -> str:
    """Duplicate one randomly chosen non-whitespace character of ``text``.

    Text that contains only whitespace (or nothing) is returned unchanged.
    """
    printable = [pos for pos, ch in enumerate(text) if not ch.isspace()]
    if printable:
        pos = rng.choice(printable)
        # Everything up to and including the character, then from it again.
        return text[: pos + 1] + text[pos:]
    return text
|
49
|
+
|
50
|
+
|
51
|
+
def _python_is_word_char(c: str) -> bool:
    """Return True when ``c`` is an underscore or alphanumeric."""
    return c == "_" or c.isalnum()
|
53
|
+
|
54
|
+
|
55
|
+
def _python_eligible_idx(s: str, i: int) -> bool:
    """Return True when ``s[i]`` is a word character strictly inside a word.

    The index must be in range, and the character at ``i`` as well as both of
    its immediate neighbours must be word characters. The first and last
    positions of ``s`` therefore never qualify.
    """
    if not 0 <= i < len(s):
        return False
    if not _python_is_word_char(s[i]):
        return False
    # A position on either edge of the string is missing a neighbour.
    if i == 0 or i + 1 >= len(s):
        return False
    return _python_is_word_char(s[i - 1]) and _python_is_word_char(s[i + 1])
|
63
|
+
|
64
|
+
|
65
|
+
def _python_draw_eligible_index(
    rng: random.Random, s: str, max_tries: int = 16
) -> Optional[int]:
    """Pick an index of ``s`` accepted by ``_python_eligible_idx``.

    Up to ``max_tries`` uniformly random indices are tested first. If none is
    eligible, one more random start index is drawn and every position is
    checked once, wrapping around the end of the string.

    Returns the chosen index, or ``None`` when ``s`` is empty or contains no
    eligible position.
    """
    length = len(s)
    if length == 0:
        return None

    # Cheap random probing.
    for _ in range(max_tries):
        probe = rng.randrange(length)
        if _python_eligible_idx(s, probe):
            return probe

    # Exhaustive circular scan from a random start.
    origin = rng.randrange(length)
    for step in range(length):
        position = (origin + step) % length
        if _python_eligible_idx(s, position):
            return position
    return None
|
85
|
+
|
86
|
+
|
87
|
+
def _fatfinger_python(
    text: str,
    *,
    max_change_rate: float,
    layout: dict[str, list[str]],
    rng: random.Random,
) -> str:
    """Apply random typing-error edits to ``text`` (pure-Python implementation).

    Parameters
    - text: Input text to corrupt.
    - max_change_rate: Proportion of the text length used as the edit budget.
      The budget is ``max(1, int(len(text) * max_change_rate))``, so at least
      one edit is always attempted.
    - layout: Maps a lowercase character to its list of neighbouring keys.
    - rng: Random number generator driving every random decision.

    Returns
    - The edited text. An attempted edit may leave the text unchanged, e.g.
      when no eligible position exists or a character has no neighbours.
    """
    positional_actions = ("char_swap", "missing_char", "extra_char", "nearby_char")
    # Edits that choose their own position and operate on the whole text.
    whole_text_edits = {
        "skipped_space": _python_skipped_space,
        "random_space": _python_random_space,
        "unichar": _python_unichar,
        "repeated_char": _python_repeated_char,
    }
    all_actions = positional_actions + tuple(whole_text_edits)

    budget = max(1, int(len(text) * max_change_rate))
    # Every action is drawn up front, before any edit consumes randomness.
    planned = [rng.choice(all_actions) for _ in range(budget)]

    current = text
    for action in planned:
        whole_text_edit = whole_text_edits.get(action)
        if whole_text_edit is not None:
            current = whole_text_edit(current, rng)
            continue

        # Positional edits act on a word-interior index of the current text.
        idx = _python_draw_eligible_index(rng, current)
        if idx is None:
            continue

        if action == "char_swap":
            # An eligible index always has a right-hand neighbour to swap with.
            nxt = idx + 1
            current = current[:idx] + current[nxt] + current[idx] + current[nxt + 1 :]
        elif action == "missing_char":
            if _python_eligible_idx(current, idx):
                current = current[:idx] + current[idx + 1 :]
        elif action == "extra_char":
            key = current[idx]
            # With no known neighbours, the character itself is inserted.
            nearby = layout.get(key.lower(), []) or [key]
            inserted = rng.choice(nearby) or key
            current = current[:idx] + inserted + current[idx:]
        elif action == "nearby_char":
            key = current[idx]
            nearby = layout.get(key.lower(), [])
            if nearby:
                substitute = rng.choice(nearby)
                current = current[:idx] + substitute + current[idx + 1 :]
    return current
|
135
|
+
|
136
|
+
|
137
|
+
def fatfinger(
    text: str,
    max_change_rate: float = 0.02,
    keyboard: str = "CURATOR_QWERTY",
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Introduce character-level "fat finger" edits with a Rust fast path.

    Parameters
    - text: Input text to corrupt; falsy text yields ``""``.
    - max_change_rate: Proportion of the text length used as the edit budget
      (default 0.02).
    - keyboard: Name of the neighbour map to look up as an attribute of
      ``KEYNEIGHBORS`` (default "CURATOR_QWERTY").
    - seed: Seed for a fresh ``random.Random`` when ``rng`` is not given.
    - rng: Random number generator to use; when provided, ``seed`` is ignored.

    Raises
    - AttributeError: if ``KEYNEIGHBORS`` has no attribute named ``keyboard``.
    """
    generator = rng if rng is not None else random.Random(seed)
    if not text:
        return ""

    layout = getattr(KEYNEIGHBORS, keyboard)

    # Both backends are called with the same keyword arguments.
    backend = _fatfinger_python if _fatfinger_rust is None else _fatfinger_rust
    return backend(text, max_change_rate=max_change_rate, layout=layout, rng=generator)
|
157
|
+
|
158
|
+
|
159
|
+
class Typogre(Glitchling):
    """Glitchling that introduces deterministic keyboard-typing errors."""

    def __init__(
        self,
        *,
        max_change_rate: float = 0.02,
        keyboard: str = "CURATOR_QWERTY",
        seed: int | None = None,
    ) -> None:
        """Register ``fatfinger`` as this glitchling's corruption function.

        Parameters
        - max_change_rate: Forwarded to the base constructor as a keyword
          argument (default 0.02); presumably passed on to ``fatfinger`` —
          confirm against ``Glitchling``.
        - keyboard: Neighbour-map name, forwarded the same way
          (default "CURATOR_QWERTY").
        - seed: Optional seed forwarded to the base constructor.
        """
        super().__init__(
            name="Typogre",
            corruption_function=fatfinger,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.EARLY,
            seed=seed,
            max_change_rate=max_change_rate,
            keyboard=keyboard,
        )
|
178
|
+
|
179
|
+
|
180
|
+
typogre = Typogre()
|
181
|
+
|
182
|
+
|
183
|
+
__all__ = ["Typogre", "typogre"]
|
184
|
+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: glitchlings
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.3
|
4
4
|
Summary: Monsters for your language games.
|
5
5
|
Project-URL: Homepage, https://github.com/osoleve/glitchlings
|
6
6
|
Project-URL: Repository, https://github.com/osoleve/glitchlings.git
|
@@ -209,7 +209,7 @@ License: Apache License
|
|
209
209
|
See the License for the specific language governing permissions and
|
210
210
|
limitations under the License.
|
211
211
|
License-File: LICENSE
|
212
|
-
Keywords: adversarial,
|
212
|
+
Keywords: adversarial augmentation,nlp,text,text augmentation
|
213
213
|
Classifier: Development Status :: 3 - Alpha
|
214
214
|
Classifier: Intended Audience :: Developers
|
215
215
|
Classifier: License :: OSI Approved :: Apache Software License
|
@@ -223,10 +223,9 @@ Requires-Python: >=3.12
|
|
223
223
|
Requires-Dist: confusable-homoglyphs>=3.3.1
|
224
224
|
Requires-Dist: datasets>=4.0.0
|
225
225
|
Requires-Dist: jellyfish>=1.2.0
|
226
|
+
Requires-Dist: nltk>=3.9.1
|
226
227
|
Provides-Extra: dev
|
227
228
|
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
228
|
-
Provides-Extra: jargoyle
|
229
|
-
Requires-Dist: nltk>=3.9.1; extra == 'jargoyle'
|
230
229
|
Provides-Extra: prime
|
231
230
|
Requires-Dist: verifiers>=0.1.3.post0; extra == 'prime'
|
232
231
|
Description-Content-Type: text/markdown
|
@@ -270,10 +269,16 @@ pip install -U glitchlings
|
|
270
269
|
```
|
271
270
|
|
272
271
|
```python
|
273
|
-
from glitchlings import
|
272
|
+
from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Reduple, Rushmore
|
274
273
|
|
275
|
-
gaggle =
|
276
|
-
|
274
|
+
gaggle = Gaggle([
|
275
|
+
Typogre(max_change_rate=0.03),
|
276
|
+
Mim1c(replacement_rate=0.02),
|
277
|
+
Reduple(seed=404),
|
278
|
+
Rushmore(max_deletion_rate=0.02),
|
279
|
+
])
|
280
|
+
|
281
|
+
print(gaggle(SAMPLE_TEXT))
|
277
282
|
```
|
278
283
|
|
279
284
|
> Onҽ mھrning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin٠ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
|
@@ -286,14 +291,23 @@ Conversely, training a model to perform well in the presence of the types of per
|
|
286
291
|
|
287
292
|
## Your First Battle
|
288
293
|
|
289
|
-
Summon your chosen `Glitchling` (_or a few, if ya nasty_) and call it on your text or slot it into `Dataset.map(...)`, supplying a seed if desired.
|
290
|
-
|
291
|
-
|
294
|
+
Summon your chosen `Glitchling` (_or a few, if ya nasty_) and call it on your text or slot it into `Dataset.map(...)`, supplying a seed if desired.
|
295
|
+
Glitchlings are standard Python classes, so you can instantiate them with whatever parameters fit your scenario:
|
296
|
+
|
297
|
+
```python
|
298
|
+
from glitchlings import Gaggle, Typogre, Mim1c
|
299
|
+
|
300
|
+
custom_typogre = Typogre(max_change_rate=0.1)
|
301
|
+
selective_mimic = Mim1c(replacement_rate=0.05, classes=["LATIN", "GREEK"])
|
292
302
|
|
293
|
-
|
303
|
+
gaggle = Gaggle([custom_typogre, selective_mimic], seed=99)
|
304
|
+
print(gaggle("Summoned heroes do not fear the glitch."))
|
305
|
+
```
|
306
|
+
|
307
|
+
Calling a `Glitchling` on a `str` transparently calls `.corrupt(str, ...) -> str`.
|
294
308
|
This means that as long as your glitchlings get along logically, they play nicely with one another.
|
295
309
|
|
296
|
-
When summoned as a `Gaggle`, the `Glitchling`s will automatically order themselves into attack waves, based on the scope of the change they make:
|
310
|
+
When summoned as or gathered into a `Gaggle`, the `Glitchling`s will automatically order themselves into attack waves, based on the scope of the change they make:
|
297
311
|
|
298
312
|
1. Document
|
299
313
|
2. Paragraph
|
@@ -303,6 +317,23 @@ When summoned as a `Gaggle`, the `Glitchling`s will automatically order themselv
|
|
303
317
|
|
304
318
|
They're horrible little gremlins, but they're not _unreasonable_.
|
305
319
|
|
320
|
+
## Command-Line Interface (CLI)
|
321
|
+
|
322
|
+
Keyboard warriors can challenge them directly via the `glitchlings` command:
|
323
|
+
|
324
|
+
```bash
|
325
|
+
# Discover which glitchlings are currently on the loose.
|
326
|
+
glitchlings --list
|
327
|
+
|
328
|
+
# Run Typogre against the contents of a file and inspect the diff.
|
329
|
+
glitchlings -g typogre --file documents/report.txt --diff
|
330
|
+
|
331
|
+
# Pipe text straight into the CLI for an on-the-fly corruption.
|
332
|
+
echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
333
|
+
```
|
334
|
+
|
335
|
+
Use `--help` for a complete breakdown of available options.
|
336
|
+
|
306
337
|
## Starter 'lings
|
307
338
|
|
308
339
|
For maintainability reasons, all `Glitchling` have consented to be given nicknames once they're in your care. See the [Monster Manual](MONSTER_MANUAL.md) for a complete bestiary.
|
@@ -311,13 +342,12 @@ For maintainability reasons, all `Glitchling` have consented to be given nicknam
|
|
311
342
|
|
312
343
|
_What a nice word, would be a shame if something happened to it._
|
313
344
|
|
314
|
-
> _**Fatfinger.**_ Typogre introduces character-level errors (duplicating, dropping, adding, or swapping) based on the layout of a (
|
345
|
+
> _**Fatfinger.**_ Typogre introduces character-level errors (duplicating, dropping, adding, or swapping) based on the layout of a keyboard (QWERTY by default, with Dvorak and Colemak variants built-in).
|
315
346
|
>
|
316
347
|
> Args
|
317
348
|
>
|
318
349
|
> - `max_change_rate (float)`: The maximum number of edits to make as a percentage of the length (default: 0.02, 2%).
|
319
|
-
> - `
|
320
|
-
> - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY").
|
350
|
+
> - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
|
321
351
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
322
352
|
|
323
353
|
### Mim1c
|
@@ -347,12 +377,12 @@ _How can a computer need reading glasses?_
|
|
347
377
|
|
348
378
|
_Uh oh. The worst person you know just bought a thesaurus._
|
349
379
|
|
350
|
-
> _**Sesquipedalianism.**_ Jargoyle, the insufferable `Glitchling`, replaces
|
380
|
+
> _**Sesquipedalianism.**_ Jargoyle, the insufferable `Glitchling`, replaces words from selected parts of speech with synonyms at random, without regard for connotational or denotational differences.
|
351
381
|
>
|
352
382
|
> Args
|
353
383
|
>
|
354
384
|
> - `replacement_rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
|
355
|
-
> - `part_of_speech`: The WordNet part of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, or `
|
385
|
+
> - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
|
356
386
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
357
387
|
|
358
388
|
### Reduple
|
@@ -406,19 +436,15 @@ Cave paintings and oral tradition contain many depictions of strange, otherworld
|
|
406
436
|
These _Apocryphal `Glitchling`_ are said to possess unique abilities or behaviors.
|
407
437
|
If you encounter one of these elusive beings, please document your findings and share them with _The Curator_.
|
408
438
|
|
409
|
-
### Reproducible Corruption
|
439
|
+
### Ensuring Reproducible Corruption
|
410
440
|
|
411
|
-
Every `Glitchling`
|
441
|
+
Every `Glitchling` should own its own independent `random.Random` instance. That means:
|
412
442
|
|
413
443
|
- No `random.seed(...)` calls touch Python's global RNG.
|
414
444
|
- Supplying a `seed` when you construct a `Glitchling` (or when you `summon(...)`) makes its behavior reproducible.
|
415
445
|
- Re-running a `Gaggle` with the same master seed and the same input text (_and same external data!_) yields identical corruption output.
|
416
446
|
- Corruption functions are written to accept an `rng` parameter internally so that all randomness is centralized and testable.
|
417
447
|
|
418
|
-
#### Caveats
|
419
|
-
|
420
|
-
- If you mutate a glitchling's parameters after you've used it (e.g. `typogre.set_param(...)`) the outputs may not be the same as before the change. So don't do that.
|
421
|
-
|
422
448
|
#### At Wits' End?
|
423
449
|
|
424
450
|
If you're trying to add a new glitchling and can't seem to make it deterministic, here are some places to look for determinism-breaking code:
|
@@ -0,0 +1,20 @@
|
|
1
|
+
glitchlings/__init__.py,sha256=yD0BaldUpcc_QlHVca1z1iwpOp8ne1H9YVQHc85d1So,580
|
2
|
+
glitchlings/__main__.py,sha256=EOiBgay0x6B9VlSDzSQvMuoq6bHJdSvFSgcAVGGKkd4,121
|
3
|
+
glitchlings/main.py,sha256=1pdVqytcrkh_GxOb0UPnZ0NzYKMoUnXmAWQB4cY5SEg,6199
|
4
|
+
glitchlings/dlc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
glitchlings/dlc/prime.py,sha256=WnLIon2WbdPGx_PK4vF6nOwJICXudZ6zKGR1hVES4Oc,1452
|
6
|
+
glitchlings/util/__init__.py,sha256=OCpWFtloU-sATBv2XpBGlkR7UFR6RemUtuCheuRA4yw,4018
|
7
|
+
glitchlings/zoo/__init__.py,sha256=hXQci2tysMoRHXiR6NDkWtGkKgcO0xxsMB91eiM_Llc,1344
|
8
|
+
glitchlings/zoo/core.py,sha256=5f9pWBZZSDADiUSs-xUahIqCEb9EUq-YcR_N5HzBAw0,8021
|
9
|
+
glitchlings/zoo/jargoyle.py,sha256=fvBP4ngqZ9BHLmpIjiLqGedriwAMuZc6ryqKT5GWfPw,6924
|
10
|
+
glitchlings/zoo/mim1c.py,sha256=X4jW4YrNqbyG0IEDx7wXUsPTwrUXGw2vXUO1kC2yY94,2471
|
11
|
+
glitchlings/zoo/redactyl.py,sha256=T0SAAbkva4A-tnQkXsUJ43N6Q33TsKElDvldUz69sMQ,3546
|
12
|
+
glitchlings/zoo/reduple.py,sha256=ML4TLQNfOkSaF7G9Sjy_i9ILB4FIl1I101CIppNGmOw,2773
|
13
|
+
glitchlings/zoo/rushmore.py,sha256=FH-pHnj1XKFzLRRQIHOojTkbkCpipNKnxSfxP9UGYZI,2528
|
14
|
+
glitchlings/zoo/scannequin.py,sha256=4QP_dpReUxno0mk5Hnn2uCfd3B6eDa7ZGePuW1dyqBU,4630
|
15
|
+
glitchlings/zoo/typogre.py,sha256=8aYULO4nvdyFDsknAfrlQYKeWz_Tgh5uXAkF3omHe0o,5358
|
16
|
+
glitchlings-0.1.3.dist-info/METADATA,sha256=fwqJfu1FrQwJfAnc5UQIaaN3L7er_FWek0cMzRFSVuw,24978
|
17
|
+
glitchlings-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
glitchlings-0.1.3.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
|
19
|
+
glitchlings-0.1.3.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
|
20
|
+
glitchlings-0.1.3.dist-info/RECORD,,
|