glitchlings 0.4.5__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +71 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust.cp311-win_amd64.pyd +0 -0
- glitchlings/compat.py +282 -0
- glitchlings/config.py +386 -0
- glitchlings/config.toml +3 -0
- glitchlings/data/__init__.py +1 -0
- glitchlings/data/hokey_assets.json +193 -0
- glitchlings/dlc/__init__.py +7 -0
- glitchlings/dlc/_shared.py +153 -0
- glitchlings/dlc/huggingface.py +81 -0
- glitchlings/dlc/prime.py +254 -0
- glitchlings/dlc/pytorch.py +166 -0
- glitchlings/dlc/pytorch_lightning.py +209 -0
- glitchlings/lexicon/__init__.py +192 -0
- glitchlings/lexicon/_cache.py +108 -0
- glitchlings/lexicon/data/default_vector_cache.json +82 -0
- glitchlings/lexicon/metrics.py +162 -0
- glitchlings/lexicon/vector.py +652 -0
- glitchlings/lexicon/wordnet.py +228 -0
- glitchlings/main.py +364 -0
- glitchlings/util/__init__.py +195 -0
- glitchlings/util/adapters.py +27 -0
- glitchlings/util/hokey_generator.py +144 -0
- glitchlings/util/stretch_locator.py +140 -0
- glitchlings/util/stretchability.py +375 -0
- glitchlings/zoo/__init__.py +172 -0
- glitchlings/zoo/_ocr_confusions.py +32 -0
- glitchlings/zoo/_rate.py +131 -0
- glitchlings/zoo/_rust_extensions.py +143 -0
- glitchlings/zoo/_sampling.py +54 -0
- glitchlings/zoo/_text_utils.py +100 -0
- glitchlings/zoo/adjax.py +128 -0
- glitchlings/zoo/apostrofae.py +127 -0
- glitchlings/zoo/assets/__init__.py +0 -0
- glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
- glitchlings/zoo/core.py +582 -0
- glitchlings/zoo/hokey.py +173 -0
- glitchlings/zoo/jargoyle.py +335 -0
- glitchlings/zoo/mim1c.py +109 -0
- glitchlings/zoo/ocr_confusions.tsv +30 -0
- glitchlings/zoo/redactyl.py +193 -0
- glitchlings/zoo/reduple.py +148 -0
- glitchlings/zoo/rushmore.py +153 -0
- glitchlings/zoo/scannequin.py +171 -0
- glitchlings/zoo/typogre.py +231 -0
- glitchlings/zoo/zeedub.py +185 -0
- glitchlings-0.4.5.dist-info/METADATA +648 -0
- glitchlings-0.4.5.dist-info/RECORD +53 -0
- glitchlings-0.4.5.dist-info/WHEEL +5 -0
- glitchlings-0.4.5.dist-info/entry_points.txt +2 -0
- glitchlings-0.4.5.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.4.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
import random
|
|
5
|
+
from typing import Any, Optional, cast
|
|
6
|
+
|
|
7
|
+
from ..util import KEYNEIGHBORS
|
|
8
|
+
from ._rate import resolve_rate
|
|
9
|
+
from ._rust_extensions import get_rust_operation
|
|
10
|
+
from .core import AttackOrder, AttackWave, Glitchling
|
|
11
|
+
|
|
12
|
+
# Load Rust-accelerated operation if available
|
|
13
|
+
_fatfinger_rust = get_rust_operation("fatfinger")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _python_unichar(text: str, rng: random.Random) -> str:
    """Drop one character from a randomly chosen doubled pair.

    A candidate is any character that is immediately repeated (for example
    the ``ee`` in ``seed``) and is followed by a word character. When there
    is no candidate, ``text`` is returned unchanged.
    """
    import re

    doubled = [found for found in re.finditer(r"((.)\2)(?=\w)", text)]
    if len(doubled) == 0:
        return text

    picked = rng.choice(doubled)
    first, stop = picked.span(1)
    # Keep the first character of the pair and skip its duplicate.
    return "".join((text[:first], text[first], text[stop:]))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _python_skipped_space(text: str, rng: random.Random) -> str:
    """Delete one randomly selected space character from ``text``.

    Only the literal ``" "`` character is considered. Text that contains no
    space is returned unchanged.
    """
    import re

    candidates = [found.start() for found in re.finditer(r" ", text)]
    if candidates:
        cut = rng.choice(candidates)
        return text[:cut] + text[cut + 1 :]
    return text
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _python_random_space(text: str, rng: random.Random) -> str:
    """Insert a single space at a random interior position of ``text``.

    Strings shorter than two characters have no interior position and are
    returned unchanged.
    """
    length = len(text)
    if length >= 2:
        split_at = rng.randrange(1, length)
        head, tail = text[:split_at], text[split_at:]
        return f"{head} {tail}"
    return text
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _python_repeated_char(text: str, rng: random.Random) -> str:
    """Duplicate one randomly chosen non-whitespace character in place.

    Text made up only of whitespace (or empty text) is returned unchanged.
    """
    candidates = []
    for offset, char in enumerate(text):
        if char.isspace():
            continue
        candidates.append(offset)
    if len(candidates) == 0:
        return text

    spot = rng.choice(candidates)
    # Everything up to and including the chosen character, then the text
    # again starting from that same character.
    return "".join((text[: spot + 1], text[spot:]))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _python_is_word_char(c: str) -> bool:
    """Return whether ``c`` is alphanumeric or an underscore."""
    if c == "_":
        return True
    return c.isalnum()
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _python_eligible_idx(s: str, i: int) -> bool:
    """Return whether index ``i`` is an interior word character of ``s``.

    The index qualifies only when it is in range and the character there,
    its left neighbour, and its right neighbour are all word characters.
    """
    size = len(s)
    if not 0 <= i < size:
        return False
    if not _python_is_word_char(s[i]):
        return False
    # Both neighbours must exist, so the first and last indices never qualify.
    has_left = i >= 1 and _python_is_word_char(s[i - 1])
    if not has_left:
        return False
    return i + 1 < size and _python_is_word_char(s[i + 1])
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _python_draw_eligible_index(rng: random.Random, s: str, max_tries: int = 16) -> Optional[int]:
    """Pick an eligible index of ``s`` at random, or ``None`` if none exists.

    Up to ``max_tries`` random probes are attempted first. If none of them
    lands on an eligible index, one more random offset is drawn and a
    circular scan from that offset visits every index exactly once.
    """
    length = len(s)
    if not length:
        return None

    for _attempt in range(max_tries):
        probe = rng.randrange(length)
        if _python_eligible_idx(s, probe):
            return probe

    # Fallback: walk the whole string once, wrapping around at the end.
    origin = rng.randrange(length)
    for step in range(length):
        candidate = (origin + step) % length
        if _python_eligible_idx(s, candidate):
            return candidate
    return None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _fatfinger_python(
    text: str,
    *,
    rate: float,
    layout: dict[str, list[str]],
    rng: random.Random,
) -> str:
    """Apply keyboard-style typos to ``text`` using the pure-Python path.

    Args:
        text: Text to corrupt.
        rate: Edit budget as a fraction of ``len(text)``; the number of
            attempted edits is ``ceil(len(text) * rate)``. Values ``<= 0``
            leave the text untouched.
        layout: Mapping from a lowercase character to its neighbouring keys.
        rng: Random source used for every draw.

    Returns:
        The edited text. Positional edits are skipped when no eligible index
        can be drawn, so fewer edits than the budget may be applied.
    """
    if rate <= 0.0:
        return text

    budget = math.ceil(len(text) * rate)
    if budget == 0:
        return text

    index_based = ("char_swap", "missing_char", "extra_char", "nearby_char")
    # Actions that operate on the whole string and pick their own position.
    whole_text = {
        "skipped_space": _python_skipped_space,
        "random_space": _python_random_space,
        "unichar": _python_unichar,
        "repeated_char": _python_repeated_char,
    }
    menu = index_based + tuple(whole_text)

    # Every action is drawn up front, before any edit consumes the RNG.
    plan = [rng.choice(menu) for _ in range(budget)]

    result = text
    for step in plan:
        handler = whole_text.get(step)
        if handler is not None:
            result = handler(result, rng)
            continue

        pos = _python_draw_eligible_index(rng, result)
        if pos is None:
            continue

        if step == "char_swap":
            # An eligible index always has a word character to its right.
            nxt = pos + 1
            result = result[:pos] + result[nxt] + result[pos] + result[nxt + 1 :]
        elif step == "missing_char":
            if _python_eligible_idx(result, pos):
                result = result[:pos] + result[pos + 1 :]
        elif step == "extra_char":
            current = result[pos]
            # Fall back to the character itself when it has no neighbours.
            options = layout.get(current.lower(), []) or [current]
            addition = rng.choice(options) or current
            result = result[:pos] + addition + result[pos:]
        elif step == "nearby_char":
            options = layout.get(result[pos].lower(), [])
            if options:
                result = result[:pos] + rng.choice(options) + result[pos + 1 :]
    return result
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def fatfinger(
    text: str,
    rate: float | None = None,
    keyboard: str = "CURATOR_QWERTY",
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    max_change_rate: float | None = None,
) -> str:
    """Introduce character-level "fat finger" edits with a Rust fast path.

    Args:
        text: Text to corrupt. Empty input yields ``""``.
        rate: Requested edit rate; passed to ``resolve_rate`` together with
            ``max_change_rate`` and a default of ``0.02``.
        keyboard: Attribute name on ``KEYNEIGHBORS`` selecting the layout.
        seed: Seed for a fresh ``random.Random`` when ``rng`` is not given.
        rng: Random source to use instead of a seeded one.
        max_change_rate: Legacy spelling of ``rate``, forwarded to
            ``resolve_rate`` as the legacy value.

    Returns:
        The edited text, or ``text`` unchanged when the resolved rate clamps
        to zero.

    Raises:
        AttributeError: If ``KEYNEIGHBORS`` has no attribute named
            ``keyboard``.
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=max_change_rate,
        default=0.02,
        legacy_name="max_change_rate",
    )

    generator = random.Random(seed) if rng is None else rng
    if not text:
        return ""

    # Negative rates are treated as zero, which means "no edits".
    bounded = max(0.0, resolved)
    if bounded == 0.0:
        return text

    neighbours = getattr(KEYNEIGHBORS, keyboard)

    if _fatfinger_rust is None:
        return _fatfinger_python(text, rate=bounded, layout=neighbours, rng=generator)

    mutated = _fatfinger_rust(text, max_change_rate=bounded, layout=neighbours, rng=generator)
    return cast(str, mutated)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class Typogre(Glitchling):
    """Glitchling that introduces deterministic keyboard-typing errors."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        max_change_rate: float | None = None,
        keyboard: str = "CURATOR_QWERTY",
        seed: int | None = None,
    ) -> None:
        """Configure the glitchling.

        Args:
            rate: Requested edit rate; resolved through ``resolve_rate`` with
                a default of ``0.02``.
            max_change_rate: Legacy spelling of ``rate``, forwarded to
                ``resolve_rate`` as the legacy value.
            keyboard: Name of the keyboard layout to use.
            seed: Seed handed to the base class.
        """
        # Set before the base initialiser runs, as in the original ordering.
        self._param_aliases = {"max_change_rate": "rate"}
        resolved = resolve_rate(
            rate=rate,
            legacy_value=max_change_rate,
            default=0.02,
            legacy_name="max_change_rate",
        )
        super().__init__(
            name="Typogre",
            corruption_function=fatfinger,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.EARLY,
            seed=seed,
            rate=resolved,
            keyboard=keyboard,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Describe this glitchling as a pipeline operation dictionary.

        Returns:
            A dict with ``type``, ``rate``, ``keyboard`` and ``layout`` keys,
            or ``None`` when no rate is configured or the keyboard name does
            not resolve to a layout on ``KEYNEIGHBORS``.
        """
        rate = self.kwargs.get("rate")
        if rate is None:
            rate = self.kwargs.get("max_change_rate")
        if rate is None:
            return None

        keyboard = self.kwargs.get("keyboard", "CURATOR_QWERTY")
        keyboard_name = str(keyboard)
        layout = getattr(KEYNEIGHBORS, keyboard_name, None)
        if layout is None:
            return None

        # Copy each neighbour collection into a plain list.
        neighbours = {}
        for key, value in layout.items():
            neighbours[key] = list(value)

        return {
            "type": "typo",
            "rate": float(rate),
            "keyboard": str(keyboard),
            "layout": neighbours,
        }
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
typogre = Typogre()
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
__all__ = ["Typogre", "typogre"]
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
import random
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
|
|
8
|
+
from ._rate import resolve_rate
|
|
9
|
+
from ._rust_extensions import get_rust_operation
|
|
10
|
+
from .core import AttackOrder, AttackWave, Glitchling
|
|
11
|
+
|
|
12
|
+
# Load Rust-accelerated operation if available
|
|
13
|
+
_inject_zero_widths_rust = get_rust_operation("inject_zero_widths")
|
|
14
|
+
|
|
15
|
+
# Glyphs used when the caller does not supply a palette of its own.
_DEFAULT_ZERO_WIDTH_CHARACTERS: tuple[str, ...] = (
    "\u200b",  # U+200B ZERO WIDTH SPACE
    "\u200c",  # U+200C ZERO WIDTH NON-JOINER
    "\u200d",  # U+200D ZERO WIDTH JOINER
    "\ufeff",  # U+FEFF ZERO WIDTH NO-BREAK SPACE
    "\u2060",  # U+2060 WORD JOINER
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _python_insert_zero_widths(
    text: str,
    *,
    rate: float,
    rng: random.Random,
    characters: Sequence[str],
) -> str:
    """Insert zero-width glyphs between adjacent non-whitespace characters.

    Args:
        text: Text to corrupt.
        rate: Fraction of eligible gaps to fill. Values ``<= 0`` leave the
            text untouched; values above ``1`` fill every eligible gap.
        rng: Random source. It is consulted, in order, for rounding a
            fractional gap count, for sampling the gaps, and for choosing one
            glyph per gap.
        characters: Candidate glyphs; empty entries are ignored.

    Returns:
        ``text`` with the chosen glyphs inserted, or ``text`` itself when it
        is empty, the palette is empty, there is no eligible gap, or the
        computed gap count is zero.
    """
    if not text:
        return text

    palette = [char for char in characters if char]
    if not palette:
        return text

    # A gap is eligible when the characters on both sides are non-whitespace.
    positions = [
        index + 1
        for index in range(len(text) - 1)
        if not text[index].isspace() and not text[index + 1].isspace()
    ]
    if not positions:
        return text

    total = len(positions)
    clamped_rate = max(0.0, rate)
    if clamped_rate <= 0.0:
        return text

    # Stochastic rounding: the fractional part of the target is the
    # probability of inserting one extra glyph.
    target = clamped_rate * total
    count = math.floor(target)
    remainder = target - count
    if remainder > 0.0 and rng.random() < remainder:
        count += 1
    count = min(total, count)

    if count <= 0:
        return text

    chosen = sorted(rng.sample(positions, count))

    # Glyphs are drawn for the right-most gap first so the RNG is consumed in
    # the same order as an insert-from-the-end loop; reverse afterwards to
    # line them up with the ascending positions.
    glyphs = [rng.choice(palette) for _ in chosen]
    glyphs.reverse()

    # Assemble the result in a single pass instead of repeatedly inserting
    # into a list of characters, which is quadratic for long inputs.
    pieces: list[str] = []
    previous = 0
    for position, glyph in zip(chosen, glyphs):
        pieces.append(text[previous:position])
        pieces.append(glyph)
        previous = position
    pieces.append(text[previous:])

    return "".join(pieces)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def insert_zero_widths(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    characters: Sequence[str] | None = None,
) -> str:
    """Inject zero-width characters between non-space character pairs.

    Args:
        text: Text to corrupt.
        rate: Requested insertion rate; resolved through ``resolve_rate``
            with a default of ``0.02``.
        seed: Seed for a fresh ``random.Random`` when ``rng`` is not given.
        rng: Random source to use instead of a seeded one.
        characters: Glyph palette; the module default palette is used when
            this is ``None``. Empty entries are dropped.

    Returns:
        The text with zero-width glyphs inserted, or ``text`` unchanged when
        it is empty, the palette is empty, or the rate clamps to zero.
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=None,
        default=0.02,
        legacy_name="rate",
    )

    if rng is None:
        rng = random.Random(seed)

    source: Sequence[str] = (
        _DEFAULT_ZERO_WIDTH_CHARACTERS if characters is None else tuple(characters)
    )
    glyphs = tuple(glyph for glyph in source if glyph)
    if not (glyphs and text):
        return text

    bounded = max(0.0, resolved)
    if bounded == 0.0:
        return text

    if _inject_zero_widths_rust is None:
        return _python_insert_zero_widths(
            text,
            rate=bounded,
            rng=rng,
            characters=glyphs,
        )

    # Parity guard: compute the Python result first, rewind the RNG, then run
    # the Rust implementation from the same starting state.
    restorable = hasattr(rng, "getstate") and hasattr(rng, "setstate")
    before = rng.getstate() if restorable else None

    reference = _python_insert_zero_widths(
        text,
        rate=bounded,
        rng=rng,
        characters=glyphs,
    )

    after = None
    if before is not None:
        after = rng.getstate()
        rng.setstate(before)

    accelerated = cast(
        str,
        _inject_zero_widths_rust(text, bounded, list(glyphs), rng),
    )
    if accelerated == reference:
        return accelerated

    # The two implementations disagree: return the Python result and leave
    # the RNG where the Python run finished.
    if after is not None:
        rng.setstate(after)
    return reference
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class Zeedub(Glitchling):
    """Glitchling that plants zero-width glyphs inside words."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        seed: int | None = None,
        characters: Sequence[str] | None = None,
    ) -> None:
        """Configure the glitchling.

        Args:
            rate: Requested insertion rate; resolved through ``resolve_rate``
                with a default of ``0.02``.
            seed: Seed handed to the base class.
            characters: Optional glyph palette; stored as a tuple, or as
                ``None`` when not supplied.
        """
        resolved = resolve_rate(
            rate=rate,
            legacy_value=None,
            default=0.02,
            legacy_name="rate",
        )
        frozen_palette = None if characters is None else tuple(characters)
        super().__init__(
            name="Zeedub",
            corruption_function=insert_zero_widths,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.LAST,
            seed=seed,
            rate=resolved,
            characters=frozen_palette,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Describe this glitchling as a pipeline operation dictionary.

        Returns:
            A dict with ``type``, ``rate`` and ``characters`` keys, or
            ``None`` when no rate is configured or the palette ends up empty.
        """
        rate = self.kwargs.get("rate")
        if rate is None:
            return None

        raw_characters = self.kwargs.get("characters")
        if raw_characters is not None:
            # Stringify entries and drop empty ones.
            palette = tuple(str(char) for char in raw_characters if char)
        else:
            palette = tuple(_DEFAULT_ZERO_WIDTH_CHARACTERS)

        if len(palette) == 0:
            return None

        return {
            "type": "zwj",
            "rate": float(rate),
            "characters": list(palette),
        }
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
zeedub = Zeedub()
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
__all__ = ["Zeedub", "zeedub", "insert_zero_widths"]
|