glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +36 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +53 -0
- glitchlings/attack/compose.py +299 -0
- glitchlings/attack/core.py +465 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +104 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +157 -0
- glitchlings/auggie.py +283 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +17 -3
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +37 -65
- glitchlings/dlc/prime.py +55 -114
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +432 -0
- glitchlings/main.py +123 -32
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +29 -176
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +311 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +47 -24
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +301 -167
- glitchlings/zoo/core_execution.py +98 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/ekkokin.py +118 -0
- glitchlings/zoo/hokey.py +137 -0
- glitchlings/zoo/jargoyle.py +179 -274
- glitchlings/zoo/mim1c.py +106 -68
- glitchlings/zoo/pedant/__init__.py +107 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +44 -175
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +359 -116
- glitchlings/zoo/scannequin.py +18 -125
- glitchlings/zoo/transforms.py +386 -0
- glitchlings/zoo/typogre.py +76 -162
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/zeedub.py +33 -86
- glitchlings-0.9.3.dist-info/METADATA +334 -0
- glitchlings-0.9.3.dist-info/RECORD +80 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
- glitchlings/zoo/_ocr_confusions.py +0 -34
- glitchlings/zoo/_rate.py +0 -21
- glitchlings/zoo/reduple.py +0 -169
- glitchlings-0.2.5.dist-info/METADATA +0 -490
- glitchlings-0.2.5.dist-info/RECORD +0 -27
- /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0
glitchlings/zoo/typogre.py
CHANGED
|
@@ -1,199 +1,80 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import math
|
|
4
3
|
import random
|
|
5
|
-
from
|
|
4
|
+
from collections.abc import Mapping, Sequence
|
|
5
|
+
from typing import cast
|
|
6
6
|
|
|
7
|
-
from .
|
|
8
|
-
from .
|
|
9
|
-
from ..util import KEYNEIGHBORS
|
|
7
|
+
from glitchlings.constants import DEFAULT_TYPOGRE_KEYBOARD, DEFAULT_TYPOGRE_RATE
|
|
8
|
+
from glitchlings.internal.rust_ffi import fatfinger_rust, resolve_seed
|
|
10
9
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
except ImportError: # pragma: no cover - compiled extension not present
|
|
14
|
-
_fatfinger_rust = None
|
|
10
|
+
from ..util import KEYNEIGHBORS, SHIFT_MAPS
|
|
11
|
+
from .core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
|
|
15
12
|
|
|
16
13
|
|
|
17
|
-
def
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
if not matches:
|
|
23
|
-
return text
|
|
24
|
-
start, end = rng.choice(matches).span(1)
|
|
25
|
-
return text[:start] + text[start] + text[end:]
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def _python_skipped_space(text: str, rng: random.Random) -> str:
|
|
29
|
-
import re
|
|
30
|
-
|
|
31
|
-
space_positions = [m.start() for m in re.finditer(r" ", text)]
|
|
32
|
-
if not space_positions:
|
|
33
|
-
return text
|
|
34
|
-
idx = rng.choice(space_positions)
|
|
35
|
-
return text[:idx] + text[idx + 1 :]
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def _python_random_space(text: str, rng: random.Random) -> str:
|
|
39
|
-
if len(text) < 2:
|
|
40
|
-
return text
|
|
41
|
-
idx = rng.randrange(1, len(text))
|
|
42
|
-
return text[:idx] + " " + text[idx:]
|
|
14
|
+
def _resolve_slip_exit_rate(
|
|
15
|
+
shift_slip_rate: float,
|
|
16
|
+
shift_slip_exit_rate: float | None,
|
|
17
|
+
) -> float:
|
|
18
|
+
"""Derive the slip exit rate, defaulting to a burst-friendly value."""
|
|
43
19
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
if not positions:
|
|
48
|
-
return text
|
|
49
|
-
i = rng.choice(positions)
|
|
50
|
-
return text[:i] + text[i] + text[i:]
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def _python_is_word_char(c: str) -> bool:
|
|
54
|
-
return c.isalnum() or c == "_"
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def _python_eligible_idx(s: str, i: int) -> bool:
|
|
58
|
-
if i < 0 or i >= len(s):
|
|
59
|
-
return False
|
|
60
|
-
if not _python_is_word_char(s[i]):
|
|
61
|
-
return False
|
|
62
|
-
left_ok = i > 0 and _python_is_word_char(s[i - 1])
|
|
63
|
-
right_ok = i + 1 < len(s) and _python_is_word_char(s[i + 1])
|
|
64
|
-
return left_ok and right_ok
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def _python_draw_eligible_index(
|
|
68
|
-
rng: random.Random, s: str, max_tries: int = 16
|
|
69
|
-
) -> Optional[int]:
|
|
70
|
-
n = len(s)
|
|
71
|
-
if n == 0:
|
|
72
|
-
return None
|
|
73
|
-
for _ in range(max_tries):
|
|
74
|
-
i = rng.randrange(n)
|
|
75
|
-
if _python_eligible_idx(s, i):
|
|
76
|
-
return i
|
|
77
|
-
start = rng.randrange(n)
|
|
78
|
-
i = start
|
|
79
|
-
while True:
|
|
80
|
-
if _python_eligible_idx(s, i):
|
|
81
|
-
return i
|
|
82
|
-
i += 1
|
|
83
|
-
if i == n:
|
|
84
|
-
i = 0
|
|
85
|
-
if i == start:
|
|
86
|
-
return None
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def _fatfinger_python(
|
|
90
|
-
text: str,
|
|
91
|
-
*,
|
|
92
|
-
rate: float,
|
|
93
|
-
layout: dict[str, list[str]],
|
|
94
|
-
rng: random.Random,
|
|
95
|
-
) -> str:
|
|
96
|
-
if rate <= 0.0:
|
|
97
|
-
return text
|
|
98
|
-
|
|
99
|
-
s = text
|
|
100
|
-
max_changes = math.ceil(len(s) * rate)
|
|
101
|
-
if max_changes == 0:
|
|
102
|
-
return s
|
|
103
|
-
|
|
104
|
-
positional_actions = ("char_swap", "missing_char", "extra_char", "nearby_char")
|
|
105
|
-
global_actions = ("skipped_space", "random_space", "unichar", "repeated_char")
|
|
106
|
-
all_actions = positional_actions + global_actions
|
|
107
|
-
|
|
108
|
-
actions_drawn = [rng.choice(all_actions) for _ in range(max_changes)]
|
|
109
|
-
|
|
110
|
-
for action in actions_drawn:
|
|
111
|
-
if action in positional_actions:
|
|
112
|
-
idx = _python_draw_eligible_index(rng, s)
|
|
113
|
-
if idx is None:
|
|
114
|
-
continue
|
|
115
|
-
if action == "char_swap":
|
|
116
|
-
j = idx + 1
|
|
117
|
-
s = s[:idx] + s[j] + s[idx] + s[j + 1 :]
|
|
118
|
-
elif action == "missing_char":
|
|
119
|
-
if _python_eligible_idx(s, idx):
|
|
120
|
-
s = s[:idx] + s[idx + 1 :]
|
|
121
|
-
elif action == "extra_char":
|
|
122
|
-
ch = s[idx]
|
|
123
|
-
neighbors = layout.get(ch.lower(), []) or [ch]
|
|
124
|
-
ins = rng.choice(neighbors) or ch
|
|
125
|
-
s = s[:idx] + ins + s[idx:]
|
|
126
|
-
elif action == "nearby_char":
|
|
127
|
-
ch = s[idx]
|
|
128
|
-
neighbors = layout.get(ch.lower(), [])
|
|
129
|
-
if neighbors:
|
|
130
|
-
rep = rng.choice(neighbors)
|
|
131
|
-
s = s[:idx] + rep + s[idx + 1 :]
|
|
132
|
-
else:
|
|
133
|
-
if action == "skipped_space":
|
|
134
|
-
s = _python_skipped_space(s, rng)
|
|
135
|
-
elif action == "random_space":
|
|
136
|
-
s = _python_random_space(s, rng)
|
|
137
|
-
elif action == "unichar":
|
|
138
|
-
s = _python_unichar(s, rng)
|
|
139
|
-
elif action == "repeated_char":
|
|
140
|
-
s = _python_repeated_char(s, rng)
|
|
141
|
-
return s
|
|
20
|
+
if shift_slip_exit_rate is not None:
|
|
21
|
+
return max(0.0, shift_slip_exit_rate)
|
|
22
|
+
return max(0.0, shift_slip_rate * 0.5)
|
|
142
23
|
|
|
143
24
|
|
|
144
25
|
def fatfinger(
|
|
145
26
|
text: str,
|
|
146
27
|
rate: float | None = None,
|
|
147
|
-
keyboard: str =
|
|
28
|
+
keyboard: str = DEFAULT_TYPOGRE_KEYBOARD,
|
|
29
|
+
layout: Mapping[str, Sequence[str]] | None = None,
|
|
148
30
|
seed: int | None = None,
|
|
149
31
|
rng: random.Random | None = None,
|
|
150
32
|
*,
|
|
151
|
-
|
|
33
|
+
shift_slip_rate: float = 0.0,
|
|
34
|
+
shift_slip_exit_rate: float | None = None,
|
|
35
|
+
shift_map: Mapping[str, str] | None = None,
|
|
152
36
|
) -> str:
|
|
153
37
|
"""Introduce character-level "fat finger" edits with a Rust fast path."""
|
|
38
|
+
effective_rate = DEFAULT_TYPOGRE_RATE if rate is None else rate
|
|
154
39
|
|
|
155
|
-
effective_rate = resolve_rate(
|
|
156
|
-
rate=rate,
|
|
157
|
-
legacy_value=max_change_rate,
|
|
158
|
-
default=0.02,
|
|
159
|
-
legacy_name="max_change_rate",
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
if rng is None:
|
|
163
|
-
rng = random.Random(seed)
|
|
164
40
|
if not text:
|
|
165
41
|
return ""
|
|
166
42
|
|
|
43
|
+
layout_mapping = layout if layout is not None else getattr(KEYNEIGHBORS, keyboard)
|
|
44
|
+
slip_rate = max(0.0, shift_slip_rate)
|
|
45
|
+
slip_exit_rate = _resolve_slip_exit_rate(slip_rate, shift_slip_exit_rate)
|
|
46
|
+
slip_map = shift_map if shift_map is not None else getattr(SHIFT_MAPS, keyboard, None)
|
|
47
|
+
|
|
167
48
|
clamped_rate = max(0.0, effective_rate)
|
|
168
|
-
if clamped_rate == 0.0:
|
|
49
|
+
if slip_rate == 0.0 and clamped_rate == 0.0:
|
|
169
50
|
return text
|
|
170
51
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
52
|
+
return fatfinger_rust(
|
|
53
|
+
text,
|
|
54
|
+
clamped_rate,
|
|
55
|
+
layout_mapping,
|
|
56
|
+
resolve_seed(seed, rng),
|
|
57
|
+
shift_slip_rate=slip_rate,
|
|
58
|
+
shift_slip_exit_rate=slip_exit_rate,
|
|
59
|
+
shift_map=slip_map,
|
|
60
|
+
)
|
|
177
61
|
|
|
178
62
|
|
|
179
63
|
class Typogre(Glitchling):
|
|
180
64
|
"""Glitchling that introduces deterministic keyboard-typing errors."""
|
|
181
65
|
|
|
66
|
+
flavor = "What a nice word, would be a shame if something happened to it..."
|
|
67
|
+
|
|
182
68
|
def __init__(
|
|
183
69
|
self,
|
|
184
70
|
*,
|
|
185
71
|
rate: float | None = None,
|
|
186
|
-
|
|
187
|
-
|
|
72
|
+
keyboard: str = DEFAULT_TYPOGRE_KEYBOARD,
|
|
73
|
+
shift_slip_rate: float = 0.0,
|
|
74
|
+
shift_slip_exit_rate: float | None = None,
|
|
188
75
|
seed: int | None = None,
|
|
189
76
|
) -> None:
|
|
190
|
-
|
|
191
|
-
effective_rate = resolve_rate(
|
|
192
|
-
rate=rate,
|
|
193
|
-
legacy_value=max_change_rate,
|
|
194
|
-
default=0.02,
|
|
195
|
-
legacy_name="max_change_rate",
|
|
196
|
-
)
|
|
77
|
+
effective_rate = DEFAULT_TYPOGRE_RATE if rate is None else rate
|
|
197
78
|
super().__init__(
|
|
198
79
|
name="Typogre",
|
|
199
80
|
corruption_function=fatfinger,
|
|
@@ -202,11 +83,44 @@ class Typogre(Glitchling):
|
|
|
202
83
|
seed=seed,
|
|
203
84
|
rate=effective_rate,
|
|
204
85
|
keyboard=keyboard,
|
|
86
|
+
shift_slip_rate=max(0.0, shift_slip_rate),
|
|
87
|
+
shift_slip_exit_rate=shift_slip_exit_rate,
|
|
205
88
|
)
|
|
206
89
|
|
|
90
|
+
def pipeline_operation(self) -> PipelineOperationPayload:
|
|
91
|
+
rate_value = self.kwargs.get("rate")
|
|
92
|
+
rate = DEFAULT_TYPOGRE_RATE if rate_value is None else float(rate_value)
|
|
93
|
+
keyboard = self.kwargs.get("keyboard", DEFAULT_TYPOGRE_KEYBOARD)
|
|
94
|
+
layout = getattr(KEYNEIGHBORS, str(keyboard), None)
|
|
95
|
+
if layout is None:
|
|
96
|
+
message = f"Unknown keyboard layout '{keyboard}' for Typogre pipeline"
|
|
97
|
+
raise RuntimeError(message)
|
|
98
|
+
|
|
99
|
+
serialized_layout = {key: list(value) for key, value in layout.items()}
|
|
100
|
+
shift_slip_rate = float(self.kwargs.get("shift_slip_rate", 0.0) or 0.0)
|
|
101
|
+
shift_slip_exit_rate = self.kwargs.get("shift_slip_exit_rate")
|
|
102
|
+
resolved_exit_rate = _resolve_slip_exit_rate(shift_slip_rate, shift_slip_exit_rate)
|
|
103
|
+
shift_map = getattr(SHIFT_MAPS, str(keyboard), None)
|
|
104
|
+
if shift_slip_rate > 0.0 and shift_map is None:
|
|
105
|
+
message = f"Unknown shift map layout '{keyboard}' for Typogre pipeline"
|
|
106
|
+
raise RuntimeError(message)
|
|
107
|
+
serialized_shift_map = dict(shift_map) if shift_map is not None else None
|
|
108
|
+
|
|
109
|
+
return cast(
|
|
110
|
+
PipelineOperationPayload,
|
|
111
|
+
{
|
|
112
|
+
"type": "typo",
|
|
113
|
+
"rate": float(rate),
|
|
114
|
+
"keyboard": str(keyboard),
|
|
115
|
+
"layout": serialized_layout,
|
|
116
|
+
"shift_slip_rate": shift_slip_rate,
|
|
117
|
+
"shift_slip_exit_rate": float(resolved_exit_rate),
|
|
118
|
+
"shift_map": serialized_shift_map,
|
|
119
|
+
},
|
|
120
|
+
)
|
|
207
121
|
|
|
208
|
-
typogre = Typogre()
|
|
209
122
|
|
|
123
|
+
typogre = Typogre()
|
|
210
124
|
|
|
211
|
-
__all__ = ["Typogre", "typogre"]
|
|
212
125
|
|
|
126
|
+
__all__ = ["Typogre", "typogre", "fatfinger"]
|