glitchlings 0.4.5__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (53):
  1. glitchlings/__init__.py +71 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust.cp311-win_amd64.pyd +0 -0
  4. glitchlings/compat.py +282 -0
  5. glitchlings/config.py +386 -0
  6. glitchlings/config.toml +3 -0
  7. glitchlings/data/__init__.py +1 -0
  8. glitchlings/data/hokey_assets.json +193 -0
  9. glitchlings/dlc/__init__.py +7 -0
  10. glitchlings/dlc/_shared.py +153 -0
  11. glitchlings/dlc/huggingface.py +81 -0
  12. glitchlings/dlc/prime.py +254 -0
  13. glitchlings/dlc/pytorch.py +166 -0
  14. glitchlings/dlc/pytorch_lightning.py +209 -0
  15. glitchlings/lexicon/__init__.py +192 -0
  16. glitchlings/lexicon/_cache.py +108 -0
  17. glitchlings/lexicon/data/default_vector_cache.json +82 -0
  18. glitchlings/lexicon/metrics.py +162 -0
  19. glitchlings/lexicon/vector.py +652 -0
  20. glitchlings/lexicon/wordnet.py +228 -0
  21. glitchlings/main.py +364 -0
  22. glitchlings/util/__init__.py +195 -0
  23. glitchlings/util/adapters.py +27 -0
  24. glitchlings/util/hokey_generator.py +144 -0
  25. glitchlings/util/stretch_locator.py +140 -0
  26. glitchlings/util/stretchability.py +375 -0
  27. glitchlings/zoo/__init__.py +172 -0
  28. glitchlings/zoo/_ocr_confusions.py +32 -0
  29. glitchlings/zoo/_rate.py +131 -0
  30. glitchlings/zoo/_rust_extensions.py +143 -0
  31. glitchlings/zoo/_sampling.py +54 -0
  32. glitchlings/zoo/_text_utils.py +100 -0
  33. glitchlings/zoo/adjax.py +128 -0
  34. glitchlings/zoo/apostrofae.py +127 -0
  35. glitchlings/zoo/assets/__init__.py +0 -0
  36. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  37. glitchlings/zoo/core.py +582 -0
  38. glitchlings/zoo/hokey.py +173 -0
  39. glitchlings/zoo/jargoyle.py +335 -0
  40. glitchlings/zoo/mim1c.py +109 -0
  41. glitchlings/zoo/ocr_confusions.tsv +30 -0
  42. glitchlings/zoo/redactyl.py +193 -0
  43. glitchlings/zoo/reduple.py +148 -0
  44. glitchlings/zoo/rushmore.py +153 -0
  45. glitchlings/zoo/scannequin.py +171 -0
  46. glitchlings/zoo/typogre.py +231 -0
  47. glitchlings/zoo/zeedub.py +185 -0
  48. glitchlings-0.4.5.dist-info/METADATA +648 -0
  49. glitchlings-0.4.5.dist-info/RECORD +53 -0
  50. glitchlings-0.4.5.dist-info/WHEEL +5 -0
  51. glitchlings-0.4.5.dist-info/entry_points.txt +2 -0
  52. glitchlings-0.4.5.dist-info/licenses/LICENSE +201 -0
  53. glitchlings-0.4.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,231 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import random
5
+ from typing import Any, Optional, cast
6
+
7
+ from ..util import KEYNEIGHBORS
8
+ from ._rate import resolve_rate
9
+ from ._rust_extensions import get_rust_operation
10
+ from .core import AttackOrder, AttackWave, Glitchling
11
+
12
# Look up the compiled "fatfinger" operation once at import time. Callers in
# this module test the result against None before using it, so a missing
# accelerator is an expected, supported situation.
_fatfinger_rust = get_rust_operation("fatfinger")
14
+
15
+
16
def _python_unichar(text: str, rng: random.Random) -> str:
    """Collapse one randomly chosen doubled character into a single occurrence.

    A candidate is any character that is immediately repeated (the ``ee`` in
    ``seed``) where the pair is directly followed by a word character, so a
    pair sitting at the very end of the text is never a candidate.

    Args:
        text: Text to edit.
        rng: Random source used to pick among the candidate pairs.

    Returns:
        ``text`` with one pair shortened by a character, or ``text`` unchanged
        when it contains no candidate pair.
    """
    import re

    pairs = [*re.finditer(r"((.)\2)(?=\w)", text)]
    if pairs:
        picked = rng.choice(pairs)
        # Keep the first character of the pair and drop the second.
        return text[: picked.start(1) + 1] + text[picked.end(1) :]
    return text
25
+
26
+
27
def _python_skipped_space(text: str, rng: random.Random) -> str:
    """Delete one randomly chosen space character from ``text``.

    Only the plain space character counts; tabs and newlines are left alone.

    Args:
        text: Text to edit.
        rng: Random source used to pick which space is removed.

    Returns:
        ``text`` with one space removed, or ``text`` unchanged when it
        contains no space.
    """
    gaps = [offset for offset, char in enumerate(text) if char == " "]
    if gaps:
        drop = rng.choice(gaps)
        return text[:drop] + text[drop + 1 :]
    return text
35
+
36
+
37
def _python_random_space(text: str, rng: random.Random) -> str:
    """Insert a single space at a random interior position of ``text``.

    The space is never placed before the first or after the last character,
    so texts shorter than two characters are returned unchanged.

    Args:
        text: Text to edit.
        rng: Random source used to pick the insertion point.

    Returns:
        ``text`` with one extra space, or ``text`` unchanged when it is too
        short.
    """
    size = len(text)
    if size >= 2:
        cut = rng.randrange(1, size)
        return f"{text[:cut]} {text[cut:]}"
    return text
42
+
43
+
44
def _python_repeated_char(text: str, rng: random.Random) -> str:
    """Duplicate one randomly chosen non-whitespace character of ``text``.

    Args:
        text: Text to edit.
        rng: Random source used to pick the character to double.

    Returns:
        ``text`` with one character doubled in place, or ``text`` unchanged
        when it has no non-whitespace character.
    """
    candidates = [spot for spot, glyph in enumerate(text) if not glyph.isspace()]
    if candidates:
        spot = rng.choice(candidates)
        # Overlapping the two slices by one character repeats that character.
        return text[: spot + 1] + text[spot:]
    return text
50
+
51
+
52
def _python_is_word_char(c: str) -> bool:
    """Return True when ``c`` is an underscore or is alphanumeric."""
    return c == "_" or c.isalnum()
54
+
55
+
56
def _python_eligible_idx(s: str, i: int) -> bool:
    """Return True when index ``i`` is an interior position of a word in ``s``.

    An index qualifies only if the character at ``i`` and both of its direct
    neighbours are word characters. The first and last index of ``s`` never
    qualify because they lack a neighbour on one side, and out-of-range
    indices are rejected.

    Args:
        s: Text being inspected.
        i: Candidate index into ``s``.

    Returns:
        Whether ``i`` may be used for a positional edit.
    """
    if not 0 < i < len(s) - 1:
        return False
    return all(_python_is_word_char(s[k]) for k in (i, i - 1, i + 1))
64
+
65
+
66
def _python_draw_eligible_index(rng: random.Random, s: str, max_tries: int = 16) -> Optional[int]:
    """Pick a random index of ``s`` that passes ``_python_eligible_idx``.

    Up to ``max_tries`` independent random probes are attempted first. If none
    of them lands on an eligible index, a single circular sweep over every
    index is made from a random starting point, so an eligible index is found
    whenever one exists.

    Args:
        rng: Random source for the probes and the sweep origin.
        s: Text to draw an index from.
        max_tries: Number of random probes before falling back to the sweep.

    Returns:
        An eligible index, or ``None`` when ``s`` is empty or has no eligible
        index.
    """
    length = len(s)
    if not length:
        return None

    # Cheap path: independent random probes.
    for _ in range(max_tries):
        probe = rng.randrange(length)
        if _python_eligible_idx(s, probe):
            return probe

    # Exhaustive path: visit every index once, wrapping around the end.
    origin = rng.randrange(length)
    for step in range(length):
        candidate = (origin + step) % length
        if _python_eligible_idx(s, candidate):
            return candidate
    return None
84
+
85
+
86
def _fatfinger_python(
    text: str,
    *,
    rate: float,
    layout: dict[str, list[str]],
    rng: random.Random,
) -> str:
    """Apply random typing slips to ``text`` using only Python code.

    The number of attempted edits is ``ceil(len(text) * rate)``. All edit
    kinds are drawn up front, then applied one after another to the evolving
    text. Positional edits (swap with the next character, drop a character,
    insert a neighbouring key, replace with a neighbouring key) act on a
    randomly drawn interior-of-word index and are skipped when no such index
    exists. The remaining edits (drop a space, add a space, collapse a doubled
    character, double a character) operate on the text as a whole.

    Args:
        text: Text to corrupt.
        rate: Edit budget as a fraction of the text length; values at or
            below zero return ``text`` unchanged.
        layout: Mapping from a lower-case character to its neighbouring keys.
        rng: Random source for every draw.

    Returns:
        The corrupted text.
    """
    if rate <= 0.0:
        return text

    budget = math.ceil(len(text) * rate)
    if budget == 0:
        return text

    positional = ("char_swap", "missing_char", "extra_char", "nearby_char")
    # Insertion order matters: it fixes the order of the action menu below.
    whole_text = {
        "skipped_space": _python_skipped_space,
        "random_space": _python_random_space,
        "unichar": _python_unichar,
        "repeated_char": _python_repeated_char,
    }
    menu = positional + tuple(whole_text)

    plan = [rng.choice(menu) for _ in range(budget)]

    result = text
    for action in plan:
        edit = whole_text.get(action)
        if edit is not None:
            result = edit(result, rng)
            continue

        at = _python_draw_eligible_index(rng, result)
        if at is None:
            continue

        here = result[at]
        if action == "char_swap":
            # An eligible index always has a right-hand neighbour to swap with.
            result = result[:at] + result[at + 1] + here + result[at + 2 :]
        elif action == "missing_char":
            result = result[:at] + result[at + 1 :]
        elif action == "extra_char":
            # Fall back to repeating the character itself when the layout
            # offers no neighbour (or an empty one).
            options = layout.get(here.lower(), []) or [here]
            result = result[:at] + (rng.choice(options) or here) + result[at:]
        elif action == "nearby_char":
            options = layout.get(here.lower(), [])
            if options:
                result = result[:at] + rng.choice(options) + result[at + 1 :]
    return result
139
+
140
+
141
def fatfinger(
    text: str,
    rate: float | None = None,
    keyboard: str = "CURATOR_QWERTY",
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    max_change_rate: float | None = None,
) -> str:
    """Introduce character-level "fat finger" edits with a Rust fast path.

    Args:
        text: Text to corrupt. Falsy input yields an empty string.
        rate: Edit rate; combined with ``max_change_rate`` by ``resolve_rate``
            using a default of 0.02. Negative results are clamped to zero.
        keyboard: Name of the attribute on ``KEYNEIGHBORS`` holding the key
            neighbour layout.
        seed: Seed for a new ``random.Random`` when ``rng`` is not given.
        rng: Random source to use instead of creating one from ``seed``.
        max_change_rate: Legacy spelling of ``rate``.

    Returns:
        The corrupted text, produced by the Rust operation when it is
        available and by the Python implementation otherwise.

    Raises:
        AttributeError: If ``keyboard`` is not an attribute of
            ``KEYNEIGHBORS`` (only reached for non-empty text and a positive
            rate).
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=max_change_rate,
        default=0.02,
        legacy_name="max_change_rate",
    )

    generator = random.Random(seed) if rng is None else rng
    if not text:
        return ""

    budget_rate = max(0.0, resolved)
    if budget_rate == 0.0:
        return text

    neighbors = getattr(KEYNEIGHBORS, keyboard)

    if _fatfinger_rust is None:
        return _fatfinger_python(text, rate=budget_rate, layout=neighbors, rng=generator)

    accelerated = _fatfinger_rust(
        text,
        max_change_rate=budget_rate,
        layout=neighbors,
        rng=generator,
    )
    return cast(str, accelerated)
176
+
177
+
178
class Typogre(Glitchling):
    """Glitchling that introduces deterministic keyboard-typing errors."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        max_change_rate: float | None = None,
        keyboard: str = "CURATOR_QWERTY",
        seed: int | None = None,
    ) -> None:
        """Configure the glitchling around the ``fatfinger`` corruption.

        Args:
            rate: Edit rate; combined with ``max_change_rate`` by
                ``resolve_rate`` using a default of 0.02.
            max_change_rate: Legacy spelling of ``rate``.
            keyboard: Name of the key-neighbour layout to use.
            seed: Seed forwarded to the base class.
        """
        # Assigned before the base initialiser runs, keeping the alias table
        # in place by the time the base class processes its arguments.
        self._param_aliases = {"max_change_rate": "rate"}
        resolved = resolve_rate(
            rate=rate,
            legacy_value=max_change_rate,
            default=0.02,
            legacy_name="max_change_rate",
        )
        super().__init__(
            name="Typogre",
            corruption_function=fatfinger,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.EARLY,
            seed=seed,
            rate=resolved,
            keyboard=keyboard,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Describe this glitchling as a plain dictionary.

        Returns:
            A dictionary with the keys ``type``, ``rate``, ``keyboard`` and
            ``layout``, or ``None`` when no rate is stored or the keyboard
            name does not resolve to a layout on ``KEYNEIGHBORS``.
        """
        settings = self.kwargs

        rate = settings.get("rate")
        if rate is None:
            rate = settings.get("max_change_rate")
        if rate is None:
            return None

        keyboard = str(settings.get("keyboard", "CURATOR_QWERTY"))
        layout = getattr(KEYNEIGHBORS, keyboard, None)
        if layout is None:
            return None

        # Copy every neighbour collection into a fresh list.
        serialized = {key: list(neighbors) for key, neighbors in layout.items()}

        return {
            "type": "typo",
            "rate": float(rate),
            "keyboard": keyboard,
            "layout": serialized,
        }
226
+
227
+
228
# Ready-to-use instance built with the constructor defaults.
typogre = Typogre()


# ``fatfinger`` is the public corruption function of this module, so it is
# exported alongside the class and its default instance.
__all__ = ["Typogre", "typogre", "fatfinger"]
@@ -0,0 +1,185 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import random
5
+ from collections.abc import Sequence
6
+ from typing import Any, cast
7
+
8
+ from ._rate import resolve_rate
9
+ from ._rust_extensions import get_rust_operation
10
+ from .core import AttackOrder, AttackWave, Glitchling
11
+
12
# Look up the compiled "inject_zero_widths" operation once at import time.
# Callers in this module test the result against None before using it, so a
# missing accelerator is an expected, supported situation.
_inject_zero_widths_rust = get_rust_operation("inject_zero_widths")
14
+
15
# Palette used when the caller does not supply its own characters.
_DEFAULT_ZERO_WIDTH_CHARACTERS: tuple[str, ...] = (
    # U+200B ZERO WIDTH SPACE
    "\u200b",
    # U+200C ZERO WIDTH NON-JOINER
    "\u200c",
    # U+200D ZERO WIDTH JOINER
    "\u200d",
    # U+FEFF ZERO WIDTH NO-BREAK SPACE
    "\ufeff",
    # U+2060 WORD JOINER
    "\u2060",
)
22
+
23
+
24
def _python_insert_zero_widths(
    text: str,
    *,
    rate: float,
    rng: random.Random,
    characters: Sequence[str],
) -> str:
    """Insert palette characters between adjacent non-whitespace characters.

    Every gap between two neighbouring non-whitespace characters is a slot.
    The number of slots filled is ``rate * slots``; a fractional part is
    resolved by a single random draw, and the total is capped at the number
    of slots. Slots are sampled without replacement and each receives one
    randomly chosen palette character.

    Args:
        text: Text to edit.
        rate: Fraction of slots to fill; negative values are treated as zero.
        rng: Random source for rounding, slot sampling and glyph choice.
        characters: Candidate characters; empty entries are ignored.

    Returns:
        The text with the chosen characters inserted, or ``text`` unchanged
        when there is nothing to insert or nowhere to insert it.
    """
    if not text:
        return text

    glyphs = [glyph for glyph in characters if glyph]
    if not glyphs:
        return text

    # A slot is identified by the index of its right-hand character.
    slots = [
        right
        for right in range(1, len(text))
        if not (text[right - 1].isspace() or text[right].isspace())
    ]
    if not slots:
        return text

    capacity = len(slots)
    effective = max(0.0, rate)
    if effective <= 0.0:
        return text

    expected = effective * capacity
    quota = math.floor(expected)
    fraction = expected - quota
    # The short-circuit leaves the RNG untouched when there is no fraction.
    if fraction > 0.0 and rng.random() < fraction:
        quota += 1
    quota = min(capacity, quota)

    if quota <= 0:
        return text

    picked = sorted(rng.sample(slots, quota))

    # Walk the slots right-to-left so glyphs are drawn in that order, and
    # collect the untouched slices in between; reversing at the end restores
    # reading order.
    pieces = []
    tail = len(text)
    for slot in reversed(picked):
        pieces.append(text[slot:tail])
        pieces.append(rng.choice(glyphs))
        tail = slot
    pieces.append(text[:tail])
    pieces.reverse()

    return "".join(pieces)
69
+
70
+
71
def insert_zero_widths(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    characters: Sequence[str] | None = None,
) -> str:
    """Inject zero-width characters between non-space character pairs.

    When the Rust operation is available, the Python implementation is run
    first and the Rust operation second. If the RNG supports ``getstate`` and
    ``setstate``, it is rewound between the two runs so that both start from
    the same state. The Rust output is returned only when it equals the
    Python output; otherwise the Python output is returned and the RNG is put
    back into the state the Python run left it in.

    Args:
        text: Text to edit.
        rate: Insertion rate; resolved by ``resolve_rate`` with a default of
            0.02. Negative results are clamped to zero.
        seed: Seed for a new ``random.Random`` when ``rng`` is not given.
        rng: Random source to use instead of creating one from ``seed``.
        characters: Palette to draw from; the module default palette is used
            when omitted. Empty entries are ignored.

    Returns:
        The text with zero-width characters inserted, or ``text`` unchanged
        when the text or the palette is empty or the rate is zero.
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=None,
        default=0.02,
        legacy_name="rate",
    )

    if rng is None:
        rng = random.Random(seed)

    source: Sequence[str] = (
        _DEFAULT_ZERO_WIDTH_CHARACTERS if characters is None else tuple(characters)
    )
    glyphs = tuple(filter(None, source))
    if not (glyphs and text):
        return text

    effective = max(0.0, resolved)
    if effective == 0.0:
        return text

    if _inject_zero_widths_rust is None:
        return _python_insert_zero_widths(
            text,
            rate=effective,
            rng=rng,
            characters=glyphs,
        )

    # Snapshot the RNG so both implementations can consume identical draws.
    rewindable = hasattr(rng, "getstate") and hasattr(rng, "setstate")
    before = rng.getstate() if rewindable else None

    reference = _python_insert_zero_widths(
        text,
        rate=effective,
        rng=rng,
        characters=glyphs,
    )

    after = None
    if before is not None:
        after = rng.getstate()
        rng.setstate(before)

    accelerated = cast(
        str,
        _inject_zero_widths_rust(text, effective, list(glyphs), rng),
    )
    if accelerated == reference:
        return accelerated

    # The outputs disagree: trust the Python result and leave the RNG as the
    # Python run left it.
    if after is not None:
        rng.setstate(after)
    return reference
133
+
134
+
135
class Zeedub(Glitchling):
    """Glitchling that plants zero-width glyphs inside words."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        seed: int | None = None,
        characters: Sequence[str] | None = None,
    ) -> None:
        """Configure the glitchling around ``insert_zero_widths``.

        Args:
            rate: Insertion rate; resolved by ``resolve_rate`` with a default
                of 0.02.
            seed: Seed forwarded to the base class.
            characters: Optional palette, stored as a tuple; ``None`` is
                stored as-is.
        """
        resolved = resolve_rate(
            rate=rate,
            legacy_value=None,
            default=0.02,
            legacy_name="rate",
        )
        palette = None if characters is None else tuple(characters)
        super().__init__(
            name="Zeedub",
            corruption_function=insert_zero_widths,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.LAST,
            seed=seed,
            rate=resolved,
            characters=palette,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Describe this glitchling as a plain dictionary.

        Returns:
            A dictionary with the keys ``type``, ``rate`` and ``characters``,
            or ``None`` when no rate is stored or the palette ends up empty.
        """
        rate = self.kwargs.get("rate")
        if rate is None:
            return None

        configured = self.kwargs.get("characters")
        if configured is None:
            glyphs = list(_DEFAULT_ZERO_WIDTH_CHARACTERS)
        else:
            # Falsy entries are dropped; the rest are coerced to strings.
            glyphs = [str(glyph) for glyph in configured if glyph]

        if not glyphs:
            return None

        return {
            "type": "zwj",
            "rate": float(rate),
            "characters": glyphs,
        }
180
+
181
+
182
# Ready-to-use instance built with the constructor defaults.
zeedub = Zeedub()


# Public names of this module.
__all__ = ["Zeedub", "zeedub", "insert_zero_widths"]