glitchlings 0.4.5__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic; review the release details before installing.

Files changed (53)
  1. glitchlings/__init__.py +71 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust.cp311-win_amd64.pyd +0 -0
  4. glitchlings/compat.py +282 -0
  5. glitchlings/config.py +386 -0
  6. glitchlings/config.toml +3 -0
  7. glitchlings/data/__init__.py +1 -0
  8. glitchlings/data/hokey_assets.json +193 -0
  9. glitchlings/dlc/__init__.py +7 -0
  10. glitchlings/dlc/_shared.py +153 -0
  11. glitchlings/dlc/huggingface.py +81 -0
  12. glitchlings/dlc/prime.py +254 -0
  13. glitchlings/dlc/pytorch.py +166 -0
  14. glitchlings/dlc/pytorch_lightning.py +209 -0
  15. glitchlings/lexicon/__init__.py +192 -0
  16. glitchlings/lexicon/_cache.py +108 -0
  17. glitchlings/lexicon/data/default_vector_cache.json +82 -0
  18. glitchlings/lexicon/metrics.py +162 -0
  19. glitchlings/lexicon/vector.py +652 -0
  20. glitchlings/lexicon/wordnet.py +228 -0
  21. glitchlings/main.py +364 -0
  22. glitchlings/util/__init__.py +195 -0
  23. glitchlings/util/adapters.py +27 -0
  24. glitchlings/util/hokey_generator.py +144 -0
  25. glitchlings/util/stretch_locator.py +140 -0
  26. glitchlings/util/stretchability.py +375 -0
  27. glitchlings/zoo/__init__.py +172 -0
  28. glitchlings/zoo/_ocr_confusions.py +32 -0
  29. glitchlings/zoo/_rate.py +131 -0
  30. glitchlings/zoo/_rust_extensions.py +143 -0
  31. glitchlings/zoo/_sampling.py +54 -0
  32. glitchlings/zoo/_text_utils.py +100 -0
  33. glitchlings/zoo/adjax.py +128 -0
  34. glitchlings/zoo/apostrofae.py +127 -0
  35. glitchlings/zoo/assets/__init__.py +0 -0
  36. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  37. glitchlings/zoo/core.py +582 -0
  38. glitchlings/zoo/hokey.py +173 -0
  39. glitchlings/zoo/jargoyle.py +335 -0
  40. glitchlings/zoo/mim1c.py +109 -0
  41. glitchlings/zoo/ocr_confusions.tsv +30 -0
  42. glitchlings/zoo/redactyl.py +193 -0
  43. glitchlings/zoo/reduple.py +148 -0
  44. glitchlings/zoo/rushmore.py +153 -0
  45. glitchlings/zoo/scannequin.py +171 -0
  46. glitchlings/zoo/typogre.py +231 -0
  47. glitchlings/zoo/zeedub.py +185 -0
  48. glitchlings-0.4.5.dist-info/METADATA +648 -0
  49. glitchlings-0.4.5.dist-info/RECORD +53 -0
  50. glitchlings-0.4.5.dist-info/WHEEL +5 -0
  51. glitchlings-0.4.5.dist-info/entry_points.txt +2 -0
  52. glitchlings-0.4.5.dist-info/licenses/LICENSE +201 -0
  53. glitchlings-0.4.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,195 @@
1
+ import difflib
2
+ from collections.abc import Iterable
3
+
4
# Public surface of this module: the diff helper, the sample passage, and the
# keyboard-adjacency types/constants defined below.
__all__ = [
    "SAMPLE_TEXT",
    "string_diffs",
    "KeyNeighborMap",
    "KeyboardLayouts",
    "KeyNeighbors",
    "KEYNEIGHBORS",
]
12
+
13
# Fixed sample passage (the opening of Kafka's "The Metamorphosis").
# NOTE(review): presumably used as deterministic demo/test input — confirm
# against callers.
SAMPLE_TEXT = (
    "One morning, when Gregor Samsa woke from troubled dreams, he found himself "
    "transformed in his bed into a horrible vermin. He lay on his armour-like back, and "
    "if he lifted his head a little he could see his brown belly, slightly domed and "
    "divided by arches into stiff sections. The bedding was hardly able to cover it and "
    "seemed ready to slide off any moment. His many legs, pitifully thin compared with "
    "the size of the rest of him, waved about helplessly as he looked."
)
21
+
22
+
23
def string_diffs(a: str, b: str) -> list[list[tuple[str, str, str]]]:
    """Diff two strings and return groups of adjacent non-equal opcodes.

    Uses :class:`difflib.SequenceMatcher`; ``'equal'`` opcodes are dropped and
    act only as group separators.  Each inner element is a tuple of
    ``(tag, a_text, b_text)``.
    """
    matcher = difflib.SequenceMatcher(None, a, b)
    grouped: list[list[tuple[str, str, str]]] = []
    pending: list[tuple[str, str, str]] = []

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag != "equal":
            pending.append((tag, a[i1:i2], b[j1:j2]))
        elif pending:
            # An equal stretch ends the current group of edits.
            grouped.append(pending)
            pending = []

    # The last group may not be terminated by an 'equal' opcode.
    if pending:
        grouped.append(pending)

    return grouped
49
+
50
+
51
# Lower-cased key -> ordered list of neighbouring keys.
KeyNeighborMap = dict[str, list[str]]
# Layout name -> neighbour map.
KeyboardLayouts = dict[str, KeyNeighborMap]


def _build_neighbor_map(rows: Iterable[str]) -> KeyNeighborMap:
    """Derive 8-neighbour adjacency lists from keyboard layout rows.

    Space characters in ``rows`` are stagger placeholders, not keys.  Each
    key's neighbours are listed in scan order (top-left to bottom-right
    around the key) with duplicates removed.
    """
    positions: dict[tuple[int, int], str] = {
        (col, line): key.lower()
        for line, row in enumerate(rows)
        for col, key in enumerate(row)
        if key != " "
    }
    # The eight surrounding offsets, row by row, matching the scan order used
    # when the map was originally built.
    offsets = [(dx, dy) for dy in (-1, 0, 1) for dx in (-1, 0, 1) if (dx, dy) != (0, 0)]

    adjacency: KeyNeighborMap = {}
    for (col, line), key in positions.items():
        found = [
            positions[(col + dx, line + dy)]
            for dx, dy in offsets
            if (col + dx, line + dy) in positions
        ]
        # dict.fromkeys preserves first-encounter order while deduplicating.
        adjacency[key] = list(dict.fromkeys(found))
    return adjacency
80
+
81
+
82
# Hand-curated adjacency map; further layouts are derived geometrically via
# _register_layout below.  Trailing/embedded spaces in some neighbour lists
# appear to stand for the space bar as an adjacent key — NOTE(review):
# confirm consumers treat " " as a valid substitution target.
_KEYNEIGHBORS: KeyboardLayouts = {
    "CURATOR_QWERTY": {
        "a": [*"qwsz"],
        "b": [*"vghn "],
        "c": [*"xdfv "],
        "d": [*"serfcx"],
        "e": [*"wsdrf34"],
        "f": [*"drtgvc"],
        "g": [*"ftyhbv"],
        "h": [*"gyujnb"],
        "i": [*"ujko89"],
        "j": [*"huikmn"],
        "k": [*"jilom,"],
        "l": [*"kop;.,"],
        "m": [*"njk, "],
        "n": [*"bhjm "],
        "o": [*"iklp90"],
        "p": [*"o0-[;l"],
        "q": [*"was 12"],
        "r": [*"edft45"],
        "s": [*"awedxz"],
        "t": [*"r56ygf"],
        "u": [*"y78ijh"],
        "v": [*"cfgb "],
        "w": [*"q23esa"],
        "x": [*"zsdc "],
        "y": [*"t67uhg"],
        "z": [*"asx"],
    }
}
112
+
113
+
114
def _register_layout(name: str, rows: Iterable[str]) -> None:
    """Compute the adjacency map for ``rows`` and store it under ``name``."""
    neighbor_map = _build_neighbor_map(rows)
    _KEYNEIGHBORS[name] = neighbor_map
116
+
117
+
118
# Register layouts whose adjacency is derived geometrically from row strings.
# Leading spaces stagger the rows so diagonal neighbours line up by
# (column, row) position in _build_neighbor_map; spaces themselves are not keys.
_register_layout(
    "DVORAK",
    (
        "`1234567890[]\\",
        " ',.pyfgcrl/=\\",
        " aoeuidhtns-",
        " ;qjkxbmwvz",
    ),
)

_register_layout(
    "COLEMAK",
    (
        "`1234567890-=",
        " qwfpgjluy;[]\\",
        " arstdhneio'",
        " zxcvbkm,./",
    ),
)

_register_layout(
    "QWERTY",
    (
        "`1234567890-=",
        " qwertyuiop[]\\",
        " asdfghjkl;'",
        " zxcvbnm,./",
    ),
)

_register_layout(
    "AZERTY",
    (
        "²&é\"'(-è_çà)=",
        " azertyuiop^$",
        " qsdfghjklmù*",
        " <wxcvbn,;:!",
    ),
)

_register_layout(
    "QWERTZ",
    (
        "^1234567890ß´",
        " qwertzuiopü+",
        " asdfghjklöä#",
        " yxcvbnm,.-",
    ),
)

_register_layout(
    "SPANISH_QWERTY",
    (
        "º1234567890'¡",
        " qwertyuiop´+",
        " asdfghjklñ´",
        " <zxcvbnm,.-",
    ),
)

_register_layout(
    "SWEDISH_QWERTY",
    (
        "§1234567890+´",
        " qwertyuiopå¨",
        " asdfghjklöä'",
        " <zxcvbnm,.-",
    ),
)
187
+
188
+
189
class KeyNeighbors:
    """Attribute-style access to the registered layouts (e.g. ``.QWERTY``)."""

    def __init__(self) -> None:
        # Snapshot every registered layout as an instance attribute; for a
        # plain class this is equivalent to setattr in a loop.
        self.__dict__.update(_KEYNEIGHBORS)


# Module-level singleton capturing the layouts registered above.
KEYNEIGHBORS: KeyNeighbors = KeyNeighbors()
@@ -0,0 +1,27 @@
1
+ """Adapter helpers shared across Python and DLC integrations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterable
6
+
7
+ from ..zoo import Gaggle, Glitchling, summon
8
+
9
+
10
def coerce_gaggle(
    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
    *,
    seed: int,
) -> Gaggle:
    """Return a :class:`Gaggle` built from any supported glitchling specifier."""
    # An existing Gaggle is passed through untouched (seed is ignored).
    if isinstance(glitchlings, Gaggle):
        return glitchlings

    # Normalise a single name or glitchling into a one-element spec list;
    # any other iterable is materialised as-is.
    if isinstance(glitchlings, (Glitchling, str)):
        specs: list[str | Glitchling] = [glitchlings]
    else:
        specs = list(glitchlings)

    return summon(specs, seed=seed)


__all__ = ["coerce_gaggle"]
@@ -0,0 +1,144 @@
1
+ """Hokey expressive lengthening generator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from .stretch_locator import StretchSite, apply_stretch, find_stretch_site
8
+ from .stretchability import RandomLike, StretchabilityAnalyzer, StretchabilityFeatures
9
+
10
+
11
@dataclass(slots=True)
class HokeyConfig:
    """Tuning knobs for a single expressive-lengthening pass."""

    # Fraction of stretchable candidates to select (forwarded to
    # StretchabilityAnalyzer.select_candidates as ``rate``).
    rate: float = 0.3
    # Inclusive bounds on the number of extra grapheme repeats per stretch.
    extension_min: int = 2
    extension_max: int = 5
    # Success probability for the negative-binomial length sampler; synced
    # onto the sampler at the start of HokeyGenerator.generate.
    base_p: float = 0.45
    # Words with more alphabetic characters than this are damped; words more
    # than twice this long are skipped entirely.  <= 0 disables the check.
    word_length_threshold: int = 6
18
+
19
+
20
@dataclass(slots=True)
class StretchEvent:
    """Record of one stretch applied by :meth:`HokeyGenerator.generate`."""

    # Index of the affected token in the tokenised text.
    token_index: int
    # Token text before stretching.
    original: str
    # Token text after stretching.
    stretched: str
    # Number of extra grapheme copies inserted.
    repeats: int
    # Where in the token the stretch was applied.
    site: StretchSite
    # Candidate's stretchability score at selection time.
    score: float
    # Analyzer features computed for the token.
    features: StretchabilityFeatures
29
+
30
+
31
class NegativeBinomialSampler:
    """Sample stretch lengths from a clipped negative binomial distribution."""

    def __init__(self, base_p: float = 0.45) -> None:
        # Baseline per-trial success probability; lowered as intensity grows.
        self.base_p = base_p

    def sample(
        self,
        rng: RandomLike,
        *,
        intensity: float,
        minimum: int,
        maximum: int,
    ) -> int:
        """Draw a repeat count clipped to ``[minimum, maximum]``.

        Higher ``intensity`` means more geometric draws and a lower success
        probability, i.e. longer stretches on average.
        """
        lo = max(0, int(minimum))
        hi = max(lo, int(maximum))
        if hi == 0:
            return 0
        if hi == lo:
            # Degenerate range: nothing to sample.
            return hi

        draws = max(1, int(round(1 + 2 * intensity)))
        success_p = self.base_p / (1.0 + 0.75 * max(0.0, intensity))
        success_p = max(0.05, min(0.95, success_p))

        failures = 0
        for _ in range(draws):
            failures += self._geometric_sample(rng, success_p)

        return max(lo, min(hi, lo + failures))

    @staticmethod
    def _geometric_sample(rng: RandomLike, p: float) -> int:
        """Count failures before the first success with probability ``p``."""
        misses = 0
        while rng.random() > p:
            misses += 1
        return misses
65
+
66
+
67
class HokeyGenerator:
    """Full expressive lengthening pipeline.

    Tokenises text, scores tokens for "stretchability", picks candidates,
    and lengthens one grapheme site per selected token.
    """

    def __init__(
        self,
        analyzer: StretchabilityAnalyzer | None = None,
        sampler: NegativeBinomialSampler | None = None,
    ) -> None:
        # Collaborators are injectable (e.g. for testing); defaults otherwise.
        self.analyzer = analyzer or StretchabilityAnalyzer()
        self.sampler = sampler or NegativeBinomialSampler()

    def generate(
        self,
        text: str,
        *,
        rng: RandomLike,
        config: HokeyConfig,
    ) -> tuple[str, list[StretchEvent]]:
        """Stretch selected tokens of ``text``.

        Returns the rewritten text and one :class:`StretchEvent` per applied
        stretch.  ``rng`` drives both candidate selection and length
        sampling, so output is reproducible for a fixed rng state.
        """
        if not text:
            return text, []

        # Keep the sampler's success probability in sync with the config.
        # NOTE(review): mutates shared sampler state as a side effect.
        if config.base_p != self.sampler.base_p:
            self.sampler.base_p = config.base_p

        tokens = self.analyzer.tokenise(text)
        candidates = self.analyzer.analyse_tokens(tokens)
        selected = self.analyzer.select_candidates(candidates, rate=config.rate, rng=rng)
        if not selected:
            return text, []

        # The final output is a plain join of these, so tokenise() must
        # cover the input text exactly (including whitespace tokens).
        token_strings = [token.text for token in tokens]
        events: list[StretchEvent] = []

        for candidate in selected:
            token_idx = candidate.token.index
            original = token_strings[token_idx]
            site = find_stretch_site(original)
            if site is None:
                # Token has no stretchable grapheme (e.g. no letters).
                continue

            # Blend analyzer intensity with the candidate score, capped at 1.5.
            intensity = min(1.5, candidate.features.intensity() + 0.35 * candidate.score)
            alpha_count = sum(1 for ch in original if ch.isalpha())
            # Very long words are never stretched.
            if config.word_length_threshold > 0 and alpha_count > config.word_length_threshold * 2:
                continue
            if config.word_length_threshold > 0 and alpha_count > config.word_length_threshold:
                # Moderately long words: damp intensity proportionally to the
                # overshoot and drop weak candidates outright.
                excess = alpha_count - config.word_length_threshold
                intensity = intensity / (1.0 + 0.35 * excess)
                if candidate.score < 0.35 and excess >= 2:
                    continue
                intensity = max(0.05, intensity)

            repeats = self.sampler.sample(
                rng,
                intensity=intensity,
                minimum=config.extension_min,
                maximum=config.extension_max,
            )
            if repeats <= 0:
                continue

            stretched_word = apply_stretch(original, site, repeats)
            token_strings[token_idx] = stretched_word
            events.append(
                StretchEvent(
                    token_index=token_idx,
                    original=original,
                    stretched=stretched_word,
                    repeats=repeats,
                    site=site,
                    score=candidate.score,
                    features=candidate.features,
                )
            )

        return "".join(token_strings), events


__all__ = ["HokeyGenerator", "HokeyConfig", "StretchEvent", "NegativeBinomialSampler"]
@@ -0,0 +1,140 @@
1
+ """Identify where expressive stretches should occur within a token."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Iterable
7
+
8
# Characters treated as vowels (both cases); note "y" counts as a vowel.
VOWELS = set("aeiouyAEIOUY")
# Word-final consonants that can be lengthened ("hmm" -> "hmmmm").
SONORANTS = set("rlmnwyhRLMNWYH")
# Hissing consonants.  NOTE(review): the two-character entries ("sh", "zh",
# ...) can never match a single-character membership test, and this set is
# not referenced by the functions in this module — confirm intended use.
SIBILANTS = set("sSzZxXcCjJ") | {"sh", "Sh", "sH", "SH", "zh", "Zh"}
# Two-letter vowel teams; find_stretch_site tags a vowel cluster containing
# one of these as category "digraph".
DIGRAPHS = {
    "aa",
    "ae",
    "ai",
    "ay",
    "ee",
    "ei",
    "ey",
    "ie",
    "io",
    "oa",
    "oe",
    "oi",
    "oo",
    "ou",
    "ua",
    "ue",
    "ui",
    "ya",
    "yo",
    "yu",
}
33
+
34
+
35
@dataclass(slots=True)
class StretchSite:
    """Location of a stretchable grapheme."""

    # Half-open [start, end) character span within the token.
    start: int
    end: int
    # One of: "coda", "consonant", "cvce", "vowel", "digraph"
    # (the categories assigned by find_stretch_site).
    category: str

    def unit(self, token: str) -> str:
        """Return the substring of ``token`` covered by this site."""
        return token[self.start : self.end]
45
+
46
+
47
def _alpha_indices(token: str) -> list[int]:
    """Return the positions of the alphabetic characters in ``token``."""
    positions: list[int] = []
    for position, symbol in enumerate(token):
        if symbol.isalpha():
            positions.append(position)
    return positions
49
+
50
+
51
def _vowel_clusters(token: str, indices: Iterable[int]) -> list[tuple[int, int]]:
    """Group runs of adjacent vowel positions into half-open ``(start, end)`` spans.

    ``indices`` is the ascending sequence of alphabetic positions in ``token``.
    A cluster ends when a consonant is reached or when the next alphabetic
    position is not adjacent to the previous one (e.g. across a hyphen).
    """
    clusters: list[tuple[int, int]] = []
    start: int | None = None
    prev_idx: int | None = None
    for idx in indices:
        char = token[idx]
        if char in VOWELS:
            if start is None:
                start = idx
            elif prev_idx is not None and idx != prev_idx + 1:
                # Gap between letters (e.g. "co-op"): close the open run at
                # the last vowel and start a new one here.
                clusters.append((start, prev_idx + 1))
                start = idx
        else:
            if start is not None:
                # BUG FIX: close at the last vowel seen (prev_idx + 1), not at
                # the current consonant index.  With non-adjacent letters
                # ("a-b") the old end (idx) wrongly extended the span across
                # the non-letter characters; for contiguous letters the two
                # are identical.  prev_idx cannot be None here because start
                # was set on an earlier iteration.
                clusters.append((start, prev_idx + 1))
                start = None
        prev_idx = idx
    if start is not None and prev_idx is not None:
        # Trailing open run reaches the final vowel.
        clusters.append((start, prev_idx + 1))
    return clusters
71
+
72
+
73
def find_stretch_site(token: str) -> StretchSite | None:
    """Return the most suitable stretch site for ``token``.

    Candidates are collected with integer priorities and the best one wins;
    returns ``None`` when the token contains no letters.
    """

    alpha_indices = _alpha_indices(token)
    if not alpha_indices:
        return None

    lower = token.lower()
    clusters = _vowel_clusters(lower, alpha_indices)
    # (priority, site) pairs; higher priority wins (see the sort at the end).
    candidates: list[tuple[int, StretchSite]] = []

    # Sibilant/sonorant coda extension (yes -> yesss, hmm -> hmmmm)
    last_idx = alpha_indices[-1]
    last_char = lower[last_idx]
    if len(alpha_indices) >= 2:
        prev_char = lower[alpha_indices[-2]]
    else:
        prev_char = ""
    # A multi-letter vowel cluster elsewhere disqualifies coda stretching;
    # a leading "y" cluster (e.g. "yes") does not count as one.
    has_multi_vowel = any(
        (end - start >= 2) and not (lower[start] == "y" and start == 0) for start, end in clusters
    )
    if last_char in {"s", "z"} and prev_char in VOWELS and not has_multi_vowel:
        candidates.append((5, StretchSite(last_idx, last_idx + 1, "coda")))
    elif last_char in SONORANTS and prev_char in VOWELS and not has_multi_vowel:
        candidates.append((4, StretchSite(last_idx, last_idx + 1, "coda")))
    elif not clusters:
        # No vowels at all: fall back to stretching the final consonant.
        candidates.append((2, StretchSite(last_idx, last_idx + 1, "consonant")))

    # CVCe pattern (cute -> cuuute)
    # NOTE(review): the endswith("e") guard checks the raw token, so tokens
    # with trailing punctuation ("cute!") skip this branch even though the
    # final-letter check below would handle them — confirm intended.
    if lower.endswith("e") and len(alpha_indices) >= 3:
        final_letter = alpha_indices[-1]
        if token[final_letter].lower() == "e":
            c_idx = alpha_indices[-2]
            v_idx = alpha_indices[-3]
            if token[c_idx].lower() not in VOWELS and token[v_idx].lower() in VOWELS:
                # Stretch the vowel before the consonant, not the silent "e".
                candidates.append((4, StretchSite(v_idx, v_idx + 1, "cvce")))

    # Every vowel cluster is a candidate; the last cluster ranks higher.
    for cluster in clusters:
        start, end = cluster
        substring = lower[start:end]
        category = "vowel"
        if any(substring[i : i + 2] in DIGRAPHS for i in range(max(0, len(substring) - 1))):
            category = "digraph"
        priority = 3 if cluster == clusters[-1] else 2
        candidates.append((priority, StretchSite(start, end, category)))

    if not candidates:
        return None

    # Ascending sort, then take the last: highest priority first, then the
    # longest span, then (via -start) the earliest position in the token.
    candidates.sort(key=lambda item: (item[0], item[1].end - item[1].start, -item[1].start))
    return candidates[-1][1]
124
+
125
+
126
def apply_stretch(token: str, site: StretchSite, repeats: int) -> str:
    """Return ``token`` with ``repeats`` extra copies of the grapheme at ``site``.

    Every character inside the half-open span ``[site.start, site.end)`` is
    emitted ``1 + repeats`` times; ``repeats <= 0`` returns the token unchanged.
    """

    if repeats <= 0:
        return token
    return "".join(
        symbol * (1 + repeats) if site.start <= position < site.end else symbol
        for position, symbol in enumerate(token)
    )
138
+
139
+
140
+ __all__ = ["StretchSite", "find_stretch_site", "apply_stretch"]