glitchlings-0.4.5-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of glitchlings might be problematic.

Files changed (53)
  1. glitchlings/__init__.py +71 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust.cp311-win_amd64.pyd +0 -0
  4. glitchlings/compat.py +282 -0
  5. glitchlings/config.py +386 -0
  6. glitchlings/config.toml +3 -0
  7. glitchlings/data/__init__.py +1 -0
  8. glitchlings/data/hokey_assets.json +193 -0
  9. glitchlings/dlc/__init__.py +7 -0
  10. glitchlings/dlc/_shared.py +153 -0
  11. glitchlings/dlc/huggingface.py +81 -0
  12. glitchlings/dlc/prime.py +254 -0
  13. glitchlings/dlc/pytorch.py +166 -0
  14. glitchlings/dlc/pytorch_lightning.py +209 -0
  15. glitchlings/lexicon/__init__.py +192 -0
  16. glitchlings/lexicon/_cache.py +108 -0
  17. glitchlings/lexicon/data/default_vector_cache.json +82 -0
  18. glitchlings/lexicon/metrics.py +162 -0
  19. glitchlings/lexicon/vector.py +652 -0
  20. glitchlings/lexicon/wordnet.py +228 -0
  21. glitchlings/main.py +364 -0
  22. glitchlings/util/__init__.py +195 -0
  23. glitchlings/util/adapters.py +27 -0
  24. glitchlings/util/hokey_generator.py +144 -0
  25. glitchlings/util/stretch_locator.py +140 -0
  26. glitchlings/util/stretchability.py +375 -0
  27. glitchlings/zoo/__init__.py +172 -0
  28. glitchlings/zoo/_ocr_confusions.py +32 -0
  29. glitchlings/zoo/_rate.py +131 -0
  30. glitchlings/zoo/_rust_extensions.py +143 -0
  31. glitchlings/zoo/_sampling.py +54 -0
  32. glitchlings/zoo/_text_utils.py +100 -0
  33. glitchlings/zoo/adjax.py +128 -0
  34. glitchlings/zoo/apostrofae.py +127 -0
  35. glitchlings/zoo/assets/__init__.py +0 -0
  36. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  37. glitchlings/zoo/core.py +582 -0
  38. glitchlings/zoo/hokey.py +173 -0
  39. glitchlings/zoo/jargoyle.py +335 -0
  40. glitchlings/zoo/mim1c.py +109 -0
  41. glitchlings/zoo/ocr_confusions.tsv +30 -0
  42. glitchlings/zoo/redactyl.py +193 -0
  43. glitchlings/zoo/reduple.py +148 -0
  44. glitchlings/zoo/rushmore.py +153 -0
  45. glitchlings/zoo/scannequin.py +171 -0
  46. glitchlings/zoo/typogre.py +231 -0
  47. glitchlings/zoo/zeedub.py +185 -0
  48. glitchlings-0.4.5.dist-info/METADATA +648 -0
  49. glitchlings-0.4.5.dist-info/RECORD +53 -0
  50. glitchlings-0.4.5.dist-info/WHEEL +5 -0
  51. glitchlings-0.4.5.dist-info/entry_points.txt +2 -0
  52. glitchlings-0.4.5.dist-info/licenses/LICENSE +201 -0
  53. glitchlings-0.4.5.dist-info/top_level.txt +1 -0
glitchlings/util/stretchability.py
@@ -0,0 +1,375 @@
+"""Stretchability scoring and candidate selection for Hokey."""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass
+from importlib import resources
+from typing import Any, Protocol, Sequence, TypedDict, cast
+
+# Regexes reused across the module
+TOKEN_REGEX = re.compile(r"\w+|\W+")
+ALPHA_REGEX = re.compile(r"[A-Za-z]")
+EMOJI_REGEX = re.compile(r"[\U0001F300-\U0001FAFF]")
+CLAUSE_PUNCTUATION = {".", "?", "!", ";"}
+
+
+class HokeyAssets(TypedDict):
+    lexical_prior: dict[str, float]
+    interjections: list[str]
+    intensifiers: list[str]
+    evaluatives: list[str]
+    positive_lexicon: list[str]
+    negative_lexicon: list[str]
+
+
+class RandomLike(Protocol):
+    """Interface for RNGs that expose ``random()``."""
+
+    def random(self) -> float: ...
+
+
+# Lexical prior probabilities and pragmatic lexica shared with the Rust fast path.
+def _load_assets() -> HokeyAssets:
+    with (
+        resources.files("glitchlings.data")
+        .joinpath("hokey_assets.json")
+        .open("r", encoding="utf-8") as payload
+    ):
+        data: Any = json.load(payload)
+    return cast(HokeyAssets, data)
+
+
+_ASSETS = _load_assets()
+LEXICAL_PRIOR: dict[str, float] = {
+    token: float(score) for token, score in _ASSETS["lexical_prior"].items()
+}
+
+# Pragmatic lexica for POS/discourse cues
+INTERJECTIONS = frozenset(_ASSETS["interjections"])
+INTENSIFIERS = frozenset(_ASSETS["intensifiers"])
+EVALUATIVES = frozenset(_ASSETS["evaluatives"])
+POSITIVE_LEXICON = frozenset(_ASSETS["positive_lexicon"])
+NEGATIVE_LEXICON = frozenset(_ASSETS["negative_lexicon"])
+
+VOWELS = set("aeiouy")
+SONORANT_CODAS = set("rlmnwyh")
+SIBILANT_CODAS = {"s", "z", "x", "c", "j", "sh", "zh"}
+DIGRAPHS = {
+    "aa",
+    "ae",
+    "ai",
+    "ay",
+    "ee",
+    "ei",
+    "ey",
+    "ie",
+    "oa",
+    "oe",
+    "oi",
+    "oo",
+    "ou",
+    "ue",
+    "ui",
+}
+
+MAX_CANDIDATES_PER_CLAUSE = 4
+MIN_SCORE_THRESHOLD = 0.18
+
+
+@dataclass(slots=True)
+class TokenInfo:
+    text: str
+    start: int
+    end: int
+    is_word: bool
+    clause_index: int
+    preceding_punct: str
+    following_punct: str
+    index: int
+
+    @property
+    def normalised(self) -> str:
+        return self.text.lower()
+
+
+@dataclass(slots=True)
+class StretchabilityFeatures:
+    lexical: float
+    pos: float
+    sentiment: float
+    phonotactic: float
+    context: float
+    sentiment_swing: float
+
+    def intensity(self) -> float:
+        """Map features to an intensity scalar in [0, 1.5]."""
+        emphasis = 0.6 * self.context + 0.4 * self.sentiment_swing
+        return max(0.0, min(1.5, 0.5 * (self.lexical + self.phonotactic) + emphasis))
+
+
+@dataclass(slots=True)
+class StretchCandidate:
+    token: TokenInfo
+    score: float
+    features: StretchabilityFeatures
+
+
+class StretchabilityAnalyzer:
+    """Compute stretchability scores and select candidates."""
+
+    def __init__(
+        self,
+        *,
+        lexical_prior: dict[str, float] | None = None,
+        weights: tuple[float, float, float, float, float] = (0.32, 0.18, 0.14, 0.22, 0.14),
+    ) -> None:
+        self.lexical_prior = lexical_prior or LEXICAL_PRIOR
+        self.weights = weights
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def tokenise(self, text: str) -> list[TokenInfo]:
+        """Tokenise text preserving separator tokens."""
+        return self._tokenise(text)
+
+    def analyse(self, text: str) -> list[StretchCandidate]:
+        if not text:
+            return []
+        tokens = self._tokenise(text)
+        return self.analyse_tokens(tokens)
+
+    def analyse_tokens(self, tokens: Sequence[TokenInfo]) -> list[StretchCandidate]:
+        candidates: list[StretchCandidate] = []
+        for idx, token in enumerate(tokens):
+            if not token.is_word:
+                continue
+            if self._excluded(token, tokens, idx):
+                continue
+
+            features = self._compute_features(token, tokens, idx)
+            score = self._composite_score(features)
+            if score < MIN_SCORE_THRESHOLD:
+                continue
+            candidates.append(StretchCandidate(token=token, score=score, features=features))
+        return candidates
+
+    def select_candidates(
+        self,
+        candidates: Sequence[StretchCandidate],
+        *,
+        rate: float,
+        rng: RandomLike,
+    ) -> list[StretchCandidate]:
+        if not candidates or rate <= 0:
+            return []
+
+        grouped: dict[int, list[StretchCandidate]] = {}
+        for candidate in candidates:
+            grouped.setdefault(candidate.token.clause_index, []).append(candidate)
+
+        selected: list[StretchCandidate] = []
+        total_expected = max(0, min(len(candidates), int(round(len(candidates) * rate))))
+
+        for clause_index in sorted(grouped):
+            clause_candidates = sorted(
+                grouped[clause_index], key=lambda c: (-c.score, c.token.start)
+            )
+            clause_candidates = clause_candidates[:MAX_CANDIDATES_PER_CLAUSE]
+            clause_quota = max(
+                0, min(len(clause_candidates), int(round(len(clause_candidates) * rate)))
+            )
+
+            provisional: list[StretchCandidate] = []
+            for candidate in clause_candidates:
+                probability = min(1.0, rate * (0.35 + 0.65 * candidate.score))
+                if rng.random() < probability:
+                    provisional.append(candidate)
+                if len(provisional) >= clause_quota:
+                    break
+
+            if len(provisional) < clause_quota:
+                leftovers = [c for c in clause_candidates if c not in provisional]
+                needed = clause_quota - len(provisional)
+                provisional.extend(leftovers[:needed])
+
+            selected.extend(provisional)
+
+        if len(selected) < total_expected:
+            remaining = [c for c in candidates if c not in selected]
+            remaining.sort(key=lambda c: (-c.score, c.token.start))
+            selected.extend(remaining[: total_expected - len(selected)])
+
+        # Keep deterministic order by position
+        selected.sort(key=lambda c: c.token.start)
+        return selected
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+    def _tokenise(self, text: str) -> list[TokenInfo]:
+        tokens: list[TokenInfo] = []
+        clause_index = 0
+        matches = list(TOKEN_REGEX.finditer(text))
+        for idx, match in enumerate(matches):
+            token_text = match.group(0)
+            is_word = bool(ALPHA_REGEX.search(token_text)) and token_text.strip().isalnum()
+            preceding = matches[idx - 1].group(0) if idx > 0 else ""
+            following = matches[idx + 1].group(0) if idx + 1 < len(matches) else ""
+            tokens.append(
+                TokenInfo(
+                    text=token_text,
+                    start=match.start(),
+                    end=match.end(),
+                    is_word=is_word,
+                    clause_index=clause_index,
+                    preceding_punct=preceding,
+                    following_punct=following,
+                    index=idx,
+                )
+            )
+            if any(ch in CLAUSE_PUNCTUATION for ch in token_text):
+                clause_index += 1
+        return tokens
+
+    def _excluded(self, token: TokenInfo, tokens: Sequence[TokenInfo], index: int) -> bool:
+        text = token.text
+        normalised = token.normalised
+        if sum(ch.isalpha() for ch in text) < 2:
+            return True
+        if any(ch.isdigit() for ch in text):
+            return True
+        lowered = normalised
+        if "http" in lowered or "www" in lowered or "//" in lowered:
+            return True
+        if any(symbol in text for symbol in {"#", "@", "&", "{", "}", "<", ">"}):
+            return True
+        if "_" in text:
+            return True
+        if "/" in text or "\\" in text:
+            return True
+
+        # Heuristic proper noun check: Title case mid-clause counts as proper noun
+        if text[:1].isupper() and text[1:].islower():
+            previous_clause_start = index == 0
+            if not previous_clause_start:
+                for prior in reversed(tokens[:index]):
+                    stripped = prior.text.strip()
+                    if not stripped:
+                        continue
+                    if stripped[-1] in CLAUSE_PUNCTUATION:
+                        previous_clause_start = True
+                    break
+            if not previous_clause_start:
+                return True
+        return False
+
+    def _compute_features(
+        self, token: TokenInfo, tokens: Sequence[TokenInfo], index: int
+    ) -> StretchabilityFeatures:
+        lexical = self.lexical_prior.get(token.normalised, 0.12)
+        pos_score = self._pos_score(token)
+        sentiment_score, sentiment_swing = self._sentiment(tokens, index)
+        phon_score = self._phonotactic(token.normalised)
+        context_score = self._contextual(token, tokens, index)
+        return StretchabilityFeatures(
+            lexical=lexical,
+            pos=pos_score,
+            sentiment=sentiment_score,
+            phonotactic=phon_score,
+            context=context_score,
+            sentiment_swing=sentiment_swing,
+        )
+
+    def _composite_score(self, features: StretchabilityFeatures) -> float:
+        lex_w, pos_w, sent_w, phon_w, ctx_w = self.weights
+        weighted = (
+            lex_w * features.lexical
+            + pos_w * features.pos
+            + sent_w * features.sentiment
+            + phon_w * features.phonotactic
+            + ctx_w * features.context
+        )
+        total_weight = sum(self.weights)
+        score = weighted / total_weight if total_weight else 0.0
+        return max(0.0, min(1.0, score))
+
+    # ------------------------------------------------------------------
+    # Feature helpers
+    # ------------------------------------------------------------------
+    def _pos_score(self, token: TokenInfo) -> float:
+        normalised = token.normalised
+        if normalised in INTERJECTIONS:
+            return 0.95
+        if normalised in INTENSIFIERS:
+            return 0.85
+        if normalised in EVALUATIVES:
+            return 0.7
+        if normalised.endswith("ly"):
+            return 0.55
+        if token.text.isupper() and len(token.text) > 1:
+            return 0.65
+        return 0.3
+
+    def _sentiment(self, tokens: Sequence[TokenInfo], index: int) -> tuple[float, float]:
+        window = [tok for tok in tokens[max(0, index - 2) : index + 3] if tok.is_word]
+        if not window:
+            return 0.5, 0.0
+        pos_hits = sum(1 for tok in window if tok.normalised in POSITIVE_LEXICON)
+        neg_hits = sum(1 for tok in window if tok.normalised in NEGATIVE_LEXICON)
+        total = len(window)
+        balance = (pos_hits - neg_hits) / total
+        sentiment_score = 0.5 + 0.5 * max(-1.0, min(1.0, balance))
+        swing = abs(balance)
+        return sentiment_score, swing
+
+    def _phonotactic(self, normalised: str) -> float:
+        if not any(ch in VOWELS for ch in normalised):
+            return 0.0
+        score = 0.25
+        if any(normalised.endswith(c) for c in SONORANT_CODAS):
+            score += 0.2
+        if any(normalised.endswith(c) for c in SIBILANT_CODAS):
+            score += 0.18
+        if any(digraph in normalised for digraph in DIGRAPHS):
+            score += 0.22
+        if re.search(r"[aeiouy]{2,}", normalised):
+            score += 0.22
+        if re.search(r"(.)(?!\1)(.)\1", normalised):
+            score += 0.08
+        return max(0.0, min(1.0, score))
+
+    def _contextual(self, token: TokenInfo, tokens: Sequence[TokenInfo], index: int) -> float:
+        score = 0.2
+        before = token.preceding_punct
+        after = token.following_punct
+        token_text = token.text
+        if after and after.count("!") >= 1:
+            score += 0.25
+        if after and after.count("?") >= 1:
+            score += 0.2
+        if before and before.count("!") >= 2:
+            score += 0.2
+        if after and ("!!" in after or "??" in after):
+            score += 0.15
+        if token_text.isupper() and len(token_text) > 1:
+            score += 0.25
+        if EMOJI_REGEX.search(before or "") or EMOJI_REGEX.search(after or ""):
+            score += 0.15
+        # Clause-final emphasis
+        if index + 1 < len(tokens):
+            trailing = tokens[index + 1].text
+            if any(p in trailing for p in {"!!!", "??", "?!"}):
+                score += 0.2
+        return max(0.0, min(1.0, score))
+
+
+__all__ = [
+    "StretchabilityAnalyzer",
+    "StretchCandidate",
+    "StretchabilityFeatures",
+    "TokenInfo",
+    "RandomLike",
+]
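
For orientation, a minimal usage sketch of the stretchability module above, assuming the packaged glitchlings/data/hokey_assets.json is available at import time (it ships in this wheel per the file list). The sample sentence and rate are illustrative; random.Random satisfies the RandomLike protocol since it exposes random().

    from random import Random

    from glitchlings.util.stretchability import StretchabilityAnalyzer

    analyzer = StretchabilityAnalyzer()
    # Score word tokens, then sample clause-by-clause at the requested rate.
    candidates = analyzer.analyse("Wow, that demo was sooo good!!!")
    chosen = analyzer.select_candidates(candidates, rate=0.5, rng=Random(151))
    for candidate in chosen:
        print(candidate.token.text, round(candidate.score, 2))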
glitchlings/zoo/__init__.py
@@ -0,0 +1,172 @@
+from __future__ import annotations
+
+import ast
+from typing import Any
+
+from .adjax import Adjax, adjax
+from .apostrofae import Apostrofae, apostrofae
+from .core import (
+    Gaggle,
+    Glitchling,
+    is_rust_pipeline_enabled,
+    is_rust_pipeline_supported,
+    pipeline_feature_flag_enabled,
+    plan_glitchling_specs,
+    plan_glitchlings,
+)
+from .hokey import Hokey, hokey
+from .jargoyle import Jargoyle, jargoyle
+from .jargoyle import dependencies_available as _jargoyle_available
+from .mim1c import Mim1c, mim1c
+from .redactyl import Redactyl, redactyl
+from .reduple import Reduple, reduple
+from .rushmore import Rushmore, rushmore
+from .scannequin import Scannequin, scannequin
+from .typogre import Typogre, typogre
+from .zeedub import Zeedub, zeedub
+
+__all__ = [
+    "Typogre",
+    "typogre",
+    "Mim1c",
+    "mim1c",
+    "Jargoyle",
+    "jargoyle",
+    "Apostrofae",
+    "apostrofae",
+    "Hokey",
+    "hokey",
+    "Adjax",
+    "adjax",
+    "Reduple",
+    "reduple",
+    "Rushmore",
+    "rushmore",
+    "Redactyl",
+    "redactyl",
+    "Scannequin",
+    "scannequin",
+    "Zeedub",
+    "zeedub",
+    "Glitchling",
+    "Gaggle",
+    "plan_glitchlings",
+    "plan_glitchling_specs",
+    "is_rust_pipeline_enabled",
+    "is_rust_pipeline_supported",
+    "pipeline_feature_flag_enabled",
+    "summon",
+    "BUILTIN_GLITCHLINGS",
+    "DEFAULT_GLITCHLING_NAMES",
+    "parse_glitchling_spec",
+    "get_glitchling_class",
+]
+
+_HAS_JARGOYLE = _jargoyle_available()
+
+_BUILTIN_GLITCHLING_LIST: list[Glitchling] = [typogre, apostrofae, hokey, mim1c]
+if _HAS_JARGOYLE:
+    _BUILTIN_GLITCHLING_LIST.append(jargoyle)
+_BUILTIN_GLITCHLING_LIST.extend([adjax, reduple, rushmore, redactyl, scannequin, zeedub])
+
+BUILTIN_GLITCHLINGS: dict[str, Glitchling] = {
+    glitchling.name.lower(): glitchling for glitchling in _BUILTIN_GLITCHLING_LIST
+}
+
+_BUILTIN_GLITCHLING_TYPES: dict[str, type[Glitchling]] = {
+    typogre.name.lower(): Typogre,
+    apostrofae.name.lower(): Apostrofae,
+    hokey.name.lower(): Hokey,
+    mim1c.name.lower(): Mim1c,
+    adjax.name.lower(): Adjax,
+    reduple.name.lower(): Reduple,
+    rushmore.name.lower(): Rushmore,
+    redactyl.name.lower(): Redactyl,
+    scannequin.name.lower(): Scannequin,
+    zeedub.name.lower(): Zeedub,
+}
+if _HAS_JARGOYLE:
+    _BUILTIN_GLITCHLING_TYPES[jargoyle.name.lower()] = Jargoyle
+
+DEFAULT_GLITCHLING_NAMES: list[str] = list(BUILTIN_GLITCHLINGS.keys())
+
+
+def parse_glitchling_spec(specification: str) -> Glitchling:
+    """Return a glitchling instance configured according to ``specification``."""
+    text = specification.strip()
+    if not text:
+        raise ValueError("Glitchling specification cannot be empty.")
+
+    if "(" not in text:
+        glitchling = BUILTIN_GLITCHLINGS.get(text.lower())
+        if glitchling is None:
+            raise ValueError(f"Glitchling '{text}' not found.")
+        return glitchling
+
+    if not text.endswith(")"):
+        raise ValueError(f"Invalid parameter syntax for glitchling '{text}'.")
+
+    name_part, arg_source = text[:-1].split("(", 1)
+    name = name_part.strip()
+    if not name:
+        raise ValueError(f"Invalid glitchling specification '{text}'.")
+
+    lower_name = name.lower()
+    glitchling_type = _BUILTIN_GLITCHLING_TYPES.get(lower_name)
+    if glitchling_type is None:
+        raise ValueError(f"Glitchling '{name}' not found.")
+
+    try:
+        call_expr = ast.parse(f"_({arg_source})", mode="eval").body
+    except SyntaxError as exc:
+        raise ValueError(f"Invalid parameter syntax for glitchling '{name}': {exc.msg}") from exc
+
+    if not isinstance(call_expr, ast.Call) or call_expr.args:
+        raise ValueError(f"Glitchling '{name}' parameters must be provided as keyword arguments.")
+
+    kwargs: dict[str, Any] = {}
+    for keyword in call_expr.keywords:
+        if keyword.arg is None:
+            raise ValueError(
+                f"Glitchling '{name}' does not support unpacking arbitrary keyword arguments."
+            )
+        try:
+            kwargs[keyword.arg] = ast.literal_eval(keyword.value)
+        except (ValueError, SyntaxError) as exc:
+            raise ValueError(
+                f"Failed to parse value for parameter '{keyword.arg}' on glitchling '{name}': {exc}"
+            ) from exc
+
+    try:
+        return glitchling_type(**kwargs)
+    except TypeError as exc:
+        raise ValueError(f"Failed to instantiate glitchling '{name}': {exc}") from exc
+
+
+def get_glitchling_class(name: str) -> type[Glitchling]:
+    """Look up the glitchling class registered under ``name``."""
+    key = name.strip().lower()
+    if not key:
+        raise ValueError("Glitchling name cannot be empty.")
+
+    glitchling_type = _BUILTIN_GLITCHLING_TYPES.get(key)
+    if glitchling_type is None:
+        raise ValueError(f"Glitchling '{name}' not found.")
+
+    return glitchling_type
+
+
+def summon(glitchlings: list[str | Glitchling], seed: int = 151) -> Gaggle:
+    """Summon glitchlings by name (using defaults) or instance (to change parameters)."""
+    summoned: list[Glitchling] = []
+    for entry in glitchlings:
+        if isinstance(entry, Glitchling):
+            summoned.append(entry)
+            continue
+
+        try:
+            summoned.append(parse_glitchling_spec(entry))
+        except ValueError as exc:
+            raise ValueError(str(exc)) from exc
+
+    return Gaggle(summoned, seed=seed)
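
A short sketch of how the registry above is meant to be used, under the assumption that Hokey accepts a rate keyword (each glitchling defines its own constructor parameters; the value here is illustrative). Because parameters go through ast.literal_eval, a spec string can only carry Python literals, never executable code.

    from glitchlings.zoo import parse_glitchling_spec, summon

    # Bare names resolve against BUILTIN_GLITCHLINGS; "name(kw=literal)" switches
    # to keyword parsing and instantiates the registered class.
    hokey = parse_glitchling_spec("hokey(rate=0.25)")
    gaggle = summon(["typogre", hokey], seed=151)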
glitchlings/zoo/_ocr_confusions.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+from importlib import resources
+
+_CONFUSION_TABLE: list[tuple[str, list[str]]] | None = None
+
+
+def load_confusion_table() -> list[tuple[str, list[str]]]:
+    """Load the OCR confusion table shared by Python and Rust implementations."""
+    global _CONFUSION_TABLE
+    if _CONFUSION_TABLE is not None:
+        return _CONFUSION_TABLE
+
+    data = resources.files(__package__) / "ocr_confusions.tsv"
+    text = data.read_text(encoding="utf-8")
+    indexed_entries: list[tuple[int, tuple[str, list[str]]]] = []
+    for line_number, line in enumerate(text.splitlines()):
+        stripped = line.strip()
+        if not stripped or stripped.startswith("#"):
+            continue
+        parts = stripped.split()
+        if len(parts) < 2:
+            continue
+        source, *replacements = parts
+        indexed_entries.append((line_number, (source, replacements)))
+
+    # Sort longer patterns first to avoid overlapping matches, mirroring the
+    # behaviour of the Rust `confusion_table` helper.
+    indexed_entries.sort(key=lambda item: (-len(item[1][0]), item[0]))
+    entries = [entry for _, entry in indexed_entries]
+    _CONFUSION_TABLE = entries
+    return entries
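
The sort key orders entries by descending source length, with the original line number as a tiebreaker, so longer confusion patterns win overlapping matches. A small check of that invariant follows; the ("rn", ["m"]) row mentioned in the comment is hypothetical, since ocr_confusions.tsv is not reproduced here.

    from glitchlings.zoo._ocr_confusions import load_confusion_table

    table = load_confusion_table()
    # Entries are (source, replacements) tuples; a hypothetical ("rn", ["m"]) row
    # would sort ahead of any single-character ("r", ...) row.
    lengths = [len(source) for source, _ in table]
    assert lengths == sorted(lengths, reverse=True)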
glitchlings/zoo/_rate.py
@@ -0,0 +1,131 @@
+"""Utilities for handling legacy parameter names across glitchling classes."""
+
+from __future__ import annotations
+
+import warnings
+
+
+def resolve_rate(
+    *,
+    rate: float | None,
+    legacy_value: float | None,
+    default: float,
+    legacy_name: str,
+) -> float:
+    """Return the effective rate while enforcing mutual exclusivity.
+
+    This function centralizes the handling of legacy parameter names, allowing
+    glitchlings to maintain backwards compatibility while encouraging migration
+    to the standardized 'rate' parameter.
+
+    Parameters
+    ----------
+    rate : float | None
+        The preferred parameter value.
+    legacy_value : float | None
+        The deprecated legacy parameter value.
+    default : float
+        Default value if neither parameter is specified.
+    legacy_name : str
+        Name of the legacy parameter for error/warning messages.
+
+    Returns
+    -------
+    float
+        The resolved rate value.
+
+    Raises
+    ------
+    ValueError
+        If both rate and legacy_value are specified simultaneously.
+
+    Warnings
+    --------
+    DeprecationWarning
+        If the legacy parameter is used, a deprecation warning is issued.
+
+    Examples
+    --------
+    >>> resolve_rate(rate=0.5, legacy_value=None, default=0.1, legacy_name="old_rate")
+    0.5
+    >>> resolve_rate(rate=None, legacy_value=0.3, default=0.1, legacy_name="old_rate")
+    0.3  # Issues deprecation warning
+    >>> resolve_rate(rate=None, legacy_value=None, default=0.1, legacy_name="old_rate")
+    0.1
+
+    """
+    if rate is not None and legacy_value is not None:
+        raise ValueError(f"Specify either 'rate' or '{legacy_name}', not both.")
+
+    if rate is not None:
+        return rate
+
+    if legacy_value is not None:
+        warnings.warn(
+            f"The '{legacy_name}' parameter is deprecated and will be removed in version 0.6.0. "
+            f"Use 'rate' instead.",
+            DeprecationWarning,
+            stacklevel=3,
+        )
+        return legacy_value
+
+    return default
+
+
+def resolve_legacy_param(
+    *,
+    preferred_value: object,
+    legacy_value: object,
+    default: object,
+    preferred_name: str,
+    legacy_name: str,
+) -> object:
+    """Resolve a parameter that has both preferred and legacy names.
+
+    This is a generalized version of resolve_rate() that works with any type.
+
+    Parameters
+    ----------
+    preferred_value : object
+        The value from the preferred parameter name.
+    legacy_value : object
+        The value from the legacy parameter name.
+    default : object
+        Default value if neither parameter is specified.
+    preferred_name : str
+        Name of the preferred parameter.
+    legacy_name : str
+        Name of the legacy parameter for warning messages.
+
+    Returns
+    -------
+    object
+        The resolved parameter value.
+
+    Raises
+    ------
+    ValueError
+        If both preferred and legacy values are specified simultaneously.
+
+    Warnings
+    --------
+    DeprecationWarning
+        If the legacy parameter is used.
+
+    """
+    if preferred_value is not None and legacy_value is not None:
+        raise ValueError(f"Specify either '{preferred_name}' or '{legacy_name}', not both.")
+
+    if preferred_value is not None:
+        return preferred_value
+
+    if legacy_value is not None:
+        warnings.warn(
+            f"The '{legacy_name}' parameter is deprecated and will be removed in version 0.6.0. "
+            f"Use '{preferred_name}' instead.",
+            DeprecationWarning,
+            stacklevel=3,
+        )
+        return legacy_value
+
+    return default
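
To illustrate the intended call pattern, a hypothetical caller of resolve_rate; DemoGlitchling and its strength alias are invented for this sketch, as the package's real glitchlings define their own deprecated names.

    import warnings

    from glitchlings.zoo._rate import resolve_rate

    class DemoGlitchling:
        # Hypothetical: 'strength' stands in for a deprecated alias of 'rate'.
        def __init__(self, rate: float | None = None, strength: float | None = None) -> None:
            self.rate = resolve_rate(
                rate=rate, legacy_value=strength, default=0.1, legacy_name="strength"
            )

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        assert DemoGlitchling(strength=0.3).rate == 0.3
        assert issubclass(caught[0].category, DeprecationWarning)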