glitchlings 0.4.5__cp312-cp312-macosx_11_0_universal2.whl → 0.5.1__cp312-cp312-macosx_11_0_universal2.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (38)
  1. glitchlings/__init__.py +33 -0
  2. glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
  3. glitchlings/assets/ekkokin_homophones.json +1995 -0
  4. glitchlings/compat.py +98 -8
  5. glitchlings/config.py +12 -24
  6. glitchlings/dev/__init__.py +5 -0
  7. glitchlings/dev/sync_assets.py +130 -0
  8. glitchlings/dlc/pytorch_lightning.py +13 -1
  9. glitchlings/spectroll.py +5 -0
  10. glitchlings/util/stretchability.py +4 -9
  11. glitchlings/zoo/__init__.py +10 -2
  12. glitchlings/zoo/_ocr_confusions.py +3 -3
  13. glitchlings/zoo/_text_utils.py +10 -9
  14. glitchlings/zoo/adjax.py +3 -18
  15. glitchlings/zoo/apostrofae.py +2 -5
  16. glitchlings/zoo/assets/__init__.py +91 -0
  17. glitchlings/zoo/ekkokin.py +226 -0
  18. glitchlings/zoo/jargoyle.py +2 -16
  19. glitchlings/zoo/mim1c.py +2 -17
  20. glitchlings/zoo/redactyl.py +3 -17
  21. glitchlings/zoo/reduple.py +3 -17
  22. glitchlings/zoo/rushmore.py +3 -20
  23. glitchlings/zoo/scannequin.py +3 -20
  24. glitchlings/zoo/spectroll.py +159 -0
  25. glitchlings/zoo/typogre.py +2 -19
  26. glitchlings/zoo/zeedub.py +2 -13
  27. {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/METADATA +22 -7
  28. glitchlings-0.5.1.dist-info/RECORD +57 -0
  29. glitchlings/data/__init__.py +0 -1
  30. glitchlings/zoo/_rate.py +0 -131
  31. glitchlings-0.4.5.dist-info/RECORD +0 -53
  32. /glitchlings/{zoo/assets → assets}/apostrofae_pairs.json +0 -0
  33. /glitchlings/{data → assets}/hokey_assets.json +0 -0
  34. /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
  35. {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/WHEEL +0 -0
  36. {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/entry_points.txt +0 -0
  37. {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/licenses/LICENSE +0 -0
  38. {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/top_level.txt +0 -0
glitchlings/compat.py CHANGED
@@ -6,7 +6,7 @@ import re
6
6
  from dataclasses import dataclass
7
7
  from importlib import import_module, metadata
8
8
  from types import ModuleType
9
- from typing import Any, Callable, Iterable, Protocol, cast
9
+ from typing import Any, Callable, Iterable, Mapping, NoReturn, Protocol, cast
10
10
 
11
11
 
12
12
  class _MissingSentinel:
@@ -50,26 +50,106 @@ else:
50
50
  Requirement = cast(type[_RequirementProtocol], _RequirementClass)
51
51
 
52
52
 
53
+ def _build_lightning_stub() -> ModuleType:
54
+ """Return a minimal PyTorch Lightning stub when the dependency is absent."""
55
+
56
+ module = ModuleType("pytorch_lightning")
57
+
58
+ class LightningDataModule: # pragma: no cover - simple compatibility shim
59
+ """Lightweight stand-in for PyTorch Lightning's ``LightningDataModule``."""
60
+
61
+ def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: D401 - parity with real class
62
+ pass
63
+
64
+ def prepare_data(self, *args: Any, **kwargs: Any) -> None: # noqa: D401 - parity with real class
65
+ return None
66
+
67
+ def setup(self, *args: Any, **kwargs: Any) -> None:
68
+ return None
69
+
70
+ def teardown(self, *args: Any, **kwargs: Any) -> None:
71
+ return None
72
+
73
+ def state_dict(self) -> dict[str, Any]:
74
+ return {}
75
+
76
+ def load_state_dict(self, state_dict: Mapping[str, Any]) -> None:
77
+ return None
78
+
79
+ def transfer_batch_to_device(self, batch: Any, device: Any, dataloader_idx: int) -> Any:
80
+ return batch
81
+
82
+ def on_before_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
83
+ return batch
84
+
85
+ def on_after_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
86
+ return batch
87
+
88
+ def train_dataloader(self, *args: Any, **kwargs: Any) -> Any:
89
+ return []
90
+
91
+ def val_dataloader(self, *args: Any, **kwargs: Any) -> Any:
92
+ return []
93
+
94
+ def test_dataloader(self, *args: Any, **kwargs: Any) -> Any:
95
+ return []
96
+
97
+ def predict_dataloader(self, *args: Any, **kwargs: Any) -> Any:
98
+ return []
99
+
100
+ setattr(module, "LightningDataModule", LightningDataModule)
101
+ setattr(module, "__all__", ["LightningDataModule"])
102
+ setattr(
103
+ module,
104
+ "__doc__",
105
+ "Lightweight stub module that exposes a minimal LightningDataModule "
106
+ "when PyTorch Lightning is unavailable.",
107
+ )
108
+ setattr(module, "__version__", "0.0.0-stub")
109
+ return module
110
+
111
+
53
112
  @dataclass
54
113
  class OptionalDependency:
55
114
  """Lazily import an optional dependency and retain the import error."""
56
115
 
57
116
  module_name: str
117
+ fallback_factory: Callable[[], ModuleType] | None = None
58
118
  _cached: ModuleType | None | _MissingSentinel = _MISSING
59
119
  _error: ModuleNotFoundError | None = None
120
+ _used_fallback: bool = False
121
+ _fallback_instance: ModuleType | None = None
60
122
 
61
123
  def _attempt_import(self) -> ModuleType | None:
62
124
  try:
63
125
  module = import_module(self.module_name)
64
126
  except ModuleNotFoundError as exc:
127
+ if self.fallback_factory is not None:
128
+ if self._fallback_instance is None:
129
+ self._fallback_instance = self.fallback_factory()
130
+ module = self._fallback_instance
131
+ self._cached = module
132
+ # Preserve the original error so load()/require() can re-raise it
133
+ self._error = exc
134
+ self._used_fallback = True
135
+ return module
65
136
  self._cached = None
66
137
  self._error = exc
67
138
  return None
68
139
  else:
69
140
  self._cached = module
70
141
  self._error = None
142
+ self._used_fallback = False
71
143
  return module
72
144
 
145
+ def _raise_missing_error(self) -> NoReturn:
146
+ """Raise ModuleNotFoundError for the missing dependency."""
147
+ error = self._error
148
+ if error is not None:
149
+ raise error
150
+ message = f"{self.module_name} is not installed"
151
+ raise ModuleNotFoundError(message)
152
+
73
153
  def get(self) -> ModuleType | None:
74
154
  """Return the imported module or ``None`` when unavailable."""
75
155
  cached = self._cached
@@ -82,12 +162,10 @@ class OptionalDependency:
82
162
  def load(self) -> ModuleType:
83
163
  """Return the dependency, raising the original import error when absent."""
84
164
  module = self.get()
165
+ if self._used_fallback:
166
+ self._raise_missing_error()
85
167
  if module is None:
86
- error = self._error
87
- if error is not None:
88
- raise error
89
- message = f"{self.module_name} is not installed"
90
- raise ModuleNotFoundError(message)
168
+ self._raise_missing_error()
91
169
  return module
92
170
 
93
171
  def require(self, message: str) -> ModuleType:
@@ -99,18 +177,27 @@ class OptionalDependency:
99
177
 
100
178
  def available(self) -> bool:
101
179
  """Return ``True`` when the dependency can be imported."""
102
- return self.get() is not None
180
+ module = self.get()
181
+ if module is None:
182
+ return False
183
+ if self._used_fallback:
184
+ return False
185
+ return True
103
186
 
104
187
  def reset(self) -> None:
105
188
  """Forget any cached import result."""
106
189
  self._cached = _MISSING
107
190
  self._error = None
191
+ self._used_fallback = False
192
+ self._fallback_instance = None
108
193
 
109
194
  def attr(self, attribute: str) -> Any | None:
110
195
  """Return ``attribute`` from the dependency when available."""
111
196
  module = self.get()
112
197
  if module is None:
113
198
  return None
199
+ if self._used_fallback:
200
+ return None
114
201
  return getattr(module, attribute, None)
115
202
 
116
203
  @property
@@ -120,7 +207,10 @@ class OptionalDependency:
120
207
  return self._error
121
208
 
122
209
 
123
- pytorch_lightning = OptionalDependency("pytorch_lightning")
210
+ pytorch_lightning = OptionalDependency(
211
+ "pytorch_lightning",
212
+ fallback_factory=_build_lightning_stub,
213
+ )
124
214
  datasets = OptionalDependency("datasets")
125
215
  verifiers = OptionalDependency("verifiers")
126
216
  jellyfish = OptionalDependency("jellyfish")
glitchlings/config.py CHANGED
@@ -4,7 +4,6 @@ from __future__ import annotations
4
4
 
5
5
  import importlib
6
6
  import os
7
- import warnings
8
7
  from dataclasses import dataclass, field
9
8
  from io import TextIOBase
10
9
  from pathlib import Path
@@ -57,17 +56,6 @@ ATTACK_CONFIG_SCHEMA: dict[str, Any] = {
57
56
  "required": ["name"],
58
57
  "properties": {
59
58
  "name": {"type": "string", "minLength": 1},
60
- "type": {"type": "string", "minLength": 1},
61
- "parameters": {"type": "object"},
62
- },
63
- "additionalProperties": True,
64
- },
65
- {
66
- "type": "object",
67
- "required": ["type"],
68
- "properties": {
69
- "name": {"type": "string", "minLength": 1},
70
- "type": {"type": "string", "minLength": 1},
71
59
  "parameters": {"type": "object"},
72
60
  },
73
61
  "additionalProperties": True,
@@ -263,7 +251,12 @@ def _validate_attack_config_schema(data: Any, *, source: str) -> Mapping[str, An
263
251
 
264
252
  for index, entry in enumerate(raw_glitchlings, start=1):
265
253
  if isinstance(entry, Mapping):
266
- name_candidate = entry.get("name") or entry.get("type")
254
+ if "type" in entry:
255
+ raise ValueError(
256
+ f"{source}: glitchling #{index} uses unsupported 'type'; use 'name'."
257
+ )
258
+
259
+ name_candidate = entry.get("name")
267
260
  if not isinstance(name_candidate, str) or not name_candidate.strip():
268
261
  raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
269
262
  parameters = entry.get("parameters")
@@ -326,17 +319,12 @@ def _build_glitchling(entry: Any, source: str, index: int) -> "Glitchling":
326
319
  raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc
327
320
 
328
321
  if isinstance(entry, Mapping):
329
- name_value = entry.get("name")
330
- legacy_type = entry.get("type")
331
- if name_value is None and legacy_type is not None:
332
- warnings.warn(
333
- f"{source}: glitchling #{index} uses 'type'; prefer 'name'.",
334
- DeprecationWarning,
335
- stacklevel=2,
322
+ if "type" in entry:
323
+ raise ValueError(
324
+ f"{source}: glitchling #{index} uses unsupported 'type'; use 'name'."
336
325
  )
337
- name_value = legacy_type
338
- elif name_value is None:
339
- name_value = legacy_type
326
+
327
+ name_value = entry.get("name")
340
328
 
341
329
  if not isinstance(name_value, str) or not name_value.strip():
342
330
  raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
@@ -352,7 +340,7 @@ def _build_glitchling(entry: Any, source: str, index: int) -> "Glitchling":
352
340
  kwargs = {
353
341
  key: value
354
342
  for key, value in entry.items()
355
- if key not in {"name", "type", "parameters"}
343
+ if key not in {"name", "parameters"}
356
344
  }
357
345
 
358
346
  try:
glitchlings/dev/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Developer-facing utilities for maintaining the Glitchlings repository."""
2
+
3
+ from .sync_assets import sync_assets
4
+
5
+ __all__ = ["sync_assets"]
glitchlings/dev/sync_assets.py ADDED
@@ -0,0 +1,130 @@
1
+ """Maintain the canonical glitchling asset bundle shared by Python and Rust."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+ from pathlib import Path
8
+ from typing import Iterator, Sequence
9
+
10
+ PIPELINE_ASSETS: frozenset[str] = frozenset(
11
+ {
12
+ "apostrofae_pairs.json",
13
+ "ekkokin_homophones.json",
14
+ "hokey_assets.json",
15
+ "ocr_confusions.tsv",
16
+ }
17
+ )
18
+
19
+
20
+ def _project_root(default: Path | None = None) -> Path:
21
+ if default is not None:
22
+ return default
23
+ return Path(__file__).resolve().parents[3]
24
+
25
+
26
+ def _canonical_asset_dir(project_root: Path) -> Path:
27
+ canonical = project_root / "assets"
28
+ if not canonical.is_dir():
29
+ raise RuntimeError(
30
+ "expected canonical assets under 'assets'; "
31
+ "run this command from the repository root"
32
+ )
33
+ return canonical
34
+
35
+
36
+ def _legacy_rust_asset_dir(project_root: Path) -> Path:
37
+ return project_root / "rust" / "zoo" / "assets"
38
+
39
+
40
+ def _iter_legacy_assets(rust_dir: Path) -> Iterator[Path]:
41
+ if not rust_dir.exists():
42
+ return
43
+ for path in rust_dir.iterdir():
44
+ if path.is_file():
45
+ yield path
46
+
47
+
48
+ def sync_assets(
49
+ project_root: Path | None = None,
50
+ *,
51
+ check: bool = False,
52
+ quiet: bool = False,
53
+ ) -> bool:
54
+ """Ensure pipeline assets exist only at their canonical location."""
55
+
56
+ root = _project_root(project_root)
57
+ canonical_dir = _canonical_asset_dir(root)
58
+ rust_dir = _legacy_rust_asset_dir(root)
59
+
60
+ missing_sources = [name for name in PIPELINE_ASSETS if not (canonical_dir / name).is_file()]
61
+ if missing_sources:
62
+ missing_list = ", ".join(sorted(missing_sources))
63
+ raise RuntimeError(f"missing canonical assets: {missing_list}")
64
+
65
+ legacy_assets = list(_iter_legacy_assets(rust_dir))
66
+
67
+ if check:
68
+ if legacy_assets:
69
+ if not quiet:
70
+ for duplicate in legacy_assets:
71
+ message = (
72
+ "legacy vendored asset "
73
+ f"{duplicate.relative_to(root)} still exists; "
74
+ "run sync_assets to remove it"
75
+ )
76
+ print(message, file=sys.stderr)
77
+ return False
78
+ if not quiet:
79
+ print("No legacy Rust asset copies detected.")
80
+ return True
81
+
82
+ removed_any = False
83
+ for duplicate in legacy_assets:
84
+ duplicate.unlink()
85
+ removed_any = True
86
+ if not quiet:
87
+ print(f"Removed legacy vendored asset {duplicate.relative_to(root)}")
88
+
89
+ if removed_any:
90
+ try:
91
+ rust_dir.rmdir()
92
+ except OSError:
93
+ pass
94
+ elif not quiet:
95
+ print("No legacy Rust asset copies to remove.")
96
+
97
+ return True
98
+
99
+
100
+ def build_parser() -> argparse.ArgumentParser:
101
+ parser = argparse.ArgumentParser(
102
+ description="Prune legacy vendored Rust assets so only canonical copies remain.",
103
+ )
104
+ parser.add_argument(
105
+ "--check",
106
+ action="store_true",
107
+ help="exit with a non-zero status when vendored assets diverge",
108
+ )
109
+ parser.add_argument(
110
+ "--quiet",
111
+ action="store_true",
112
+ help="suppress status output",
113
+ )
114
+ parser.add_argument(
115
+ "--project-root",
116
+ type=Path,
117
+ help="override the detected project root (useful for testing)",
118
+ )
119
+ return parser
120
+
121
+
122
+ def main(argv: Sequence[str] | None = None) -> int:
123
+ parser = build_parser()
124
+ args = parser.parse_args(argv)
125
+ ok = sync_assets(project_root=args.project_root, check=args.check, quiet=args.quiet)
126
+ return 0 if ok else 1
127
+
128
+
129
+ if __name__ == "__main__": # pragma: no cover - CLI entry point
130
+ raise SystemExit(main())
glitchlings/dlc/pytorch_lightning.py CHANGED
@@ -187,7 +187,19 @@ def _ensure_datamodule_class() -> Any:
187
187
  setattr(datamodule_cls, "glitch", glitch)
188
188
 
189
189
  if not issubclass(_GlitchedLightningDataModule, datamodule_cls):
190
- _GlitchedLightningDataModule.__bases__ = (datamodule_cls,)
190
+ try:
191
+ _GlitchedLightningDataModule.__bases__ = (datamodule_cls,)
192
+ except TypeError:
193
+ namespace = {
194
+ name: value
195
+ for name, value in vars(_GlitchedLightningDataModule).items()
196
+ if name not in {"__dict__", "__weakref__"}
197
+ }
198
+ replacement = cast(
199
+ type[Any],
200
+ type("_GlitchedLightningDataModule", (datamodule_cls,), namespace),
201
+ )
202
+ globals()["_GlitchedLightningDataModule"] = replacement
191
203
 
192
204
  return datamodule_cls
193
205
 
glitchlings/spectroll.py ADDED
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from .zoo.spectroll import Spectroll, spectroll, swap_colors
4
+
5
+ __all__ = ["Spectroll", "spectroll", "swap_colors"]
glitchlings/util/stretchability.py CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import json
6
5
  import re
7
6
  from dataclasses import dataclass
8
- from importlib import resources
9
- from typing import Any, Protocol, Sequence, TypedDict, cast
7
+ from typing import Protocol, Sequence, TypedDict, cast
8
+
9
+ from glitchlings.zoo import assets
10
10
 
11
11
  # Regexes reused across the module
12
12
  TOKEN_REGEX = re.compile(r"\w+|\W+")
@@ -32,12 +32,7 @@ class RandomLike(Protocol):
32
32
 
33
33
  # Lexical prior probabilities and pragmatic lexica shared with the Rust fast path.
34
34
  def _load_assets() -> HokeyAssets:
35
- with (
36
- resources.files("glitchlings.data")
37
- .joinpath("hokey_assets.json")
38
- .open("r", encoding="utf-8") as payload
39
- ):
40
- data: Any = json.load(payload)
35
+ data = assets.load_json("hokey_assets.json")
41
36
  return cast(HokeyAssets, data)
42
37
 
43
38
 
glitchlings/zoo/__init__.py CHANGED
@@ -14,6 +14,7 @@ from .core import (
14
14
  plan_glitchling_specs,
15
15
  plan_glitchlings,
16
16
  )
17
+ from .ekkokin import Ekkokin, ekkokin
17
18
  from .hokey import Hokey, hokey
18
19
  from .jargoyle import Jargoyle, jargoyle
19
20
  from .jargoyle import dependencies_available as _jargoyle_available
@@ -22,6 +23,7 @@ from .redactyl import Redactyl, redactyl
22
23
  from .reduple import Reduple, reduple
23
24
  from .rushmore import Rushmore, rushmore
24
25
  from .scannequin import Scannequin, scannequin
26
+ from .spectroll import Spectroll, spectroll
25
27
  from .typogre import Typogre, typogre
26
28
  from .zeedub import Zeedub, zeedub
27
29
 
@@ -32,6 +34,8 @@ __all__ = [
32
34
  "mim1c",
33
35
  "Jargoyle",
34
36
  "jargoyle",
37
+ "Ekkokin",
38
+ "ekkokin",
35
39
  "Apostrofae",
36
40
  "apostrofae",
37
41
  "Hokey",
@@ -44,6 +48,8 @@ __all__ = [
44
48
  "rushmore",
45
49
  "Redactyl",
46
50
  "redactyl",
51
+ "Spectroll",
52
+ "spectroll",
47
53
  "Scannequin",
48
54
  "scannequin",
49
55
  "Zeedub",
@@ -64,10 +70,10 @@ __all__ = [
64
70
 
65
71
  _HAS_JARGOYLE = _jargoyle_available()
66
72
 
67
- _BUILTIN_GLITCHLING_LIST: list[Glitchling] = [typogre, apostrofae, hokey, mim1c]
73
+ _BUILTIN_GLITCHLING_LIST: list[Glitchling] = [typogre, apostrofae, hokey, mim1c, ekkokin]
68
74
  if _HAS_JARGOYLE:
69
75
  _BUILTIN_GLITCHLING_LIST.append(jargoyle)
70
- _BUILTIN_GLITCHLING_LIST.extend([adjax, reduple, rushmore, redactyl, scannequin, zeedub])
76
+ _BUILTIN_GLITCHLING_LIST.extend([adjax, reduple, rushmore, redactyl, spectroll, scannequin, zeedub])
71
77
 
72
78
  BUILTIN_GLITCHLINGS: dict[str, Glitchling] = {
73
79
  glitchling.name.lower(): glitchling for glitchling in _BUILTIN_GLITCHLING_LIST
@@ -75,6 +81,7 @@ BUILTIN_GLITCHLINGS: dict[str, Glitchling] = {
75
81
 
76
82
  _BUILTIN_GLITCHLING_TYPES: dict[str, type[Glitchling]] = {
77
83
  typogre.name.lower(): Typogre,
84
+ ekkokin.name.lower(): Ekkokin,
78
85
  apostrofae.name.lower(): Apostrofae,
79
86
  hokey.name.lower(): Hokey,
80
87
  mim1c.name.lower(): Mim1c,
@@ -82,6 +89,7 @@ _BUILTIN_GLITCHLING_TYPES: dict[str, type[Glitchling]] = {
82
89
  reduple.name.lower(): Reduple,
83
90
  rushmore.name.lower(): Rushmore,
84
91
  redactyl.name.lower(): Redactyl,
92
+ spectroll.name.lower(): Spectroll,
85
93
  scannequin.name.lower(): Scannequin,
86
94
  zeedub.name.lower(): Zeedub,
87
95
  }
glitchlings/zoo/_ocr_confusions.py CHANGED
@@ -1,18 +1,18 @@
1
1
  from __future__ import annotations
2
2
 
3
- from importlib import resources
3
+ from .assets import read_text
4
4
 
5
5
  _CONFUSION_TABLE: list[tuple[str, list[str]]] | None = None
6
6
 
7
7
 
8
8
  def load_confusion_table() -> list[tuple[str, list[str]]]:
9
9
  """Load the OCR confusion table shared by Python and Rust implementations."""
10
+
10
11
  global _CONFUSION_TABLE
11
12
  if _CONFUSION_TABLE is not None:
12
13
  return _CONFUSION_TABLE
13
14
 
14
- data = resources.files(__package__) / "ocr_confusions.tsv"
15
- text = data.read_text(encoding="utf-8")
15
+ text = read_text("ocr_confusions.tsv")
16
16
  indexed_entries: list[tuple[int, tuple[str, list[str]]]] = []
17
17
  for line_number, line in enumerate(text.splitlines()):
18
18
  stripped = line.strip()
glitchlings/zoo/_text_utils.py CHANGED
@@ -21,9 +21,9 @@ def split_token_edges(token: str) -> tuple[str, str, str]:
21
21
  return match.group(1), match.group(2), match.group(3)
22
22
 
23
23
 
24
- def token_core_length(token: str) -> int:
25
- """Return the length of the main word characters for weighting heuristics."""
26
- _, core, _ = split_token_edges(token)
24
+ def _resolve_core_length(core: str, token: str) -> int:
25
+ """Return a stable core-length measurement used by weighting heuristics."""
26
+
27
27
  candidate = core if core else token
28
28
  length = len(candidate)
29
29
  if length <= 0:
@@ -34,6 +34,12 @@ def token_core_length(token: str) -> int:
34
34
  return length
35
35
 
36
36
 
37
+ def token_core_length(token: str) -> int:
38
+ """Return the length of the main word characters for weighting heuristics."""
39
+ _, core, _ = split_token_edges(token)
40
+ return _resolve_core_length(core, token)
41
+
42
+
37
43
  @dataclass(frozen=True)
38
44
  class WordToken:
39
45
  """Metadata describing a non-whitespace token yielded by word splitters."""
@@ -71,12 +77,7 @@ def collect_word_tokens(
71
77
  continue
72
78
 
73
79
  prefix, core, suffix = split_token_edges(token)
74
- core_length = len(core)
75
- if core_length <= 0:
76
- stripped = token.strip()
77
- core_length = len(stripped) if stripped else len(token)
78
- if core_length <= 0:
79
- core_length = 1
80
+ core_length = _resolve_core_length(core, token)
80
81
 
81
82
  collected.append(
82
83
  WordToken(
glitchlings/zoo/adjax.py CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
  import random
4
4
  from typing import Any, cast
5
5
 
6
- from ._rate import resolve_rate
7
6
  from ._rust_extensions import get_rust_operation
8
7
  from ._text_utils import split_preserving_whitespace, split_token_edges
9
8
  from .core import AttackWave, Glitchling
@@ -66,16 +65,9 @@ def swap_adjacent_words(
66
65
  rate: float | None = None,
67
66
  seed: int | None = None,
68
67
  rng: random.Random | None = None,
69
- *,
70
- swap_rate: float | None = None,
71
68
  ) -> str:
72
69
  """Swap adjacent word cores while preserving spacing and punctuation."""
73
- effective_rate = resolve_rate(
74
- rate=rate,
75
- legacy_value=swap_rate,
76
- default=0.5,
77
- legacy_name="swap_rate",
78
- )
70
+ effective_rate = 0.5 if rate is None else rate
79
71
  clamped_rate = max(0.0, min(effective_rate, 1.0))
80
72
 
81
73
  if rng is None:
@@ -94,16 +86,9 @@ class Adjax(Glitchling):
94
86
  self,
95
87
  *,
96
88
  rate: float | None = None,
97
- swap_rate: float | None = None,
98
89
  seed: int | None = None,
99
90
  ) -> None:
100
- self._param_aliases = {"swap_rate": "rate"}
101
- effective_rate = resolve_rate(
102
- rate=rate,
103
- legacy_value=swap_rate,
104
- default=0.5,
105
- legacy_name="swap_rate",
106
- )
91
+ effective_rate = 0.5 if rate is None else rate
107
92
  super().__init__(
108
93
  name="Adjax",
109
94
  corruption_function=swap_adjacent_words,
@@ -118,7 +103,7 @@ class Adjax(Glitchling):
118
103
  return None
119
104
  return {
120
105
  "type": "swap_adjacent",
121
- "swap_rate": float(rate),
106
+ "rate": float(rate),
122
107
  }
123
108
 
124
109
 
glitchlings/zoo/apostrofae.py CHANGED
@@ -2,13 +2,12 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import json
6
5
  import random
7
6
  from functools import cache
8
- from importlib import resources
9
7
  from typing import Any, Sequence, cast
10
8
 
11
9
  from ._rust_extensions import get_rust_operation
10
+ from .assets import load_json
12
11
  from .core import AttackOrder, AttackWave, Gaggle, Glitchling
13
12
 
14
13
  # Load Rust-accelerated operation if available
@@ -19,9 +18,7 @@ _apostrofae_rust = get_rust_operation("apostrofae")
19
18
  def _load_replacement_pairs() -> dict[str, list[tuple[str, str]]]:
20
19
  """Load the curated mapping of straight quotes to fancy pairs."""
21
20
 
22
- resource = resources.files(f"{__package__}.assets").joinpath("apostrofae_pairs.json")
23
- with resource.open("r", encoding="utf-8") as handle:
24
- data: dict[str, list[Sequence[str]]] = json.load(handle)
21
+ data: dict[str, list[Sequence[str]]] = load_json("apostrofae_pairs.json")
25
22
 
26
23
  parsed: dict[str, list[tuple[str, str]]] = {}
27
24
  for straight, replacements in data.items():