glitchlings 0.4.5__cp310-cp310-macosx_11_0_universal2.whl → 0.5.1__cp310-cp310-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +33 -0
- glitchlings/_zoo_rust.cpython-310-darwin.so +0 -0
- glitchlings/assets/ekkokin_homophones.json +1995 -0
- glitchlings/compat.py +98 -8
- glitchlings/config.py +12 -24
- glitchlings/dev/__init__.py +5 -0
- glitchlings/dev/sync_assets.py +130 -0
- glitchlings/dlc/pytorch_lightning.py +13 -1
- glitchlings/spectroll.py +5 -0
- glitchlings/util/stretchability.py +4 -9
- glitchlings/zoo/__init__.py +10 -2
- glitchlings/zoo/_ocr_confusions.py +3 -3
- glitchlings/zoo/_text_utils.py +10 -9
- glitchlings/zoo/adjax.py +3 -18
- glitchlings/zoo/apostrofae.py +2 -5
- glitchlings/zoo/assets/__init__.py +91 -0
- glitchlings/zoo/ekkokin.py +226 -0
- glitchlings/zoo/jargoyle.py +2 -16
- glitchlings/zoo/mim1c.py +2 -17
- glitchlings/zoo/redactyl.py +3 -17
- glitchlings/zoo/reduple.py +3 -17
- glitchlings/zoo/rushmore.py +3 -20
- glitchlings/zoo/scannequin.py +3 -20
- glitchlings/zoo/spectroll.py +159 -0
- glitchlings/zoo/typogre.py +2 -19
- glitchlings/zoo/zeedub.py +2 -13
- {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/METADATA +22 -7
- glitchlings-0.5.1.dist-info/RECORD +57 -0
- glitchlings/data/__init__.py +0 -1
- glitchlings/zoo/_rate.py +0 -131
- glitchlings-0.4.5.dist-info/RECORD +0 -53
- /glitchlings/{zoo/assets → assets}/apostrofae_pairs.json +0 -0
- /glitchlings/{data → assets}/hokey_assets.json +0 -0
- /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
- {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.5.dist-info → glitchlings-0.5.1.dist-info}/top_level.txt +0 -0
glitchlings/compat.py
CHANGED
|
@@ -6,7 +6,7 @@ import re
|
|
|
6
6
|
from dataclasses import dataclass
|
|
7
7
|
from importlib import import_module, metadata
|
|
8
8
|
from types import ModuleType
|
|
9
|
-
from typing import Any, Callable, Iterable, Protocol, cast
|
|
9
|
+
from typing import Any, Callable, Iterable, Mapping, NoReturn, Protocol, cast
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class _MissingSentinel:
|
|
@@ -50,26 +50,106 @@ else:
|
|
|
50
50
|
Requirement = cast(type[_RequirementProtocol], _RequirementClass)
|
|
51
51
|
|
|
52
52
|
|
|
53
|
+
def _build_lightning_stub() -> ModuleType:
|
|
54
|
+
"""Return a minimal PyTorch Lightning stub when the dependency is absent."""
|
|
55
|
+
|
|
56
|
+
module = ModuleType("pytorch_lightning")
|
|
57
|
+
|
|
58
|
+
class LightningDataModule: # pragma: no cover - simple compatibility shim
|
|
59
|
+
"""Lightweight stand-in for PyTorch Lightning's ``LightningDataModule``."""
|
|
60
|
+
|
|
61
|
+
def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: D401 - parity with real class
|
|
62
|
+
pass
|
|
63
|
+
|
|
64
|
+
def prepare_data(self, *args: Any, **kwargs: Any) -> None: # noqa: D401 - parity with real class
|
|
65
|
+
return None
|
|
66
|
+
|
|
67
|
+
def setup(self, *args: Any, **kwargs: Any) -> None:
|
|
68
|
+
return None
|
|
69
|
+
|
|
70
|
+
def teardown(self, *args: Any, **kwargs: Any) -> None:
|
|
71
|
+
return None
|
|
72
|
+
|
|
73
|
+
def state_dict(self) -> dict[str, Any]:
|
|
74
|
+
return {}
|
|
75
|
+
|
|
76
|
+
def load_state_dict(self, state_dict: Mapping[str, Any]) -> None:
|
|
77
|
+
return None
|
|
78
|
+
|
|
79
|
+
def transfer_batch_to_device(self, batch: Any, device: Any, dataloader_idx: int) -> Any:
|
|
80
|
+
return batch
|
|
81
|
+
|
|
82
|
+
def on_before_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
|
|
83
|
+
return batch
|
|
84
|
+
|
|
85
|
+
def on_after_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
|
|
86
|
+
return batch
|
|
87
|
+
|
|
88
|
+
def train_dataloader(self, *args: Any, **kwargs: Any) -> Any:
|
|
89
|
+
return []
|
|
90
|
+
|
|
91
|
+
def val_dataloader(self, *args: Any, **kwargs: Any) -> Any:
|
|
92
|
+
return []
|
|
93
|
+
|
|
94
|
+
def test_dataloader(self, *args: Any, **kwargs: Any) -> Any:
|
|
95
|
+
return []
|
|
96
|
+
|
|
97
|
+
def predict_dataloader(self, *args: Any, **kwargs: Any) -> Any:
|
|
98
|
+
return []
|
|
99
|
+
|
|
100
|
+
setattr(module, "LightningDataModule", LightningDataModule)
|
|
101
|
+
setattr(module, "__all__", ["LightningDataModule"])
|
|
102
|
+
setattr(
|
|
103
|
+
module,
|
|
104
|
+
"__doc__",
|
|
105
|
+
"Lightweight stub module that exposes a minimal LightningDataModule "
|
|
106
|
+
"when PyTorch Lightning is unavailable.",
|
|
107
|
+
)
|
|
108
|
+
setattr(module, "__version__", "0.0.0-stub")
|
|
109
|
+
return module
|
|
110
|
+
|
|
111
|
+
|
|
53
112
|
@dataclass
|
|
54
113
|
class OptionalDependency:
|
|
55
114
|
"""Lazily import an optional dependency and retain the import error."""
|
|
56
115
|
|
|
57
116
|
module_name: str
|
|
117
|
+
fallback_factory: Callable[[], ModuleType] | None = None
|
|
58
118
|
_cached: ModuleType | None | _MissingSentinel = _MISSING
|
|
59
119
|
_error: ModuleNotFoundError | None = None
|
|
120
|
+
_used_fallback: bool = False
|
|
121
|
+
_fallback_instance: ModuleType | None = None
|
|
60
122
|
|
|
61
123
|
def _attempt_import(self) -> ModuleType | None:
|
|
62
124
|
try:
|
|
63
125
|
module = import_module(self.module_name)
|
|
64
126
|
except ModuleNotFoundError as exc:
|
|
127
|
+
if self.fallback_factory is not None:
|
|
128
|
+
if self._fallback_instance is None:
|
|
129
|
+
self._fallback_instance = self.fallback_factory()
|
|
130
|
+
module = self._fallback_instance
|
|
131
|
+
self._cached = module
|
|
132
|
+
# Preserve the original error so load()/require() can re-raise it
|
|
133
|
+
self._error = exc
|
|
134
|
+
self._used_fallback = True
|
|
135
|
+
return module
|
|
65
136
|
self._cached = None
|
|
66
137
|
self._error = exc
|
|
67
138
|
return None
|
|
68
139
|
else:
|
|
69
140
|
self._cached = module
|
|
70
141
|
self._error = None
|
|
142
|
+
self._used_fallback = False
|
|
71
143
|
return module
|
|
72
144
|
|
|
145
|
+
def _raise_missing_error(self) -> NoReturn:
|
|
146
|
+
"""Raise ModuleNotFoundError for the missing dependency."""
|
|
147
|
+
error = self._error
|
|
148
|
+
if error is not None:
|
|
149
|
+
raise error
|
|
150
|
+
message = f"{self.module_name} is not installed"
|
|
151
|
+
raise ModuleNotFoundError(message)
|
|
152
|
+
|
|
73
153
|
def get(self) -> ModuleType | None:
|
|
74
154
|
"""Return the imported module or ``None`` when unavailable."""
|
|
75
155
|
cached = self._cached
|
|
@@ -82,12 +162,10 @@ class OptionalDependency:
|
|
|
82
162
|
def load(self) -> ModuleType:
|
|
83
163
|
"""Return the dependency, raising the original import error when absent."""
|
|
84
164
|
module = self.get()
|
|
165
|
+
if self._used_fallback:
|
|
166
|
+
self._raise_missing_error()
|
|
85
167
|
if module is None:
|
|
86
|
-
|
|
87
|
-
if error is not None:
|
|
88
|
-
raise error
|
|
89
|
-
message = f"{self.module_name} is not installed"
|
|
90
|
-
raise ModuleNotFoundError(message)
|
|
168
|
+
self._raise_missing_error()
|
|
91
169
|
return module
|
|
92
170
|
|
|
93
171
|
def require(self, message: str) -> ModuleType:
|
|
@@ -99,18 +177,27 @@ class OptionalDependency:
|
|
|
99
177
|
|
|
100
178
|
def available(self) -> bool:
|
|
101
179
|
"""Return ``True`` when the dependency can be imported."""
|
|
102
|
-
|
|
180
|
+
module = self.get()
|
|
181
|
+
if module is None:
|
|
182
|
+
return False
|
|
183
|
+
if self._used_fallback:
|
|
184
|
+
return False
|
|
185
|
+
return True
|
|
103
186
|
|
|
104
187
|
def reset(self) -> None:
|
|
105
188
|
"""Forget any cached import result."""
|
|
106
189
|
self._cached = _MISSING
|
|
107
190
|
self._error = None
|
|
191
|
+
self._used_fallback = False
|
|
192
|
+
self._fallback_instance = None
|
|
108
193
|
|
|
109
194
|
def attr(self, attribute: str) -> Any | None:
|
|
110
195
|
"""Return ``attribute`` from the dependency when available."""
|
|
111
196
|
module = self.get()
|
|
112
197
|
if module is None:
|
|
113
198
|
return None
|
|
199
|
+
if self._used_fallback:
|
|
200
|
+
return None
|
|
114
201
|
return getattr(module, attribute, None)
|
|
115
202
|
|
|
116
203
|
@property
|
|
@@ -120,7 +207,10 @@ class OptionalDependency:
|
|
|
120
207
|
return self._error
|
|
121
208
|
|
|
122
209
|
|
|
123
|
-
pytorch_lightning = OptionalDependency(
|
|
210
|
+
pytorch_lightning = OptionalDependency(
|
|
211
|
+
"pytorch_lightning",
|
|
212
|
+
fallback_factory=_build_lightning_stub,
|
|
213
|
+
)
|
|
124
214
|
datasets = OptionalDependency("datasets")
|
|
125
215
|
verifiers = OptionalDependency("verifiers")
|
|
126
216
|
jellyfish = OptionalDependency("jellyfish")
|
glitchlings/config.py
CHANGED
|
@@ -4,7 +4,6 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import importlib
|
|
6
6
|
import os
|
|
7
|
-
import warnings
|
|
8
7
|
from dataclasses import dataclass, field
|
|
9
8
|
from io import TextIOBase
|
|
10
9
|
from pathlib import Path
|
|
@@ -57,17 +56,6 @@ ATTACK_CONFIG_SCHEMA: dict[str, Any] = {
|
|
|
57
56
|
"required": ["name"],
|
|
58
57
|
"properties": {
|
|
59
58
|
"name": {"type": "string", "minLength": 1},
|
|
60
|
-
"type": {"type": "string", "minLength": 1},
|
|
61
|
-
"parameters": {"type": "object"},
|
|
62
|
-
},
|
|
63
|
-
"additionalProperties": True,
|
|
64
|
-
},
|
|
65
|
-
{
|
|
66
|
-
"type": "object",
|
|
67
|
-
"required": ["type"],
|
|
68
|
-
"properties": {
|
|
69
|
-
"name": {"type": "string", "minLength": 1},
|
|
70
|
-
"type": {"type": "string", "minLength": 1},
|
|
71
59
|
"parameters": {"type": "object"},
|
|
72
60
|
},
|
|
73
61
|
"additionalProperties": True,
|
|
@@ -263,7 +251,12 @@ def _validate_attack_config_schema(data: Any, *, source: str) -> Mapping[str, An
|
|
|
263
251
|
|
|
264
252
|
for index, entry in enumerate(raw_glitchlings, start=1):
|
|
265
253
|
if isinstance(entry, Mapping):
|
|
266
|
-
|
|
254
|
+
if "type" in entry:
|
|
255
|
+
raise ValueError(
|
|
256
|
+
f"{source}: glitchling #{index} uses unsupported 'type'; use 'name'."
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
name_candidate = entry.get("name")
|
|
267
260
|
if not isinstance(name_candidate, str) or not name_candidate.strip():
|
|
268
261
|
raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
|
|
269
262
|
parameters = entry.get("parameters")
|
|
@@ -326,17 +319,12 @@ def _build_glitchling(entry: Any, source: str, index: int) -> "Glitchling":
|
|
|
326
319
|
raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc
|
|
327
320
|
|
|
328
321
|
if isinstance(entry, Mapping):
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
warnings.warn(
|
|
333
|
-
f"{source}: glitchling #{index} uses 'type'; prefer 'name'.",
|
|
334
|
-
DeprecationWarning,
|
|
335
|
-
stacklevel=2,
|
|
322
|
+
if "type" in entry:
|
|
323
|
+
raise ValueError(
|
|
324
|
+
f"{source}: glitchling #{index} uses unsupported 'type'; use 'name'."
|
|
336
325
|
)
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
name_value = legacy_type
|
|
326
|
+
|
|
327
|
+
name_value = entry.get("name")
|
|
340
328
|
|
|
341
329
|
if not isinstance(name_value, str) or not name_value.strip():
|
|
342
330
|
raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
|
|
@@ -352,7 +340,7 @@ def _build_glitchling(entry: Any, source: str, index: int) -> "Glitchling":
|
|
|
352
340
|
kwargs = {
|
|
353
341
|
key: value
|
|
354
342
|
for key, value in entry.items()
|
|
355
|
-
if key not in {"name", "
|
|
343
|
+
if key not in {"name", "parameters"}
|
|
356
344
|
}
|
|
357
345
|
|
|
358
346
|
try:
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Maintain the canonical glitchling asset bundle shared by Python and Rust."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Iterator, Sequence
|
|
9
|
+
|
|
10
|
+
PIPELINE_ASSETS: frozenset[str] = frozenset(
|
|
11
|
+
{
|
|
12
|
+
"apostrofae_pairs.json",
|
|
13
|
+
"ekkokin_homophones.json",
|
|
14
|
+
"hokey_assets.json",
|
|
15
|
+
"ocr_confusions.tsv",
|
|
16
|
+
}
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _project_root(default: Path | None = None) -> Path:
|
|
21
|
+
if default is not None:
|
|
22
|
+
return default
|
|
23
|
+
return Path(__file__).resolve().parents[3]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _canonical_asset_dir(project_root: Path) -> Path:
|
|
27
|
+
canonical = project_root / "assets"
|
|
28
|
+
if not canonical.is_dir():
|
|
29
|
+
raise RuntimeError(
|
|
30
|
+
"expected canonical assets under 'assets'; "
|
|
31
|
+
"run this command from the repository root"
|
|
32
|
+
)
|
|
33
|
+
return canonical
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _legacy_rust_asset_dir(project_root: Path) -> Path:
|
|
37
|
+
return project_root / "rust" / "zoo" / "assets"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _iter_legacy_assets(rust_dir: Path) -> Iterator[Path]:
|
|
41
|
+
if not rust_dir.exists():
|
|
42
|
+
return
|
|
43
|
+
for path in rust_dir.iterdir():
|
|
44
|
+
if path.is_file():
|
|
45
|
+
yield path
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def sync_assets(
|
|
49
|
+
project_root: Path | None = None,
|
|
50
|
+
*,
|
|
51
|
+
check: bool = False,
|
|
52
|
+
quiet: bool = False,
|
|
53
|
+
) -> bool:
|
|
54
|
+
"""Ensure pipeline assets exist only at their canonical location."""
|
|
55
|
+
|
|
56
|
+
root = _project_root(project_root)
|
|
57
|
+
canonical_dir = _canonical_asset_dir(root)
|
|
58
|
+
rust_dir = _legacy_rust_asset_dir(root)
|
|
59
|
+
|
|
60
|
+
missing_sources = [name for name in PIPELINE_ASSETS if not (canonical_dir / name).is_file()]
|
|
61
|
+
if missing_sources:
|
|
62
|
+
missing_list = ", ".join(sorted(missing_sources))
|
|
63
|
+
raise RuntimeError(f"missing canonical assets: {missing_list}")
|
|
64
|
+
|
|
65
|
+
legacy_assets = list(_iter_legacy_assets(rust_dir))
|
|
66
|
+
|
|
67
|
+
if check:
|
|
68
|
+
if legacy_assets:
|
|
69
|
+
if not quiet:
|
|
70
|
+
for duplicate in legacy_assets:
|
|
71
|
+
message = (
|
|
72
|
+
"legacy vendored asset "
|
|
73
|
+
f"{duplicate.relative_to(root)} still exists; "
|
|
74
|
+
"run sync_assets to remove it"
|
|
75
|
+
)
|
|
76
|
+
print(message, file=sys.stderr)
|
|
77
|
+
return False
|
|
78
|
+
if not quiet:
|
|
79
|
+
print("No legacy Rust asset copies detected.")
|
|
80
|
+
return True
|
|
81
|
+
|
|
82
|
+
removed_any = False
|
|
83
|
+
for duplicate in legacy_assets:
|
|
84
|
+
duplicate.unlink()
|
|
85
|
+
removed_any = True
|
|
86
|
+
if not quiet:
|
|
87
|
+
print(f"Removed legacy vendored asset {duplicate.relative_to(root)}")
|
|
88
|
+
|
|
89
|
+
if removed_any:
|
|
90
|
+
try:
|
|
91
|
+
rust_dir.rmdir()
|
|
92
|
+
except OSError:
|
|
93
|
+
pass
|
|
94
|
+
elif not quiet:
|
|
95
|
+
print("No legacy Rust asset copies to remove.")
|
|
96
|
+
|
|
97
|
+
return True
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def build_parser() -> argparse.ArgumentParser:
|
|
101
|
+
parser = argparse.ArgumentParser(
|
|
102
|
+
description="Prune legacy vendored Rust assets so only canonical copies remain.",
|
|
103
|
+
)
|
|
104
|
+
parser.add_argument(
|
|
105
|
+
"--check",
|
|
106
|
+
action="store_true",
|
|
107
|
+
help="exit with a non-zero status when vendored assets diverge",
|
|
108
|
+
)
|
|
109
|
+
parser.add_argument(
|
|
110
|
+
"--quiet",
|
|
111
|
+
action="store_true",
|
|
112
|
+
help="suppress status output",
|
|
113
|
+
)
|
|
114
|
+
parser.add_argument(
|
|
115
|
+
"--project-root",
|
|
116
|
+
type=Path,
|
|
117
|
+
help="override the detected project root (useful for testing)",
|
|
118
|
+
)
|
|
119
|
+
return parser
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def main(argv: Sequence[str] | None = None) -> int:
|
|
123
|
+
parser = build_parser()
|
|
124
|
+
args = parser.parse_args(argv)
|
|
125
|
+
ok = sync_assets(project_root=args.project_root, check=args.check, quiet=args.quiet)
|
|
126
|
+
return 0 if ok else 1
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
if __name__ == "__main__": # pragma: no cover - CLI entry point
|
|
130
|
+
raise SystemExit(main())
|
|
@@ -187,7 +187,19 @@ def _ensure_datamodule_class() -> Any:
|
|
|
187
187
|
setattr(datamodule_cls, "glitch", glitch)
|
|
188
188
|
|
|
189
189
|
if not issubclass(_GlitchedLightningDataModule, datamodule_cls):
|
|
190
|
-
|
|
190
|
+
try:
|
|
191
|
+
_GlitchedLightningDataModule.__bases__ = (datamodule_cls,)
|
|
192
|
+
except TypeError:
|
|
193
|
+
namespace = {
|
|
194
|
+
name: value
|
|
195
|
+
for name, value in vars(_GlitchedLightningDataModule).items()
|
|
196
|
+
if name not in {"__dict__", "__weakref__"}
|
|
197
|
+
}
|
|
198
|
+
replacement = cast(
|
|
199
|
+
type[Any],
|
|
200
|
+
type("_GlitchedLightningDataModule", (datamodule_cls,), namespace),
|
|
201
|
+
)
|
|
202
|
+
globals()["_GlitchedLightningDataModule"] = replacement
|
|
191
203
|
|
|
192
204
|
return datamodule_cls
|
|
193
205
|
|
glitchlings/spectroll.py
ADDED
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import json
|
|
6
5
|
import re
|
|
7
6
|
from dataclasses import dataclass
|
|
8
|
-
from
|
|
9
|
-
|
|
7
|
+
from typing import Protocol, Sequence, TypedDict, cast
|
|
8
|
+
|
|
9
|
+
from glitchlings.zoo import assets
|
|
10
10
|
|
|
11
11
|
# Regexes reused across the module
|
|
12
12
|
TOKEN_REGEX = re.compile(r"\w+|\W+")
|
|
@@ -32,12 +32,7 @@ class RandomLike(Protocol):
|
|
|
32
32
|
|
|
33
33
|
# Lexical prior probabilities and pragmatic lexica shared with the Rust fast path.
|
|
34
34
|
def _load_assets() -> HokeyAssets:
|
|
35
|
-
|
|
36
|
-
resources.files("glitchlings.data")
|
|
37
|
-
.joinpath("hokey_assets.json")
|
|
38
|
-
.open("r", encoding="utf-8") as payload
|
|
39
|
-
):
|
|
40
|
-
data: Any = json.load(payload)
|
|
35
|
+
data = assets.load_json("hokey_assets.json")
|
|
41
36
|
return cast(HokeyAssets, data)
|
|
42
37
|
|
|
43
38
|
|
glitchlings/zoo/__init__.py
CHANGED
|
@@ -14,6 +14,7 @@ from .core import (
|
|
|
14
14
|
plan_glitchling_specs,
|
|
15
15
|
plan_glitchlings,
|
|
16
16
|
)
|
|
17
|
+
from .ekkokin import Ekkokin, ekkokin
|
|
17
18
|
from .hokey import Hokey, hokey
|
|
18
19
|
from .jargoyle import Jargoyle, jargoyle
|
|
19
20
|
from .jargoyle import dependencies_available as _jargoyle_available
|
|
@@ -22,6 +23,7 @@ from .redactyl import Redactyl, redactyl
|
|
|
22
23
|
from .reduple import Reduple, reduple
|
|
23
24
|
from .rushmore import Rushmore, rushmore
|
|
24
25
|
from .scannequin import Scannequin, scannequin
|
|
26
|
+
from .spectroll import Spectroll, spectroll
|
|
25
27
|
from .typogre import Typogre, typogre
|
|
26
28
|
from .zeedub import Zeedub, zeedub
|
|
27
29
|
|
|
@@ -32,6 +34,8 @@ __all__ = [
|
|
|
32
34
|
"mim1c",
|
|
33
35
|
"Jargoyle",
|
|
34
36
|
"jargoyle",
|
|
37
|
+
"Ekkokin",
|
|
38
|
+
"ekkokin",
|
|
35
39
|
"Apostrofae",
|
|
36
40
|
"apostrofae",
|
|
37
41
|
"Hokey",
|
|
@@ -44,6 +48,8 @@ __all__ = [
|
|
|
44
48
|
"rushmore",
|
|
45
49
|
"Redactyl",
|
|
46
50
|
"redactyl",
|
|
51
|
+
"Spectroll",
|
|
52
|
+
"spectroll",
|
|
47
53
|
"Scannequin",
|
|
48
54
|
"scannequin",
|
|
49
55
|
"Zeedub",
|
|
@@ -64,10 +70,10 @@ __all__ = [
|
|
|
64
70
|
|
|
65
71
|
_HAS_JARGOYLE = _jargoyle_available()
|
|
66
72
|
|
|
67
|
-
_BUILTIN_GLITCHLING_LIST: list[Glitchling] = [typogre, apostrofae, hokey, mim1c]
|
|
73
|
+
_BUILTIN_GLITCHLING_LIST: list[Glitchling] = [typogre, apostrofae, hokey, mim1c, ekkokin]
|
|
68
74
|
if _HAS_JARGOYLE:
|
|
69
75
|
_BUILTIN_GLITCHLING_LIST.append(jargoyle)
|
|
70
|
-
_BUILTIN_GLITCHLING_LIST.extend([adjax, reduple, rushmore, redactyl, scannequin, zeedub])
|
|
76
|
+
_BUILTIN_GLITCHLING_LIST.extend([adjax, reduple, rushmore, redactyl, spectroll, scannequin, zeedub])
|
|
71
77
|
|
|
72
78
|
BUILTIN_GLITCHLINGS: dict[str, Glitchling] = {
|
|
73
79
|
glitchling.name.lower(): glitchling for glitchling in _BUILTIN_GLITCHLING_LIST
|
|
@@ -75,6 +81,7 @@ BUILTIN_GLITCHLINGS: dict[str, Glitchling] = {
|
|
|
75
81
|
|
|
76
82
|
_BUILTIN_GLITCHLING_TYPES: dict[str, type[Glitchling]] = {
|
|
77
83
|
typogre.name.lower(): Typogre,
|
|
84
|
+
ekkokin.name.lower(): Ekkokin,
|
|
78
85
|
apostrofae.name.lower(): Apostrofae,
|
|
79
86
|
hokey.name.lower(): Hokey,
|
|
80
87
|
mim1c.name.lower(): Mim1c,
|
|
@@ -82,6 +89,7 @@ _BUILTIN_GLITCHLING_TYPES: dict[str, type[Glitchling]] = {
|
|
|
82
89
|
reduple.name.lower(): Reduple,
|
|
83
90
|
rushmore.name.lower(): Rushmore,
|
|
84
91
|
redactyl.name.lower(): Redactyl,
|
|
92
|
+
spectroll.name.lower(): Spectroll,
|
|
85
93
|
scannequin.name.lower(): Scannequin,
|
|
86
94
|
zeedub.name.lower(): Zeedub,
|
|
87
95
|
}
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from .assets import read_text
|
|
4
4
|
|
|
5
5
|
_CONFUSION_TABLE: list[tuple[str, list[str]]] | None = None
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def load_confusion_table() -> list[tuple[str, list[str]]]:
|
|
9
9
|
"""Load the OCR confusion table shared by Python and Rust implementations."""
|
|
10
|
+
|
|
10
11
|
global _CONFUSION_TABLE
|
|
11
12
|
if _CONFUSION_TABLE is not None:
|
|
12
13
|
return _CONFUSION_TABLE
|
|
13
14
|
|
|
14
|
-
|
|
15
|
-
text = data.read_text(encoding="utf-8")
|
|
15
|
+
text = read_text("ocr_confusions.tsv")
|
|
16
16
|
indexed_entries: list[tuple[int, tuple[str, list[str]]]] = []
|
|
17
17
|
for line_number, line in enumerate(text.splitlines()):
|
|
18
18
|
stripped = line.strip()
|
glitchlings/zoo/_text_utils.py
CHANGED
|
@@ -21,9 +21,9 @@ def split_token_edges(token: str) -> tuple[str, str, str]:
|
|
|
21
21
|
return match.group(1), match.group(2), match.group(3)
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
def
|
|
25
|
-
"""Return
|
|
26
|
-
|
|
24
|
+
def _resolve_core_length(core: str, token: str) -> int:
|
|
25
|
+
"""Return a stable core-length measurement used by weighting heuristics."""
|
|
26
|
+
|
|
27
27
|
candidate = core if core else token
|
|
28
28
|
length = len(candidate)
|
|
29
29
|
if length <= 0:
|
|
@@ -34,6 +34,12 @@ def token_core_length(token: str) -> int:
|
|
|
34
34
|
return length
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
def token_core_length(token: str) -> int:
|
|
38
|
+
"""Return the length of the main word characters for weighting heuristics."""
|
|
39
|
+
_, core, _ = split_token_edges(token)
|
|
40
|
+
return _resolve_core_length(core, token)
|
|
41
|
+
|
|
42
|
+
|
|
37
43
|
@dataclass(frozen=True)
|
|
38
44
|
class WordToken:
|
|
39
45
|
"""Metadata describing a non-whitespace token yielded by word splitters."""
|
|
@@ -71,12 +77,7 @@ def collect_word_tokens(
|
|
|
71
77
|
continue
|
|
72
78
|
|
|
73
79
|
prefix, core, suffix = split_token_edges(token)
|
|
74
|
-
core_length =
|
|
75
|
-
if core_length <= 0:
|
|
76
|
-
stripped = token.strip()
|
|
77
|
-
core_length = len(stripped) if stripped else len(token)
|
|
78
|
-
if core_length <= 0:
|
|
79
|
-
core_length = 1
|
|
80
|
+
core_length = _resolve_core_length(core, token)
|
|
80
81
|
|
|
81
82
|
collected.append(
|
|
82
83
|
WordToken(
|
glitchlings/zoo/adjax.py
CHANGED
|
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|
|
3
3
|
import random
|
|
4
4
|
from typing import Any, cast
|
|
5
5
|
|
|
6
|
-
from ._rate import resolve_rate
|
|
7
6
|
from ._rust_extensions import get_rust_operation
|
|
8
7
|
from ._text_utils import split_preserving_whitespace, split_token_edges
|
|
9
8
|
from .core import AttackWave, Glitchling
|
|
@@ -66,16 +65,9 @@ def swap_adjacent_words(
|
|
|
66
65
|
rate: float | None = None,
|
|
67
66
|
seed: int | None = None,
|
|
68
67
|
rng: random.Random | None = None,
|
|
69
|
-
*,
|
|
70
|
-
swap_rate: float | None = None,
|
|
71
68
|
) -> str:
|
|
72
69
|
"""Swap adjacent word cores while preserving spacing and punctuation."""
|
|
73
|
-
effective_rate =
|
|
74
|
-
rate=rate,
|
|
75
|
-
legacy_value=swap_rate,
|
|
76
|
-
default=0.5,
|
|
77
|
-
legacy_name="swap_rate",
|
|
78
|
-
)
|
|
70
|
+
effective_rate = 0.5 if rate is None else rate
|
|
79
71
|
clamped_rate = max(0.0, min(effective_rate, 1.0))
|
|
80
72
|
|
|
81
73
|
if rng is None:
|
|
@@ -94,16 +86,9 @@ class Adjax(Glitchling):
|
|
|
94
86
|
self,
|
|
95
87
|
*,
|
|
96
88
|
rate: float | None = None,
|
|
97
|
-
swap_rate: float | None = None,
|
|
98
89
|
seed: int | None = None,
|
|
99
90
|
) -> None:
|
|
100
|
-
|
|
101
|
-
effective_rate = resolve_rate(
|
|
102
|
-
rate=rate,
|
|
103
|
-
legacy_value=swap_rate,
|
|
104
|
-
default=0.5,
|
|
105
|
-
legacy_name="swap_rate",
|
|
106
|
-
)
|
|
91
|
+
effective_rate = 0.5 if rate is None else rate
|
|
107
92
|
super().__init__(
|
|
108
93
|
name="Adjax",
|
|
109
94
|
corruption_function=swap_adjacent_words,
|
|
@@ -118,7 +103,7 @@ class Adjax(Glitchling):
|
|
|
118
103
|
return None
|
|
119
104
|
return {
|
|
120
105
|
"type": "swap_adjacent",
|
|
121
|
-
"
|
|
106
|
+
"rate": float(rate),
|
|
122
107
|
}
|
|
123
108
|
|
|
124
109
|
|
glitchlings/zoo/apostrofae.py
CHANGED
|
@@ -2,13 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import json
|
|
6
5
|
import random
|
|
7
6
|
from functools import cache
|
|
8
|
-
from importlib import resources
|
|
9
7
|
from typing import Any, Sequence, cast
|
|
10
8
|
|
|
11
9
|
from ._rust_extensions import get_rust_operation
|
|
10
|
+
from .assets import load_json
|
|
12
11
|
from .core import AttackOrder, AttackWave, Gaggle, Glitchling
|
|
13
12
|
|
|
14
13
|
# Load Rust-accelerated operation if available
|
|
@@ -19,9 +18,7 @@ _apostrofae_rust = get_rust_operation("apostrofae")
|
|
|
19
18
|
def _load_replacement_pairs() -> dict[str, list[tuple[str, str]]]:
|
|
20
19
|
"""Load the curated mapping of straight quotes to fancy pairs."""
|
|
21
20
|
|
|
22
|
-
|
|
23
|
-
with resource.open("r", encoding="utf-8") as handle:
|
|
24
|
-
data: dict[str, list[Sequence[str]]] = json.load(handle)
|
|
21
|
+
data: dict[str, list[Sequence[str]]] = load_json("apostrofae_pairs.json")
|
|
25
22
|
|
|
26
23
|
parsed: dict[str, list[tuple[str, str]]] = {}
|
|
27
24
|
for straight, replacements in data.items():
|