glitchlings-0.4.1-cp311-cp311-macosx_11_0_universal2.whl → glitchlings-0.4.2-cp311-cp311-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +26 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust.cpython-311-darwin.so +0 -0
- glitchlings/compat.py +215 -0
- glitchlings/config.py +136 -19
- glitchlings/dlc/_shared.py +68 -0
- glitchlings/dlc/huggingface.py +26 -41
- glitchlings/dlc/prime.py +64 -101
- glitchlings/lexicon/__init__.py +8 -19
- glitchlings/lexicon/_cache.py +0 -7
- glitchlings/lexicon/graph.py +4 -12
- glitchlings/lexicon/metrics.py +1 -8
- glitchlings/lexicon/vector.py +15 -34
- glitchlings/lexicon/wordnet.py +31 -32
- glitchlings/main.py +9 -13
- glitchlings/util/__init__.py +18 -4
- glitchlings/util/adapters.py +27 -0
- glitchlings/zoo/__init__.py +21 -14
- glitchlings/zoo/_ocr_confusions.py +1 -3
- glitchlings/zoo/_rate.py +1 -4
- glitchlings/zoo/_sampling.py +0 -1
- glitchlings/zoo/_text_utils.py +1 -5
- glitchlings/zoo/adjax.py +0 -2
- glitchlings/zoo/core.py +114 -75
- glitchlings/zoo/jargoyle.py +9 -14
- glitchlings/zoo/mim1c.py +11 -10
- glitchlings/zoo/redactyl.py +5 -8
- glitchlings/zoo/reduple.py +3 -1
- glitchlings/zoo/rushmore.py +2 -8
- glitchlings/zoo/scannequin.py +5 -4
- glitchlings/zoo/typogre.py +3 -7
- glitchlings/zoo/zeedub.py +2 -2
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/METADATA +67 -3
- glitchlings-0.4.2.dist-info/RECORD +42 -0
- glitchlings-0.4.1.dist-info/RECORD +0 -39
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.2.dist-info}/top_level.txt +0 -0
glitchlings/lexicon/vector.py
CHANGED
@@ -6,17 +6,18 @@ import argparse
 import importlib
 import json
 import math
-from pathlib import Path
 import sys
+from pathlib import Path
 from typing import Any, Callable, Iterable, Iterator, Mapping, MutableMapping, Sequence
 
 from . import LexiconBackend
-from ._cache import CacheSnapshot
+from ._cache import CacheSnapshot
+from ._cache import load_cache as _load_cache_file
+from ._cache import write_cache as _write_cache_file
 
 
 def _cosine_similarity(vector_a: Sequence[float], vector_b: Sequence[float]) -> float:
     """Return the cosine similarity between two dense vectors."""
-
     dot_product = 0.0
     norm_a = 0.0
     norm_b = 0.0
@@ -144,7 +145,6 @@ class _SpaCyAdapter(_Adapter):
 
 def _load_json_vectors(path: Path) -> Mapping[str, Sequence[float]]:
     """Load embeddings from a JSON mapping of token to vector list."""
-
     with path.open("r", encoding="utf8") as handle:
         payload = json.load(handle)
 
@@ -164,11 +164,8 @@ def _load_json_vectors(path: Path) -> Mapping[str, Sequence[float]]:
 
 def _load_gensim_vectors(path: Path, *, binary: bool | None = None) -> Any:
     """Load ``gensim`` vectors from ``path``."""
-
     if importlib.util.find_spec("gensim") is None:
-        raise RuntimeError(
-            "The gensim package is required to load keyed vector embeddings."
-        )
+        raise RuntimeError("The gensim package is required to load keyed vector embeddings.")
 
     keyed_vectors_module = importlib.import_module("gensim.models.keyedvectors")
     if binary is None:
@@ -177,14 +174,11 @@ def _load_gensim_vectors(path: Path, *, binary: bool | None = None) -> Any:
     if path.suffix in {".kv", ".kv2"}:
         return keyed_vectors_module.KeyedVectors.load(str(path), mmap="r")
 
-    return keyed_vectors_module.KeyedVectors.load_word2vec_format(
-        str(path), binary=binary
-    )
+    return keyed_vectors_module.KeyedVectors.load_word2vec_format(str(path), binary=binary)
 
 
 def _load_spacy_language(model_name: str) -> Any:
     """Load a spaCy language pipeline by name."""
-
     if importlib.util.find_spec("spacy") is None:
         raise RuntimeError(
             "spaCy is required to use spaCy-backed vector lexicons; install the 'vectors' extra."
@@ -196,7 +190,6 @@ def _load_spacy_language(model_name: str) -> Any:
 
 def _resolve_source(source: Any | None) -> _Adapter | None:
     """Return an adapter instance for ``source`` if possible."""
-
     if source is None:
         return None
 
@@ -229,9 +222,7 @@ def _resolve_source(source: Any | None) -> _Adapter | None:
 
     if suffix in {".kv", ".kv2", ".bin", ".gz", ".txt", ".vec"}:
         binary_flag = False if suffix in {".txt", ".vec"} else None
-        return _GensimAdapter(
-            _load_gensim_vectors(resolved_path, binary=binary_flag)
-        )
+        return _GensimAdapter(_load_gensim_vectors(resolved_path, binary=binary_flag))
 
     if hasattr(source, "most_similar") and hasattr(source, "key_to_index"):
         return _GensimAdapter(source)
@@ -358,42 +349,33 @@ class VectorLexicon(LexiconBackend):
         self._cache_dirty = True
         return synonyms
 
-    def get_synonyms(
-        self, word: str, pos: str | None = None, n: int = 5
-    ) -> list[str]:
+    def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
         normalized = self._normalize_for_lookup(word)
         synonyms = self._ensure_cached(original=word, normalized=normalized)
         return self._deterministic_sample(synonyms, limit=n, word=word, pos=pos)
 
     def precompute(self, word: str, *, limit: int | None = None) -> list[str]:
         """Populate the cache for ``word`` and return the stored synonyms."""
-
         normalized = self._normalize_for_lookup(word)
-        return list(
-            self._ensure_cached(original=word, normalized=normalized, limit=limit)
-        )
+        return list(self._ensure_cached(original=word, normalized=normalized, limit=limit))
 
     def iter_vocabulary(self) -> Iterator[str]:
         """Yield vocabulary tokens from the underlying embedding source."""
-
        if self._adapter is None:
            return iter(())
        return self._adapter.iter_keys()
 
     def export_cache(self) -> dict[str, list[str]]:
         """Return a copy of the in-memory synonym cache."""
-
         return {key: list(values) for key, values in self._cache.items()}
 
     @classmethod
     def load_cache(cls, path: str | Path) -> CacheSnapshot:
         """Load and validate a cache file for reuse."""
-
         return _load_cache_file(Path(path))
 
     def save_cache(self, path: str | Path | None = None) -> Path:
         """Persist the current cache to disk, returning the path used."""
-
         if path is None:
             if self._cache_path is None:
                 raise RuntimeError("No cache path supplied to VectorLexicon.")
@@ -430,7 +412,6 @@ def build_vector_cache(
     normalizer: Callable[[str], str] | None = None,
 ) -> Path:
     """Generate a synonym cache for ``words`` using ``source`` embeddings."""
-
     lexicon = VectorLexicon(
         source=source,
         max_neighbors=max_neighbors,
@@ -448,7 +429,6 @@
 
 def load_vector_source(spec: str) -> Any:
     """Resolve ``spec`` strings for the cache-building CLI."""
-
     if spec.startswith("spacy:"):
         model_name = spec.split(":", 1)[1]
         return _load_spacy_language(model_name)
@@ -538,7 +518,6 @@ def _iter_tokens_from_file(path: Path) -> Iterator[str]:
 
 def main(argv: Sequence[str] | None = None) -> int:
     """Entry-point for ``python -m glitchlings.lexicon.vector``."""
-
     args = _parse_cli(argv)
 
     if args.output.exists() and not args.overwrite:
@@ -547,11 +526,13 @@
         )
 
     if args.normalizer == "lower":
-        normalizer: Callable[[str], str] | None = (
-            None if args.case_sensitive else str.lower
-        )
+        normalizer: Callable[[str], str] | None = None if args.case_sensitive else str.lower
     else:
-        normalizer = lambda value: value
+
+        def _identity(value: str) -> str:
+            return value
+
+        normalizer = _identity
 
     source = load_vector_source(args.source)
     if args.tokens is not None:
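Most of the vector.py changes are formatter-driven (continuation lines joined, the blank line after each docstring dropped), but two are functional: the module now imports load_cache/write_cache from ._cache as _load_cache_file/_write_cache_file to back the class methods, and the CLI swaps a lambda normalizer for a named _identity function. A brief round-trip sketch of the cache API touched above; the file names are illustrative placeholders, not paths from this release:

    from glitchlings.lexicon.vector import VectorLexicon

    lexicon = VectorLexicon(source="vectors.kv")       # resolved through _resolve_source
    lexicon.precompute("analysis", limit=8)            # fills the in-memory synonym cache
    saved = lexicon.save_cache("synonyms-cache.json")  # persists via _write_cache_file
    snapshot = VectorLexicon.load_cache(saved)         # validates via _load_cache_file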
glitchlings/lexicon/wordnet.py
CHANGED
@@ -2,41 +2,50 @@
 
 from __future__ import annotations
 
+from importlib import import_module
+from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
-try:
-    import nltk  # type: ignore[import]
-except ModuleNotFoundError as exc:
-    nltk = None  # type: ignore[assignment]
-    find = None  # type: ignore[assignment]
-    _NLTK_IMPORT_ERROR = exc
-else:  # pragma: no cover - executed when NLTK is present
-    from nltk.corpus.reader import WordNetCorpusReader as _WordNetCorpusReader  # type: ignore[import]
-    from nltk.data import find as _nltk_find  # type: ignore[import]
+from ..compat import nltk as _nltk_dependency
+from . import LexiconBackend
+from ._cache import CacheSnapshot
 
-
-
+nltk = _nltk_dependency.get()  # type: ignore[assignment]
+_NLTK_IMPORT_ERROR = _nltk_dependency.error
 
 if TYPE_CHECKING:  # pragma: no cover - typing aid only
     from nltk.corpus.reader import WordNetCorpusReader  # type: ignore[import]
 else:  # pragma: no cover - runtime fallback to avoid hard dependency
     WordNetCorpusReader = Any
 
+find: Any | None = None
+_WORDNET_MODULE: Any | None = None
+
 if nltk is not None:  # pragma: no cover - guarded by import success
     try:
-        from nltk.corpus import wordnet as _WORDNET_MODULE  # type: ignore[import]
+        corpus_reader_module = import_module("nltk.corpus.reader")
+        WordNetCorpusReader = corpus_reader_module.WordNetCorpusReader  # type: ignore[assignment]
+    except ModuleNotFoundError as exc:  # pragma: no cover - triggered when corpus missing
+        if _NLTK_IMPORT_ERROR is None:
+            _NLTK_IMPORT_ERROR = exc  # type: ignore[assignment]
+    else:
+        try:
+            data_module = import_module("nltk.data")
+        except ModuleNotFoundError as exc:  # pragma: no cover - triggered when data missing
+            if _NLTK_IMPORT_ERROR is None:
+                _NLTK_IMPORT_ERROR = exc  # type: ignore[assignment]
+        else:
+            find = getattr(data_module, "find", None)
+
+    try:
+        _WORDNET_MODULE = import_module("nltk.corpus.wordnet")
     except ModuleNotFoundError:  # pragma: no cover - only hit on namespace packages
         _WORDNET_MODULE = None
-    else:
-        WordNetCorpusReader = _WordNetCorpusReader  # type: ignore[assignment]
 else:
+    nltk = None  # type: ignore[assignment]
+    find = None
     _WORDNET_MODULE = None
 
-from pathlib import Path
-
-from . import LexiconBackend
-from ._cache import CacheSnapshot
-
 _WORDNET_HANDLE: WordNetCorpusReader | Any | None = _WORDNET_MODULE
 _wordnet_ready = False
 
@@ -45,26 +54,23 @@ _VALID_POS: tuple[str, ...] = ("n", "v", "a", "r")
 
 def _require_nltk() -> None:
     """Ensure the NLTK dependency is present before continuing."""
-
     if nltk is None or find is None:
         message = (
             "The NLTK package is required for WordNet-backed lexicons; install "
             "`nltk` and its WordNet corpus manually to enable this backend."
         )
-        if _NLTK_IMPORT_ERROR is not None:
+        if "_NLTK_IMPORT_ERROR" in globals() and _NLTK_IMPORT_ERROR is not None:
            raise RuntimeError(message) from _NLTK_IMPORT_ERROR
        raise RuntimeError(message)
 
 
 def dependencies_available() -> bool:
     """Return ``True`` when the runtime NLTK dependency is present."""
-
     return nltk is not None and find is not None
 
 
 def _load_wordnet_reader() -> WordNetCorpusReader:
     """Return a WordNet corpus reader from the downloaded corpus files."""
-
     _require_nltk()
 
     try:
@@ -83,7 +89,6 @@ def _load_wordnet_reader() -> WordNetCorpusReader:
 
 def _wordnet(force_refresh: bool = False) -> WordNetCorpusReader | Any:
     """Retrieve the active WordNet handle, rebuilding it on demand."""
-
     global _WORDNET_HANDLE
 
     if force_refresh:
@@ -98,7 +103,6 @@ def _wordnet(force_refresh: bool = False) -> WordNetCorpusReader | Any:
 
 def ensure_wordnet() -> None:
     """Ensure the WordNet corpus is available before use."""
-
     global _wordnet_ready
     if _wordnet_ready:
         return
@@ -115,16 +119,13 @@ def ensure_wordnet() -> None:
         resource = _wordnet(force_refresh=True)
         resource.ensure_loaded()
     except LookupError as exc:  # pragma: no cover - only triggered when download fails
-        raise RuntimeError(
-            "Unable to load NLTK WordNet corpus for synonym lookups."
-        ) from exc
+        raise RuntimeError("Unable to load NLTK WordNet corpus for synonym lookups.") from exc
 
     _wordnet_ready = True
 
 
 def _collect_synonyms(word: str, parts_of_speech: tuple[str, ...]) -> list[str]:
     """Gather deterministic synonym candidates for the supplied word."""
-
     normalized_word = word.lower()
     wordnet = _wordnet()
     synonyms: set[str] = set()
@@ -157,9 +158,7 @@ def _collect_synonyms(word: str, parts_of_speech: tuple[str, ...]) -> list[str]:
 class WordNetLexicon(LexiconBackend):
     """Lexicon that retrieves synonyms from the NLTK WordNet corpus."""
 
-    def get_synonyms(
-        self, word: str, pos: str | None = None, n: int = 5
-    ) -> list[str]:
+    def get_synonyms(self, word: str, pos: str | None = None, n: int = 5) -> list[str]:
         ensure_wordnet()
 
         if pos is None:
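wordnet.py now routes its optional NLTK import through the new glitchlings/compat.py module (+215 lines, whose body is not shown in this diff). From the two call sites above, the compat object evidently exposes a get() method returning the module or None and an error attribute holding the original ModuleNotFoundError. A hypothetical minimal shape, inferred only from those call sites and not taken from the actual compat.py:

    from importlib import import_module
    from typing import Any

    class OptionalDependency:
        """Lazy optional import that preserves the failure for later re-raising."""

        def __init__(self, module_name: str) -> None:
            self.module_name = module_name
            self.error: ModuleNotFoundError | None = None

        def get(self) -> Any | None:
            try:
                return import_module(self.module_name)
            except ModuleNotFoundError as exc:
                self.error = exc
                return None

    nltk = OptionalDependency("nltk")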
glitchlings/main.py
CHANGED
@@ -4,16 +4,16 @@ from __future__ import annotations
 
 import argparse
 import difflib
-from pathlib import Path
 import sys
+from pathlib import Path
 
 from . import SAMPLE_TEXT
 from .config import DEFAULT_ATTACK_SEED, build_gaggle, load_attack_config
 from .zoo import (
-    Glitchling,
-    Gaggle,
     BUILTIN_GLITCHLINGS,
     DEFAULT_GLITCHLING_NAMES,
+    Gaggle,
+    Glitchling,
     parse_glitchling_spec,
     summon,
 )
@@ -26,8 +26,8 @@ def build_parser() -> argparse.ArgumentParser:
 
     Returns:
         argparse.ArgumentParser: The configured argument parser instance.
-    """
 
+    """
     parser = argparse.ArgumentParser(
         description=(
             "Summon glitchlings to corrupt text. Provide input text as an argument, "
@@ -157,7 +157,6 @@ def build_lexicon_parser() -> argparse.ArgumentParser:
 
 def list_glitchlings() -> None:
     """Print information about the available built-in glitchlings."""
-
     for key in DEFAULT_GLITCHLING_NAMES:
         glitchling = BUILTIN_GLITCHLINGS[key]
         display_name = glitchling.name
@@ -178,8 +177,8 @@ def read_text(args: argparse.Namespace, parser: argparse.ArgumentParser) -> str:
 
     Raises:
         SystemExit: Raised indirectly via ``parser.error`` on failure.
-    """
 
+    """
     if args.file is not None:
         try:
             return args.file.read_text(encoding="utf-8")
@@ -198,7 +197,8 @@ def read_text(args: argparse.Namespace, parser: argparse.ArgumentParser) -> str:
         return SAMPLE_TEXT
 
     parser.error(
-        "No input text provided. Supply text as an argument, use --file, pipe input, or pass --sample."
+        "No input text provided. Supply text as an argument, use --file, pipe input, or "
+        "pass --sample."
     )
     raise AssertionError("parser.error should exit")
 
@@ -211,7 +211,6 @@ def summon_glitchlings(
     config_path: Path | None = None,
 ) -> Gaggle:
     """Instantiate the requested glitchlings and bundle them in a ``Gaggle``."""
-
     if config_path is not None:
         if names:
             parser.error("Cannot combine --config with --glitchling.")
@@ -245,10 +244,8 @@
     raise AssertionError("parser.error should exit")
 
 
-
 def show_diff(original: str, corrupted: str) -> None:
     """Display a unified diff between the original and corrupted text."""
-
     diff_lines = list(
         difflib.unified_diff(
             original.splitlines(keepends=True),
@@ -274,8 +271,8 @@ def run_cli(args: argparse.Namespace, parser: argparse.ArgumentParser) -> int:
 
     Returns:
         int: Exit code for the process (``0`` on success).
-    """
 
+    """
     if args.list:
         list_glitchlings()
         return 0
@@ -300,7 +297,6 @@
 
 def run_build_lexicon(args: argparse.Namespace) -> int:
     """Delegate to the vector lexicon cache builder using CLI arguments."""
-
     from glitchlings.lexicon.vector import main as vector_main
 
     vector_args = [
@@ -337,8 +333,8 @@ def main(argv: list[str] | None = None) -> int:
 
     Returns:
         int: Exit code suitable for use with ``sys.exit``.
-    """
 
+    """
     if argv is None:
         raw_args = sys.argv[1:]
     else:
glitchlings/util/__init__.py
CHANGED
@@ -1,12 +1,27 @@
 import difflib
 from collections.abc import Iterable
 
-
+__all__ = [
+    "SAMPLE_TEXT",
+    "string_diffs",
+    "KeyNeighborMap",
+    "KeyboardLayouts",
+    "KeyNeighbors",
+    "KEYNEIGHBORS",
+]
+
+SAMPLE_TEXT = (
+    "One morning, when Gregor Samsa woke from troubled dreams, he found himself "
+    "transformed in his bed into a horrible vermin. He lay on his armour-like back, and "
+    "if he lifted his head a little he could see his brown belly, slightly domed and "
+    "divided by arches into stiff sections. The bedding was hardly able to cover it and "
+    "seemed ready to slide off any moment. His many legs, pitifully thin compared with "
+    "the size of the rest of him, waved about helplessly as he looked."
+)
 
 
 def string_diffs(a: str, b: str) -> list[list[tuple[str, str, str]]]:
-    """
-    Compare two strings using SequenceMatcher and return
+    """Compare two strings using SequenceMatcher and return
     grouped adjacent opcodes (excluding 'equal' tags).
 
     Each element is a tuple: (tag, a_text, b_text).
@@ -39,7 +54,6 @@ KeyboardLayouts = dict[str, KeyNeighborMap]
 
 def _build_neighbor_map(rows: Iterable[str]) -> KeyNeighborMap:
     """Derive 8-neighbour adjacency lists from keyboard layout rows."""
-
     grid: dict[tuple[int, int], str] = {}
     for y, row in enumerate(rows):
         for x, char in enumerate(row):
glitchlings/util/adapters.py
ADDED
@@ -0,0 +1,27 @@
+"""Adapter helpers shared across Python and DLC integrations."""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+from ..zoo import Gaggle, Glitchling, summon
+
+
+def coerce_gaggle(
+    glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
+    *,
+    seed: int,
+) -> Gaggle:
+    """Return a :class:`Gaggle` built from any supported glitchling specifier."""
+    if isinstance(glitchlings, Gaggle):
+        return glitchlings
+
+    if isinstance(glitchlings, (Glitchling, str)):
+        resolved: Iterable[str | Glitchling] = [glitchlings]
+    else:
+        resolved = glitchlings
+
+    return summon(list(resolved), seed=seed)
+
+
+__all__ = ["coerce_gaggle"]
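The new coerce_gaggle helper normalizes the glitchlings argument accepted by the DLC integrations (compare the dlc/_shared.py and dlc/*.py entries in the file list). A usage sketch; the glitchling names are illustrative registry keys, and seed=151 mirrors summon's default:

    from glitchlings.util.adapters import coerce_gaggle

    gaggle = coerce_gaggle("typogre", seed=151)          # one name -> single-member Gaggle
    same = coerce_gaggle(gaggle, seed=151)               # an existing Gaggle passes through
    mixed = coerce_gaggle(["typogre", "mim1c"], seed=7)  # iterable of names or instances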
glitchlings/zoo/__init__.py
CHANGED
@@ -3,16 +3,25 @@ from __future__ import annotations
 import ast
 from typing import Any
 
-from .typogre import Typogre, typogre
-from .mim1c import Mim1c, mim1c
-from .jargoyle import Jargoyle, jargoyle, dependencies_available as _jargoyle_available
 from .adjax import Adjax, adjax
+from .core import (
+    Gaggle,
+    Glitchling,
+    is_rust_pipeline_enabled,
+    is_rust_pipeline_supported,
+    pipeline_feature_flag_enabled,
+    plan_glitchling_specs,
+    plan_glitchlings,
+)
+from .jargoyle import Jargoyle, jargoyle
+from .jargoyle import dependencies_available as _jargoyle_available
+from .mim1c import Mim1c, mim1c
+from .redactyl import Redactyl, redactyl
 from .reduple import Reduple, reduple
 from .rushmore import Rushmore, rushmore
-from .redactyl import Redactyl, redactyl
 from .scannequin import Scannequin, scannequin
+from .typogre import Typogre, typogre
 from .zeedub import Zeedub, zeedub
-from .core import Glitchling, Gaggle
 
 __all__ = [
     "Typogre",
@@ -35,6 +44,11 @@ __all__ = [
     "zeedub",
     "Glitchling",
     "Gaggle",
+    "plan_glitchlings",
+    "plan_glitchling_specs",
+    "is_rust_pipeline_enabled",
+    "is_rust_pipeline_supported",
+    "pipeline_feature_flag_enabled",
     "summon",
     "BUILTIN_GLITCHLINGS",
     "DEFAULT_GLITCHLING_NAMES",
@@ -71,7 +85,6 @@ DEFAULT_GLITCHLING_NAMES: list[str] = list(BUILTIN_GLITCHLINGS.keys())
 
 def parse_glitchling_spec(specification: str) -> Glitchling:
     """Return a glitchling instance configured according to ``specification``."""
-
     text = specification.strip()
     if not text:
         raise ValueError("Glitchling specification cannot be empty.")
@@ -98,14 +111,10 @@ def parse_glitchling_spec(specification: str) -> Glitchling:
     try:
         call_expr = ast.parse(f"_({arg_source})", mode="eval").body
     except SyntaxError as exc:
-        raise ValueError(
-            f"Invalid parameter syntax for glitchling '{name}': {exc.msg}"
-        ) from exc
+        raise ValueError(f"Invalid parameter syntax for glitchling '{name}': {exc.msg}") from exc
 
     if not isinstance(call_expr, ast.Call) or call_expr.args:
-        raise ValueError(
-            f"Glitchling '{name}' parameters must be provided as keyword arguments."
-        )
+        raise ValueError(f"Glitchling '{name}' parameters must be provided as keyword arguments.")
 
     kwargs: dict[str, Any] = {}
     for keyword in call_expr.keywords:
@@ -128,7 +137,6 @@ def parse_glitchling_spec(specification: str) -> Glitchling:
 
 def get_glitchling_class(name: str) -> type[Glitchling]:
     """Look up the glitchling class registered under ``name``."""
-
     key = name.strip().lower()
     if not key:
         raise ValueError("Glitchling name cannot be empty.")
@@ -142,7 +150,6 @@
 
 def summon(glitchlings: list[str | Glitchling], seed: int = 151) -> Gaggle:
     """Summon glitchlings by name (using defaults) or instance (to change parameters)."""
-
     summoned: list[Glitchling] = []
     for entry in glitchlings:
         if isinstance(entry, Glitchling):
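parse_glitchling_spec parses the parameter list with ast and rejects positional arguments, so specs follow a name(key=value, ...) grammar. A sketch of both paths; the rate parameter is an assumed example, not taken from this diff:

    from glitchlings.zoo import parse_glitchling_spec

    glitchling = parse_glitchling_spec("typogre(rate=0.05)")  # keyword arguments accepted
    parse_glitchling_spec("typogre(0.05)")                    # ValueError: keywords only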
glitchlings/zoo/_ocr_confusions.py
CHANGED
@@ -26,9 +26,7 @@ def load_confusion_table() -> list[tuple[str, list[str]]]:
 
     # Sort longer patterns first to avoid overlapping matches, mirroring the
     # behaviour of the Rust `confusion_table` helper.
-    indexed_entries.sort(
-        key=lambda item: (-len(item[1][0]), item[0])
-    )
+    indexed_entries.sort(key=lambda item: (-len(item[1][0]), item[0]))
     entries = [entry for _, entry in indexed_entries]
     _CONFUSION_TABLE = entries
     return entries
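The collapsed sort call keeps the same key: longest pattern first, then the original index as a stable tie-breaker. A toy demonstration with made-up confusion entries:

    entries = [(0, ("m", ["rn"])), (1, ("rn", ["m"])), (2, ("l", ["1"]))]
    entries.sort(key=lambda item: (-len(item[1][0]), item[0]))
    # -> [(1, ("rn", ["m"])), (0, ("m", ["rn"])), (2, ("l", ["1"]))]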
glitchlings/zoo/_rate.py
CHANGED
@@ -9,11 +9,8 @@ def resolve_rate(
     legacy_name: str,
 ) -> float:
     """Return the effective rate while enforcing mutual exclusivity."""
-
     if rate is not None and legacy_value is not None:
-        raise ValueError(
-            f"Specify either 'rate' or '{legacy_name}', not both."
-        )
+        raise ValueError(f"Specify either 'rate' or '{legacy_name}', not both.")
     if rate is not None:
         return rate
     if legacy_value is not None:
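resolve_rate backs the rate/legacy-alias pattern the glitchlings share (for example rate versus swap_rate in adjax.py below). A behavior sketch based on the hunk above; the signature is partially cut off in this view, so the keyword names are taken from the adjax.py call site:

    from glitchlings.zoo._rate import resolve_rate

    resolve_rate(rate=0.2, legacy_value=None, legacy_name="swap_rate")  # -> 0.2
    resolve_rate(rate=None, legacy_value=0.3, legacy_name="swap_rate")  # -> 0.3
    resolve_rate(rate=0.2, legacy_value=0.3, legacy_name="swap_rate")   # raises ValueError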
glitchlings/zoo/_sampling.py
CHANGED
glitchlings/zoo/_text_utils.py
CHANGED
@@ -10,13 +10,11 @@ _TOKEN_EDGES_PATTERN = re.compile(r"^(\W*)(.*?)(\W*)$")
 
 def split_preserving_whitespace(text: str) -> list[str]:
     """Split text while keeping whitespace tokens for stable reconstruction."""
-
     return _WORD_SPLIT_PATTERN.split(text)
 
 
 def split_token_edges(token: str) -> tuple[str, str, str]:
     """Return leading, core, and trailing segments for a token."""
-
     match = _TOKEN_EDGES_PATTERN.match(token)
     if match is None:
         return "", token, ""
@@ -25,7 +23,6 @@ def split_token_edges(token: str) -> tuple[str, str, str]:
 
 def token_core_length(token: str) -> int:
     """Return the length of the main word characters for weighting heuristics."""
-
     _, core, _ = split_token_edges(token)
     candidate = core if core else token
     length = len(candidate)
@@ -50,7 +47,6 @@ class WordToken:
     @property
     def has_core(self) -> bool:
         """Return ``True`` when the token contains at least one core character."""
-
         return bool(self.core)
 
 
@@ -65,8 +61,8 @@ def collect_word_tokens(
         tokens: Token sequence produced by :func:`split_preserving_whitespace`.
         skip_first_word: Exclude the first candidate token (used by Rushmore to
             preserve leading words).
-    """
 
+    """
     start = 2 if skip_first_word else 0
     collected: list[WordToken] = []
     for index in range(start, len(tokens), 2):
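The _TOKEN_EDGES_PATTERN regex shown at the top of this file splits a token into a non-word prefix, a core, and a non-word suffix, which is what the helpers above operate on. A quick demonstration of split_token_edges, whose body appears in full in the hunks:

    from glitchlings.zoo._text_utils import split_token_edges

    split_token_edges('"Hello,"')  # -> ('"', 'Hello', ',"')
    split_token_edges("---")       # no word core: -> ('---', '', '')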
glitchlings/zoo/adjax.py
CHANGED
@@ -20,7 +20,6 @@ def _python_swap_adjacent_words(
     rng: random.Random,
 ) -> str:
     """Swap the cores of adjacent words while keeping affixes and spacing intact."""
-
     tokens = split_preserving_whitespace(text)
     if len(tokens) < 2:
         return text
@@ -72,7 +71,6 @@ def swap_adjacent_words(
     swap_rate: float | None = None,
 ) -> str:
     """Swap adjacent word cores while preserving spacing and punctuation."""
-
     effective_rate = resolve_rate(
         rate=rate,
         legacy_value=swap_rate,