glitchlings 0.4.1__cp311-cp311-macosx_11_0_universal2.whl → 0.4.3__cp311-cp311-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +30 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust.cpython-311-darwin.so +0 -0
- glitchlings/compat.py +284 -0
- glitchlings/config.py +164 -34
- glitchlings/config.toml +1 -1
- glitchlings/dlc/__init__.py +3 -1
- glitchlings/dlc/_shared.py +68 -0
- glitchlings/dlc/huggingface.py +26 -41
- glitchlings/dlc/prime.py +64 -101
- glitchlings/dlc/pytorch.py +216 -0
- glitchlings/dlc/pytorch_lightning.py +233 -0
- glitchlings/lexicon/__init__.py +12 -33
- glitchlings/lexicon/_cache.py +21 -22
- glitchlings/lexicon/data/default_vector_cache.json +80 -14
- glitchlings/lexicon/metrics.py +1 -8
- glitchlings/lexicon/vector.py +109 -49
- glitchlings/lexicon/wordnet.py +89 -49
- glitchlings/main.py +30 -24
- glitchlings/util/__init__.py +18 -4
- glitchlings/util/adapters.py +27 -0
- glitchlings/zoo/__init__.py +26 -15
- glitchlings/zoo/_ocr_confusions.py +1 -3
- glitchlings/zoo/_rate.py +1 -4
- glitchlings/zoo/_sampling.py +0 -1
- glitchlings/zoo/_text_utils.py +1 -5
- glitchlings/zoo/adjax.py +2 -4
- glitchlings/zoo/apostrofae.py +128 -0
- glitchlings/zoo/assets/__init__.py +0 -0
- glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
- glitchlings/zoo/core.py +152 -87
- glitchlings/zoo/jargoyle.py +50 -45
- glitchlings/zoo/mim1c.py +11 -10
- glitchlings/zoo/redactyl.py +16 -16
- glitchlings/zoo/reduple.py +5 -3
- glitchlings/zoo/rushmore.py +4 -10
- glitchlings/zoo/scannequin.py +7 -6
- glitchlings/zoo/typogre.py +8 -9
- glitchlings/zoo/zeedub.py +6 -3
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/METADATA +101 -4
- glitchlings-0.4.3.dist-info/RECORD +46 -0
- glitchlings/lexicon/graph.py +0 -290
- glitchlings-0.4.1.dist-info/RECORD +0 -39
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/top_level.txt +0 -0
glitchlings/main.py
CHANGED
|
@@ -4,16 +4,18 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import argparse
|
|
6
6
|
import difflib
|
|
7
|
-
from pathlib import Path
|
|
8
7
|
import sys
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import cast
|
|
9
11
|
|
|
10
12
|
from . import SAMPLE_TEXT
|
|
11
13
|
from .config import DEFAULT_ATTACK_SEED, build_gaggle, load_attack_config
|
|
12
14
|
from .zoo import (
|
|
13
|
-
Glitchling,
|
|
14
|
-
Gaggle,
|
|
15
15
|
BUILTIN_GLITCHLINGS,
|
|
16
16
|
DEFAULT_GLITCHLING_NAMES,
|
|
17
|
+
Gaggle,
|
|
18
|
+
Glitchling,
|
|
17
19
|
parse_glitchling_spec,
|
|
18
20
|
summon,
|
|
19
21
|
)
|
|
@@ -26,8 +28,8 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
26
28
|
|
|
27
29
|
Returns:
|
|
28
30
|
argparse.ArgumentParser: The configured argument parser instance.
|
|
29
|
-
"""
|
|
30
31
|
|
|
32
|
+
"""
|
|
31
33
|
parser = argparse.ArgumentParser(
|
|
32
34
|
description=(
|
|
33
35
|
"Summon glitchlings to corrupt text. Provide input text as an argument, "
|
|
@@ -88,6 +90,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
88
90
|
|
|
89
91
|
|
|
90
92
|
def build_lexicon_parser() -> argparse.ArgumentParser:
|
|
93
|
+
"""Create the ``build-lexicon`` subcommand parser with vector cache options."""
|
|
91
94
|
builder = argparse.ArgumentParser(
|
|
92
95
|
prog="glitchlings build-lexicon",
|
|
93
96
|
description=(
|
|
@@ -157,7 +160,6 @@ def build_lexicon_parser() -> argparse.ArgumentParser:
|
|
|
157
160
|
|
|
158
161
|
def list_glitchlings() -> None:
|
|
159
162
|
"""Print information about the available built-in glitchlings."""
|
|
160
|
-
|
|
161
163
|
for key in DEFAULT_GLITCHLING_NAMES:
|
|
162
164
|
glitchling = BUILTIN_GLITCHLINGS[key]
|
|
163
165
|
display_name = glitchling.name
|
|
@@ -178,27 +180,30 @@ def read_text(args: argparse.Namespace, parser: argparse.ArgumentParser) -> str:
|
|
|
178
180
|
|
|
179
181
|
Raises:
|
|
180
182
|
SystemExit: Raised indirectly via ``parser.error`` on failure.
|
|
181
|
-
"""
|
|
182
183
|
|
|
183
|
-
|
|
184
|
+
"""
|
|
185
|
+
file_path = cast(Path | None, getattr(args, "file", None))
|
|
186
|
+
if file_path is not None:
|
|
184
187
|
try:
|
|
185
|
-
return
|
|
188
|
+
return file_path.read_text(encoding="utf-8")
|
|
186
189
|
except OSError as exc:
|
|
187
|
-
filename = getattr(exc, "filename", None) or
|
|
190
|
+
filename = getattr(exc, "filename", None) or file_path
|
|
188
191
|
reason = exc.strerror or str(exc)
|
|
189
192
|
parser.error(f"Failed to read file {filename}: {reason}")
|
|
190
193
|
|
|
191
|
-
|
|
192
|
-
|
|
194
|
+
text_argument = cast(str | None, getattr(args, "text", None))
|
|
195
|
+
if text_argument:
|
|
196
|
+
return text_argument
|
|
193
197
|
|
|
194
198
|
if not sys.stdin.isatty():
|
|
195
199
|
return sys.stdin.read()
|
|
196
200
|
|
|
197
|
-
if args
|
|
201
|
+
if bool(getattr(args, "sample", False)):
|
|
198
202
|
return SAMPLE_TEXT
|
|
199
203
|
|
|
200
204
|
parser.error(
|
|
201
|
-
"No input text provided. Supply text as an argument, use --file, pipe input, or
|
|
205
|
+
"No input text provided. Supply text as an argument, use --file, pipe input, or "
|
|
206
|
+
"pass --sample."
|
|
202
207
|
)
|
|
203
208
|
raise AssertionError("parser.error should exit")
|
|
204
209
|
|
|
@@ -211,7 +216,6 @@ def summon_glitchlings(
|
|
|
211
216
|
config_path: Path | None = None,
|
|
212
217
|
) -> Gaggle:
|
|
213
218
|
"""Instantiate the requested glitchlings and bundle them in a ``Gaggle``."""
|
|
214
|
-
|
|
215
219
|
if config_path is not None:
|
|
216
220
|
if names:
|
|
217
221
|
parser.error("Cannot combine --config with --glitchling.")
|
|
@@ -225,30 +229,30 @@ def summon_glitchlings(
|
|
|
225
229
|
|
|
226
230
|
return build_gaggle(config, seed_override=seed)
|
|
227
231
|
|
|
232
|
+
normalized: Sequence[str | Glitchling]
|
|
228
233
|
if names:
|
|
229
|
-
|
|
234
|
+
parsed: list[str | Glitchling] = []
|
|
230
235
|
for specification in names:
|
|
231
236
|
try:
|
|
232
|
-
|
|
237
|
+
parsed.append(parse_glitchling_spec(specification))
|
|
233
238
|
except ValueError as exc:
|
|
234
239
|
parser.error(str(exc))
|
|
235
240
|
raise AssertionError("parser.error should exit")
|
|
241
|
+
normalized = parsed
|
|
236
242
|
else:
|
|
237
|
-
normalized = DEFAULT_GLITCHLING_NAMES
|
|
243
|
+
normalized = list(DEFAULT_GLITCHLING_NAMES)
|
|
238
244
|
|
|
239
245
|
effective_seed = seed if seed is not None else DEFAULT_ATTACK_SEED
|
|
240
246
|
|
|
241
247
|
try:
|
|
242
|
-
return summon(normalized, seed=effective_seed)
|
|
248
|
+
return summon(list(normalized), seed=effective_seed)
|
|
243
249
|
except ValueError as exc:
|
|
244
250
|
parser.error(str(exc))
|
|
245
251
|
raise AssertionError("parser.error should exit")
|
|
246
252
|
|
|
247
253
|
|
|
248
|
-
|
|
249
254
|
def show_diff(original: str, corrupted: str) -> None:
|
|
250
255
|
"""Display a unified diff between the original and corrupted text."""
|
|
251
|
-
|
|
252
256
|
diff_lines = list(
|
|
253
257
|
difflib.unified_diff(
|
|
254
258
|
original.splitlines(keepends=True),
|
|
@@ -274,8 +278,8 @@ def run_cli(args: argparse.Namespace, parser: argparse.ArgumentParser) -> int:
|
|
|
274
278
|
|
|
275
279
|
Returns:
|
|
276
280
|
int: Exit code for the process (``0`` on success).
|
|
277
|
-
"""
|
|
278
281
|
|
|
282
|
+
"""
|
|
279
283
|
if args.list:
|
|
280
284
|
list_glitchlings()
|
|
281
285
|
return 0
|
|
@@ -288,7 +292,10 @@ def run_cli(args: argparse.Namespace, parser: argparse.ArgumentParser) -> int:
|
|
|
288
292
|
config_path=args.config,
|
|
289
293
|
)
|
|
290
294
|
|
|
291
|
-
corrupted = gaggle(text)
|
|
295
|
+
corrupted = gaggle.corrupt(text)
|
|
296
|
+
if not isinstance(corrupted, str):
|
|
297
|
+
message = "Gaggle returned non-string output for string input"
|
|
298
|
+
raise TypeError(message)
|
|
292
299
|
|
|
293
300
|
if args.diff:
|
|
294
301
|
show_diff(text, corrupted)
|
|
@@ -300,7 +307,6 @@ def run_cli(args: argparse.Namespace, parser: argparse.ArgumentParser) -> int:
|
|
|
300
307
|
|
|
301
308
|
def run_build_lexicon(args: argparse.Namespace) -> int:
|
|
302
309
|
"""Delegate to the vector lexicon cache builder using CLI arguments."""
|
|
303
|
-
|
|
304
310
|
from glitchlings.lexicon.vector import main as vector_main
|
|
305
311
|
|
|
306
312
|
vector_args = [
|
|
@@ -337,8 +343,8 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
337
343
|
|
|
338
344
|
Returns:
|
|
339
345
|
int: Exit code suitable for use with ``sys.exit``.
|
|
340
|
-
"""
|
|
341
346
|
|
|
347
|
+
"""
|
|
342
348
|
if argv is None:
|
|
343
349
|
raw_args = sys.argv[1:]
|
|
344
350
|
else:
|
glitchlings/util/__init__.py
CHANGED
|
@@ -1,12 +1,27 @@
|
|
|
1
1
|
import difflib
|
|
2
2
|
from collections.abc import Iterable
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
__all__ = [
|
|
5
|
+
"SAMPLE_TEXT",
|
|
6
|
+
"string_diffs",
|
|
7
|
+
"KeyNeighborMap",
|
|
8
|
+
"KeyboardLayouts",
|
|
9
|
+
"KeyNeighbors",
|
|
10
|
+
"KEYNEIGHBORS",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
SAMPLE_TEXT = (
|
|
14
|
+
"One morning, when Gregor Samsa woke from troubled dreams, he found himself "
|
|
15
|
+
"transformed in his bed into a horrible vermin. He lay on his armour-like back, and "
|
|
16
|
+
"if he lifted his head a little he could see his brown belly, slightly domed and "
|
|
17
|
+
"divided by arches into stiff sections. The bedding was hardly able to cover it and "
|
|
18
|
+
"seemed ready to slide off any moment. His many legs, pitifully thin compared with "
|
|
19
|
+
"the size of the rest of him, waved about helplessly as he looked."
|
|
20
|
+
)
|
|
5
21
|
|
|
6
22
|
|
|
7
23
|
def string_diffs(a: str, b: str) -> list[list[tuple[str, str, str]]]:
|
|
8
|
-
"""
|
|
9
|
-
Compare two strings using SequenceMatcher and return
|
|
24
|
+
"""Compare two strings using SequenceMatcher and return
|
|
10
25
|
grouped adjacent opcodes (excluding 'equal' tags).
|
|
11
26
|
|
|
12
27
|
Each element is a tuple: (tag, a_text, b_text).
|
|
@@ -39,7 +54,6 @@ KeyboardLayouts = dict[str, KeyNeighborMap]
|
|
|
39
54
|
|
|
40
55
|
def _build_neighbor_map(rows: Iterable[str]) -> KeyNeighborMap:
|
|
41
56
|
"""Derive 8-neighbour adjacency lists from keyboard layout rows."""
|
|
42
|
-
|
|
43
57
|
grid: dict[tuple[int, int], str] = {}
|
|
44
58
|
for y, row in enumerate(rows):
|
|
45
59
|
for x, char in enumerate(row):
|
|
glitchlings/util/adapters.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Adapter helpers shared across Python and DLC integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
|
|
7
|
+
from ..zoo import Gaggle, Glitchling, summon
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def coerce_gaggle(
|
|
11
|
+
glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
|
|
12
|
+
*,
|
|
13
|
+
seed: int,
|
|
14
|
+
) -> Gaggle:
|
|
15
|
+
"""Return a :class:`Gaggle` built from any supported glitchling specifier."""
|
|
16
|
+
if isinstance(glitchlings, Gaggle):
|
|
17
|
+
return glitchlings
|
|
18
|
+
|
|
19
|
+
if isinstance(glitchlings, (Glitchling, str)):
|
|
20
|
+
resolved: Iterable[str | Glitchling] = [glitchlings]
|
|
21
|
+
else:
|
|
22
|
+
resolved = glitchlings
|
|
23
|
+
|
|
24
|
+
return summon(list(resolved), seed=seed)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
__all__ = ["coerce_gaggle"]
|
glitchlings/zoo/__init__.py
CHANGED
|
@@ -3,16 +3,26 @@ from __future__ import annotations
|
|
|
3
3
|
import ast
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
from .typogre import Typogre, typogre
|
|
7
|
-
from .mim1c import Mim1c, mim1c
|
|
8
|
-
from .jargoyle import Jargoyle, jargoyle, dependencies_available as _jargoyle_available
|
|
9
6
|
from .adjax import Adjax, adjax
|
|
7
|
+
from .apostrofae import Apostrofae, apostrofae
|
|
8
|
+
from .core import (
|
|
9
|
+
Gaggle,
|
|
10
|
+
Glitchling,
|
|
11
|
+
is_rust_pipeline_enabled,
|
|
12
|
+
is_rust_pipeline_supported,
|
|
13
|
+
pipeline_feature_flag_enabled,
|
|
14
|
+
plan_glitchling_specs,
|
|
15
|
+
plan_glitchlings,
|
|
16
|
+
)
|
|
17
|
+
from .jargoyle import Jargoyle, jargoyle
|
|
18
|
+
from .jargoyle import dependencies_available as _jargoyle_available
|
|
19
|
+
from .mim1c import Mim1c, mim1c
|
|
20
|
+
from .redactyl import Redactyl, redactyl
|
|
10
21
|
from .reduple import Reduple, reduple
|
|
11
22
|
from .rushmore import Rushmore, rushmore
|
|
12
|
-
from .redactyl import Redactyl, redactyl
|
|
13
23
|
from .scannequin import Scannequin, scannequin
|
|
24
|
+
from .typogre import Typogre, typogre
|
|
14
25
|
from .zeedub import Zeedub, zeedub
|
|
15
|
-
from .core import Glitchling, Gaggle
|
|
16
26
|
|
|
17
27
|
__all__ = [
|
|
18
28
|
"Typogre",
|
|
@@ -21,6 +31,8 @@ __all__ = [
|
|
|
21
31
|
"mim1c",
|
|
22
32
|
"Jargoyle",
|
|
23
33
|
"jargoyle",
|
|
34
|
+
"Apostrofae",
|
|
35
|
+
"apostrofae",
|
|
24
36
|
"Adjax",
|
|
25
37
|
"adjax",
|
|
26
38
|
"Reduple",
|
|
@@ -35,6 +47,11 @@ __all__ = [
|
|
|
35
47
|
"zeedub",
|
|
36
48
|
"Glitchling",
|
|
37
49
|
"Gaggle",
|
|
50
|
+
"plan_glitchlings",
|
|
51
|
+
"plan_glitchling_specs",
|
|
52
|
+
"is_rust_pipeline_enabled",
|
|
53
|
+
"is_rust_pipeline_supported",
|
|
54
|
+
"pipeline_feature_flag_enabled",
|
|
38
55
|
"summon",
|
|
39
56
|
"BUILTIN_GLITCHLINGS",
|
|
40
57
|
"DEFAULT_GLITCHLING_NAMES",
|
|
@@ -44,7 +61,7 @@ __all__ = [
|
|
|
44
61
|
|
|
45
62
|
_HAS_JARGOYLE = _jargoyle_available()
|
|
46
63
|
|
|
47
|
-
_BUILTIN_GLITCHLING_LIST: list[Glitchling] = [typogre, mim1c]
|
|
64
|
+
_BUILTIN_GLITCHLING_LIST: list[Glitchling] = [typogre, apostrofae, mim1c]
|
|
48
65
|
if _HAS_JARGOYLE:
|
|
49
66
|
_BUILTIN_GLITCHLING_LIST.append(jargoyle)
|
|
50
67
|
_BUILTIN_GLITCHLING_LIST.extend([adjax, reduple, rushmore, redactyl, scannequin, zeedub])
|
|
@@ -55,6 +72,7 @@ BUILTIN_GLITCHLINGS: dict[str, Glitchling] = {
|
|
|
55
72
|
|
|
56
73
|
_BUILTIN_GLITCHLING_TYPES: dict[str, type[Glitchling]] = {
|
|
57
74
|
typogre.name.lower(): Typogre,
|
|
75
|
+
apostrofae.name.lower(): Apostrofae,
|
|
58
76
|
mim1c.name.lower(): Mim1c,
|
|
59
77
|
adjax.name.lower(): Adjax,
|
|
60
78
|
reduple.name.lower(): Reduple,
|
|
@@ -71,7 +89,6 @@ DEFAULT_GLITCHLING_NAMES: list[str] = list(BUILTIN_GLITCHLINGS.keys())
|
|
|
71
89
|
|
|
72
90
|
def parse_glitchling_spec(specification: str) -> Glitchling:
|
|
73
91
|
"""Return a glitchling instance configured according to ``specification``."""
|
|
74
|
-
|
|
75
92
|
text = specification.strip()
|
|
76
93
|
if not text:
|
|
77
94
|
raise ValueError("Glitchling specification cannot be empty.")
|
|
@@ -98,14 +115,10 @@ def parse_glitchling_spec(specification: str) -> Glitchling:
|
|
|
98
115
|
try:
|
|
99
116
|
call_expr = ast.parse(f"_({arg_source})", mode="eval").body
|
|
100
117
|
except SyntaxError as exc:
|
|
101
|
-
raise ValueError(
|
|
102
|
-
f"Invalid parameter syntax for glitchling '{name}': {exc.msg}"
|
|
103
|
-
) from exc
|
|
118
|
+
raise ValueError(f"Invalid parameter syntax for glitchling '{name}': {exc.msg}") from exc
|
|
104
119
|
|
|
105
120
|
if not isinstance(call_expr, ast.Call) or call_expr.args:
|
|
106
|
-
raise ValueError(
|
|
107
|
-
f"Glitchling '{name}' parameters must be provided as keyword arguments."
|
|
108
|
-
)
|
|
121
|
+
raise ValueError(f"Glitchling '{name}' parameters must be provided as keyword arguments.")
|
|
109
122
|
|
|
110
123
|
kwargs: dict[str, Any] = {}
|
|
111
124
|
for keyword in call_expr.keywords:
|
|
@@ -128,7 +141,6 @@ def parse_glitchling_spec(specification: str) -> Glitchling:
|
|
|
128
141
|
|
|
129
142
|
def get_glitchling_class(name: str) -> type[Glitchling]:
|
|
130
143
|
"""Look up the glitchling class registered under ``name``."""
|
|
131
|
-
|
|
132
144
|
key = name.strip().lower()
|
|
133
145
|
if not key:
|
|
134
146
|
raise ValueError("Glitchling name cannot be empty.")
|
|
@@ -142,7 +154,6 @@ def get_glitchling_class(name: str) -> type[Glitchling]:
|
|
|
142
154
|
|
|
143
155
|
def summon(glitchlings: list[str | Glitchling], seed: int = 151) -> Gaggle:
|
|
144
156
|
"""Summon glitchlings by name (using defaults) or instance (to change parameters)."""
|
|
145
|
-
|
|
146
157
|
summoned: list[Glitchling] = []
|
|
147
158
|
for entry in glitchlings:
|
|
148
159
|
if isinstance(entry, Glitchling):
|
|
glitchlings/zoo/_ocr_confusions.py
CHANGED
|
@@ -26,9 +26,7 @@ def load_confusion_table() -> list[tuple[str, list[str]]]:
|
|
|
26
26
|
|
|
27
27
|
# Sort longer patterns first to avoid overlapping matches, mirroring the
|
|
28
28
|
# behaviour of the Rust `confusion_table` helper.
|
|
29
|
-
indexed_entries.sort(
|
|
30
|
-
key=lambda item: (-len(item[1][0]), item[0])
|
|
31
|
-
)
|
|
29
|
+
indexed_entries.sort(key=lambda item: (-len(item[1][0]), item[0]))
|
|
32
30
|
entries = [entry for _, entry in indexed_entries]
|
|
33
31
|
_CONFUSION_TABLE = entries
|
|
34
32
|
return entries
|
glitchlings/zoo/_rate.py
CHANGED
|
@@ -9,11 +9,8 @@ def resolve_rate(
|
|
|
9
9
|
legacy_name: str,
|
|
10
10
|
) -> float:
|
|
11
11
|
"""Return the effective rate while enforcing mutual exclusivity."""
|
|
12
|
-
|
|
13
12
|
if rate is not None and legacy_value is not None:
|
|
14
|
-
raise ValueError(
|
|
15
|
-
f"Specify either 'rate' or '{legacy_name}', not both."
|
|
16
|
-
)
|
|
13
|
+
raise ValueError(f"Specify either 'rate' or '{legacy_name}', not both.")
|
|
17
14
|
if rate is not None:
|
|
18
15
|
return rate
|
|
19
16
|
if legacy_value is not None:
|
glitchlings/zoo/_sampling.py
CHANGED
glitchlings/zoo/_text_utils.py
CHANGED
|
@@ -10,13 +10,11 @@ _TOKEN_EDGES_PATTERN = re.compile(r"^(\W*)(.*?)(\W*)$")
|
|
|
10
10
|
|
|
11
11
|
def split_preserving_whitespace(text: str) -> list[str]:
|
|
12
12
|
"""Split text while keeping whitespace tokens for stable reconstruction."""
|
|
13
|
-
|
|
14
13
|
return _WORD_SPLIT_PATTERN.split(text)
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
def split_token_edges(token: str) -> tuple[str, str, str]:
|
|
18
17
|
"""Return leading, core, and trailing segments for a token."""
|
|
19
|
-
|
|
20
18
|
match = _TOKEN_EDGES_PATTERN.match(token)
|
|
21
19
|
if match is None:
|
|
22
20
|
return "", token, ""
|
|
@@ -25,7 +23,6 @@ def split_token_edges(token: str) -> tuple[str, str, str]:
|
|
|
25
23
|
|
|
26
24
|
def token_core_length(token: str) -> int:
|
|
27
25
|
"""Return the length of the main word characters for weighting heuristics."""
|
|
28
|
-
|
|
29
26
|
_, core, _ = split_token_edges(token)
|
|
30
27
|
candidate = core if core else token
|
|
31
28
|
length = len(candidate)
|
|
@@ -50,7 +47,6 @@ class WordToken:
|
|
|
50
47
|
@property
|
|
51
48
|
def has_core(self) -> bool:
|
|
52
49
|
"""Return ``True`` when the token contains at least one core character."""
|
|
53
|
-
|
|
54
50
|
return bool(self.core)
|
|
55
51
|
|
|
56
52
|
|
|
@@ -65,8 +61,8 @@ def collect_word_tokens(
|
|
|
65
61
|
tokens: Token sequence produced by :func:`split_preserving_whitespace`.
|
|
66
62
|
skip_first_word: Exclude the first candidate token (used by Rushmore to
|
|
67
63
|
preserve leading words).
|
|
68
|
-
"""
|
|
69
64
|
|
|
65
|
+
"""
|
|
70
66
|
start = 2 if skip_first_word else 0
|
|
71
67
|
collected: list[WordToken] = []
|
|
72
68
|
for index in range(start, len(tokens), 2):
|
glitchlings/zoo/adjax.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import random
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any, cast
|
|
5
5
|
|
|
6
6
|
from ._rate import resolve_rate
|
|
7
7
|
from ._text_utils import split_preserving_whitespace, split_token_edges
|
|
@@ -20,7 +20,6 @@ def _python_swap_adjacent_words(
|
|
|
20
20
|
rng: random.Random,
|
|
21
21
|
) -> str:
|
|
22
22
|
"""Swap the cores of adjacent words while keeping affixes and spacing intact."""
|
|
23
|
-
|
|
24
23
|
tokens = split_preserving_whitespace(text)
|
|
25
24
|
if len(tokens) < 2:
|
|
26
25
|
return text
|
|
@@ -72,7 +71,6 @@ def swap_adjacent_words(
|
|
|
72
71
|
swap_rate: float | None = None,
|
|
73
72
|
) -> str:
|
|
74
73
|
"""Swap adjacent word cores while preserving spacing and punctuation."""
|
|
75
|
-
|
|
76
74
|
effective_rate = resolve_rate(
|
|
77
75
|
rate=rate,
|
|
78
76
|
legacy_value=swap_rate,
|
|
@@ -85,7 +83,7 @@ def swap_adjacent_words(
|
|
|
85
83
|
rng = random.Random(seed)
|
|
86
84
|
|
|
87
85
|
if _swap_adjacent_words_rust is not None:
|
|
88
|
-
return _swap_adjacent_words_rust(text, clamped_rate, rng)
|
|
86
|
+
return cast(str, _swap_adjacent_words_rust(text, clamped_rate, rng))
|
|
89
87
|
|
|
90
88
|
return _python_swap_adjacent_words(text, rate=clamped_rate, rng=rng)
|
|
91
89
|
|
|
glitchlings/zoo/apostrofae.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Smart-quote glitchling that swaps straight quotes for fancy counterparts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import random
|
|
7
|
+
from functools import cache
|
|
8
|
+
from importlib import resources
|
|
9
|
+
from typing import Any, Sequence, cast
|
|
10
|
+
|
|
11
|
+
from .core import AttackOrder, AttackWave, Gaggle, Glitchling
|
|
12
|
+
|
|
13
|
+
try: # pragma: no cover - compiled extension not present in pure-Python envs
|
|
14
|
+
from glitchlings._zoo_rust import apostrofae as _apostrofae_rust
|
|
15
|
+
except ImportError: # pragma: no cover - compiled extension not present
|
|
16
|
+
_apostrofae_rust = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@cache
|
|
20
|
+
def _load_replacement_pairs() -> dict[str, list[tuple[str, str]]]:
|
|
21
|
+
"""Load the curated mapping of straight quotes to fancy pairs."""
|
|
22
|
+
|
|
23
|
+
resource = resources.files(f"{__package__}.assets").joinpath("apostrofae_pairs.json")
|
|
24
|
+
with resource.open("r", encoding="utf-8") as handle:
|
|
25
|
+
data: dict[str, list[Sequence[str]]] = json.load(handle)
|
|
26
|
+
|
|
27
|
+
parsed: dict[str, list[tuple[str, str]]] = {}
|
|
28
|
+
for straight, replacements in data.items():
|
|
29
|
+
parsed[straight] = [(pair[0], pair[1]) for pair in replacements if len(pair) == 2]
|
|
30
|
+
return parsed
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _find_quote_pairs(text: str) -> list[tuple[int, int, str]]:
|
|
34
|
+
"""Return all balanced pairs of straight quotes in ``text``.
|
|
35
|
+
|
|
36
|
+
The search walks the string once, pairing sequential occurrences of each quote
|
|
37
|
+
glyph. Unmatched openers remain untouched so contractions (e.g. ``it's``)
|
|
38
|
+
survive unmodified.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
stacks: dict[str, int | None] = {'"': None, "'": None, "`": None}
|
|
42
|
+
pairs: list[tuple[int, int, str]] = []
|
|
43
|
+
|
|
44
|
+
for index, ch in enumerate(text):
|
|
45
|
+
if ch not in stacks:
|
|
46
|
+
continue
|
|
47
|
+
start = stacks[ch]
|
|
48
|
+
if start is None:
|
|
49
|
+
stacks[ch] = index
|
|
50
|
+
else:
|
|
51
|
+
pairs.append((start, index, ch))
|
|
52
|
+
stacks[ch] = None
|
|
53
|
+
|
|
54
|
+
return pairs
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _apostrofae_python(text: str, *, rng: random.Random) -> str:
|
|
58
|
+
"""Python fallback that replaces paired straight quotes with fancy glyphs."""
|
|
59
|
+
|
|
60
|
+
pairs = _load_replacement_pairs()
|
|
61
|
+
candidates = _find_quote_pairs(text)
|
|
62
|
+
if not candidates:
|
|
63
|
+
return text
|
|
64
|
+
|
|
65
|
+
chars = list(text)
|
|
66
|
+
for start, end, glyph in candidates:
|
|
67
|
+
options = pairs.get(glyph)
|
|
68
|
+
if not options:
|
|
69
|
+
continue
|
|
70
|
+
left, right = rng.choice(options)
|
|
71
|
+
chars[start] = left
|
|
72
|
+
chars[end] = right
|
|
73
|
+
return "".join(chars)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def smart_quotes(
|
|
77
|
+
text: str,
|
|
78
|
+
seed: int | None = None,
|
|
79
|
+
rng: random.Random | None = None,
|
|
80
|
+
) -> str:
|
|
81
|
+
"""Replace straight quotes, apostrophes, and backticks with fancy pairs."""
|
|
82
|
+
|
|
83
|
+
if not text:
|
|
84
|
+
return text
|
|
85
|
+
|
|
86
|
+
if rng is None:
|
|
87
|
+
rng = random.Random(seed)
|
|
88
|
+
|
|
89
|
+
if _apostrofae_rust is not None:
|
|
90
|
+
return cast(str, _apostrofae_rust(text, rng))
|
|
91
|
+
|
|
92
|
+
return _apostrofae_python(text, rng=rng)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class Apostrofae(Glitchling):
|
|
96
|
+
"""Glitchling that swaps straight quotes for decorative Unicode pairs."""
|
|
97
|
+
|
|
98
|
+
def __init__(self, *, seed: int | None = None) -> None:
|
|
99
|
+
self._master_seed: int | None = seed
|
|
100
|
+
super().__init__(
|
|
101
|
+
name="Apostrofae",
|
|
102
|
+
corruption_function=smart_quotes,
|
|
103
|
+
scope=AttackWave.CHARACTER,
|
|
104
|
+
order=AttackOrder.NORMAL,
|
|
105
|
+
seed=seed,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
def pipeline_operation(self) -> dict[str, Any] | None:
|
|
109
|
+
return {"type": "apostrofae"}
|
|
110
|
+
|
|
111
|
+
def reset_rng(self, seed: int | None = None) -> None: # pragma: no cover - exercised indirectly
|
|
112
|
+
if seed is not None:
|
|
113
|
+
self._master_seed = seed
|
|
114
|
+
super().reset_rng(seed)
|
|
115
|
+
if self.seed is None:
|
|
116
|
+
return
|
|
117
|
+
derived = Gaggle.derive_seed(int(seed), self.name, 0)
|
|
118
|
+
self.seed = int(derived)
|
|
119
|
+
self.rng = random.Random(self.seed)
|
|
120
|
+
self.kwargs["seed"] = self.seed
|
|
121
|
+
else:
|
|
122
|
+
super().reset_rng(None)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
apostrofae = Apostrofae()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
__all__ = ["Apostrofae", "apostrofae", "smart_quotes"]
|
|
glitchlings/zoo/assets/__init__.py
ADDED
|
File without changes
|
|
glitchlings/zoo/assets/apostrofae_pairs.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"\"": [
|
|
3
|
+
["“", "”"],
|
|
4
|
+
["„", "“"],
|
|
5
|
+
["«", "»"],
|
|
6
|
+
["‹", "›"],
|
|
7
|
+
["『", "』"],
|
|
8
|
+
["「", "」"],
|
|
9
|
+
["﹁", "﹂"],
|
|
10
|
+
["﹃", "﹄"],
|
|
11
|
+
["〝", "〞"],
|
|
12
|
+
["❝", "❞"]
|
|
13
|
+
],
|
|
14
|
+
"'": [
|
|
15
|
+
["‘", "’"],
|
|
16
|
+
["‚", "‘"],
|
|
17
|
+
["‹", "›"],
|
|
18
|
+
["❮", "❯"],
|
|
19
|
+
["❛", "❜"],
|
|
20
|
+
["﹇", "﹈"]
|
|
21
|
+
],
|
|
22
|
+
"`": [
|
|
23
|
+
["‵", "′"],
|
|
24
|
+
["﹁", "﹂"],
|
|
25
|
+
["﹃", "﹄"],
|
|
26
|
+
["⌈", "⌉"],
|
|
27
|
+
["⌊", "⌋"],
|
|
28
|
+
["⎡", "⎤"],
|
|
29
|
+
["⎣", "⎦"],
|
|
30
|
+
["〝", "〞"]
|
|
31
|
+
]
|
|
32
|
+
}
|