glitchlings 0.4.2__cp312-cp312-win_amd64.whl → 0.4.4__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +4 -0
- glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
- glitchlings/compat.py +80 -11
- glitchlings/config.py +32 -19
- glitchlings/config.toml +1 -1
- glitchlings/dlc/__init__.py +3 -1
- glitchlings/dlc/_shared.py +86 -1
- glitchlings/dlc/pytorch.py +166 -0
- glitchlings/dlc/pytorch_lightning.py +215 -0
- glitchlings/lexicon/__init__.py +10 -16
- glitchlings/lexicon/_cache.py +21 -15
- glitchlings/lexicon/data/default_vector_cache.json +80 -14
- glitchlings/lexicon/vector.py +94 -15
- glitchlings/lexicon/wordnet.py +66 -25
- glitchlings/main.py +21 -11
- glitchlings/zoo/__init__.py +5 -1
- glitchlings/zoo/_rate.py +114 -1
- glitchlings/zoo/_rust_extensions.py +143 -0
- glitchlings/zoo/adjax.py +5 -6
- glitchlings/zoo/apostrofae.py +127 -0
- glitchlings/zoo/assets/__init__.py +0 -0
- glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
- glitchlings/zoo/core.py +61 -23
- glitchlings/zoo/jargoyle.py +50 -36
- glitchlings/zoo/redactyl.py +15 -13
- glitchlings/zoo/reduple.py +5 -6
- glitchlings/zoo/rushmore.py +5 -6
- glitchlings/zoo/scannequin.py +5 -6
- glitchlings/zoo/typogre.py +8 -6
- glitchlings/zoo/zeedub.py +8 -6
- {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/METADATA +40 -4
- glitchlings-0.4.4.dist-info/RECORD +47 -0
- glitchlings/lexicon/graph.py +0 -282
- glitchlings-0.4.2.dist-info/RECORD +0 -42
- {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.2.dist-info → glitchlings-0.4.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Smart-quote glitchling that swaps straight quotes for fancy counterparts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import random
|
|
7
|
+
from functools import cache
|
|
8
|
+
from importlib import resources
|
|
9
|
+
from typing import Any, Sequence, cast
|
|
10
|
+
|
|
11
|
+
from ._rust_extensions import get_rust_operation
|
|
12
|
+
from .core import AttackOrder, AttackWave, Gaggle, Glitchling
|
|
13
|
+
|
|
14
|
+
# Load Rust-accelerated operation if available
|
|
15
|
+
_apostrofae_rust = get_rust_operation("apostrofae")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@cache
def _load_replacement_pairs() -> dict[str, list[tuple[str, str]]]:
    """Read the bundled quote table and normalise it into ``(left, right)`` tuples.

    The table is read from ``apostrofae_pairs.json`` in this package's ``assets``
    sub-package. Each key is a straight glyph and each value is a list of
    candidate replacements; entries that do not contain exactly two items are
    dropped. ``functools.cache`` memoises the result, so the asset is parsed at
    most once per process and every caller shares the same dictionary object.
    """

    asset = resources.files(f"{__package__}.assets").joinpath("apostrofae_pairs.json")
    with asset.open("r", encoding="utf-8") as stream:
        raw_table: dict[str, list[Sequence[str]]] = json.load(stream)

    return {
        glyph: [(entry[0], entry[1]) for entry in entries if len(entry) == 2]
        for glyph, entries in raw_table.items()
    }
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _find_quote_pairs(text: str) -> list[tuple[int, int, str]]:
|
|
33
|
+
"""Return all balanced pairs of straight quotes in ``text``.
|
|
34
|
+
|
|
35
|
+
The search walks the string once, pairing sequential occurrences of each quote
|
|
36
|
+
glyph. Unmatched openers remain untouched so contractions (e.g. ``it's``)
|
|
37
|
+
survive unmodified.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
stacks: dict[str, int | None] = {'"': None, "'": None, "`": None}
|
|
41
|
+
pairs: list[tuple[int, int, str]] = []
|
|
42
|
+
|
|
43
|
+
for index, ch in enumerate(text):
|
|
44
|
+
if ch not in stacks:
|
|
45
|
+
continue
|
|
46
|
+
start = stacks[ch]
|
|
47
|
+
if start is None:
|
|
48
|
+
stacks[ch] = index
|
|
49
|
+
else:
|
|
50
|
+
pairs.append((start, index, ch))
|
|
51
|
+
stacks[ch] = None
|
|
52
|
+
|
|
53
|
+
return pairs
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _apostrofae_python(text: str, *, rng: random.Random) -> str:
    """Pure-Python implementation that swaps paired straight quotes for fancy ones.

    Each pair reported by ``_find_quote_pairs`` gets one ``(left, right)``
    replacement drawn with ``rng.choice`` from the options loaded for its
    glyph. Glyphs with no configured options are left as they are, and ``text``
    is returned unchanged when it contains no pairs.
    """

    table = _load_replacement_pairs()
    matches = _find_quote_pairs(text)
    if not matches:
        return text

    buffer = list(text)
    for open_at, close_at, glyph in matches:
        choices = table.get(glyph)
        if choices:
            # One draw per pair keeps the opening and closing glyphs matched.
            buffer[open_at], buffer[close_at] = rng.choice(choices)
    return "".join(buffer)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def smart_quotes(
    text: str,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Swap paired straight quotes, apostrophes, and backticks for fancy glyphs.

    Args:
        text: Input string; an empty string is returned as-is.
        seed: Seed for a fresh ``random.Random``; only used when ``rng`` is ``None``.
        rng: Random generator to draw from. Takes precedence over ``seed``.

    Returns:
        The text produced by the Rust operation when one was loaded, otherwise
        by the pure-Python fallback.
    """

    if not text:
        return text

    generator = random.Random(seed) if rng is None else rng

    if _apostrofae_rust is None:
        return _apostrofae_python(text, rng=generator)
    return cast(str, _apostrofae_rust(text, generator))
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class Apostrofae(Glitchling):
    """Glitchling whose corruption function is ``smart_quotes``.

    It registers at character scope with normal ordering and advertises itself
    to the pipeline as the ``"apostrofae"`` operation.
    """

    def __init__(self, *, seed: int | None = None) -> None:
        # Keep the caller-supplied seed separately from the seed the base class tracks.
        self._master_seed: int | None = seed
        super().__init__(
            name="Apostrofae",
            corruption_function=smart_quotes,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.NORMAL,
            seed=seed,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return the descriptor identifying this glitchling to the pipeline."""
        return {"type": "apostrofae"}

    def reset_rng(self, seed: int | None = None) -> None:  # pragma: no cover - exercised indirectly
        """Reset the generator, deriving a glitchling-specific seed when one is given."""
        if seed is None:
            super().reset_rng(None)
            return

        self._master_seed = seed
        super().reset_rng(seed)
        if self.seed is None:
            return

        # Re-seed from a value derived from the master seed and this glitchling's name.
        self.seed = int(Gaggle.derive_seed(int(seed), self.name, 0))
        self.rng = random.Random(self.seed)
        self.kwargs["seed"] = self.seed
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
apostrofae = Apostrofae()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
__all__ = ["Apostrofae", "apostrofae", "smart_quotes"]
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"\"": [
|
|
3
|
+
["“", "”"],
|
|
4
|
+
["„", "“"],
|
|
5
|
+
["«", "»"],
|
|
6
|
+
["‹", "›"],
|
|
7
|
+
["『", "』"],
|
|
8
|
+
["「", "」"],
|
|
9
|
+
["﹁", "﹂"],
|
|
10
|
+
["﹃", "﹄"],
|
|
11
|
+
["〝", "〞"],
|
|
12
|
+
["❝", "❞"]
|
|
13
|
+
],
|
|
14
|
+
"'": [
|
|
15
|
+
["‘", "’"],
|
|
16
|
+
["‚", "‘"],
|
|
17
|
+
["‹", "›"],
|
|
18
|
+
["❮", "❯"],
|
|
19
|
+
["❛", "❜"],
|
|
20
|
+
["﹇", "﹈"]
|
|
21
|
+
],
|
|
22
|
+
"`": [
|
|
23
|
+
["‵", "′"],
|
|
24
|
+
["﹁", "﹂"],
|
|
25
|
+
["﹃", "﹄"],
|
|
26
|
+
["⌈", "⌉"],
|
|
27
|
+
["⌊", "⌋"],
|
|
28
|
+
["⎡", "⎤"],
|
|
29
|
+
["⎣", "⎦"],
|
|
30
|
+
["〝", "〞"]
|
|
31
|
+
]
|
|
32
|
+
}
|
glitchlings/zoo/core.py
CHANGED
|
@@ -7,18 +7,16 @@ import random
|
|
|
7
7
|
from collections.abc import Mapping, Sequence
|
|
8
8
|
from enum import IntEnum, auto
|
|
9
9
|
from hashlib import blake2s
|
|
10
|
-
from typing import TYPE_CHECKING, Any, Callable, Protocol, TypedDict, Union
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Protocol, TypedDict, TypeGuard, Union, cast
|
|
11
11
|
|
|
12
12
|
from ..compat import get_datasets_dataset, require_datasets
|
|
13
|
+
from ._rust_extensions import get_rust_operation
|
|
13
14
|
|
|
14
15
|
_DatasetsDataset = get_datasets_dataset()
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
except ImportError: # pragma: no cover - compiled extension not present
|
|
20
|
-
_compose_glitchlings_rust = None
|
|
21
|
-
_plan_glitchlings_rust = None
|
|
17
|
+
# Load Rust-accelerated orchestration operations if available
|
|
18
|
+
_compose_glitchlings_rust = get_rust_operation("compose_glitchlings")
|
|
19
|
+
_plan_glitchlings_rust = get_rust_operation("plan_glitchlings")
|
|
22
20
|
|
|
23
21
|
|
|
24
22
|
log = logging.getLogger(__name__)
|
|
@@ -35,6 +33,9 @@ class PlanSpecification(TypedDict):
|
|
|
35
33
|
order: int
|
|
36
34
|
|
|
37
35
|
|
|
36
|
+
TranscriptTurn = dict[str, Any]
|
|
37
|
+
Transcript = list[TranscriptTurn]
|
|
38
|
+
|
|
38
39
|
PlanEntry = Union["Glitchling", Mapping[str, Any]]
|
|
39
40
|
|
|
40
41
|
|
|
@@ -132,7 +133,12 @@ def _plan_glitchlings_with_rust(
|
|
|
132
133
|
|
|
133
134
|
try:
|
|
134
135
|
plan = _plan_glitchlings_rust(specs, int(master_seed))
|
|
135
|
-
except
|
|
136
|
+
except (
|
|
137
|
+
TypeError,
|
|
138
|
+
ValueError,
|
|
139
|
+
RuntimeError,
|
|
140
|
+
AttributeError,
|
|
141
|
+
): # pragma: no cover - defer to Python fallback on failure
|
|
136
142
|
log.debug("Rust orchestration planning failed; falling back to Python plan", exc_info=True)
|
|
137
143
|
return None
|
|
138
144
|
|
|
@@ -186,7 +192,7 @@ def plan_glitchlings(
|
|
|
186
192
|
|
|
187
193
|
|
|
188
194
|
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
189
|
-
from datasets import Dataset
|
|
195
|
+
from datasets import Dataset
|
|
190
196
|
elif _DatasetsDataset is not None:
|
|
191
197
|
Dataset = _DatasetsDataset
|
|
192
198
|
else:
|
|
@@ -202,8 +208,8 @@ def _is_transcript(
|
|
|
202
208
|
*,
|
|
203
209
|
allow_empty: bool = True,
|
|
204
210
|
require_all_content: bool = False,
|
|
205
|
-
) ->
|
|
206
|
-
"""Return
|
|
211
|
+
) -> TypeGuard[Transcript]:
|
|
212
|
+
"""Return ``True`` when ``value`` appears to be a chat transcript."""
|
|
207
213
|
if not isinstance(value, list):
|
|
208
214
|
return False
|
|
209
215
|
|
|
@@ -351,15 +357,17 @@ class Glitchling:
|
|
|
351
357
|
corrupted = self.corruption_function(text, *args, **kwargs)
|
|
352
358
|
return corrupted
|
|
353
359
|
|
|
354
|
-
def corrupt(self, text: str |
|
|
360
|
+
def corrupt(self, text: str | Transcript) -> str | Transcript:
|
|
355
361
|
"""Apply the corruption function to text or conversational transcripts."""
|
|
356
362
|
if _is_transcript(text):
|
|
357
|
-
transcript = [dict(turn) for turn in text]
|
|
363
|
+
transcript: Transcript = [dict(turn) for turn in text]
|
|
358
364
|
if transcript:
|
|
359
|
-
|
|
365
|
+
content = transcript[-1].get("content")
|
|
366
|
+
if isinstance(content, str):
|
|
367
|
+
transcript[-1]["content"] = self.__corrupt(content, **self.kwargs)
|
|
360
368
|
return transcript
|
|
361
369
|
|
|
362
|
-
return self.__corrupt(text, **self.kwargs)
|
|
370
|
+
return self.__corrupt(cast(str, text), **self.kwargs)
|
|
363
371
|
|
|
364
372
|
def corrupt_dataset(self, dataset: Dataset, columns: list[str]) -> Dataset:
|
|
365
373
|
"""Apply corruption lazily across dataset columns."""
|
|
@@ -383,7 +391,7 @@ class Glitchling:
|
|
|
383
391
|
|
|
384
392
|
return dataset.with_transform(__corrupt_row)
|
|
385
393
|
|
|
386
|
-
def __call__(self, text: str, *args: Any, **kwds: Any) -> str |
|
|
394
|
+
def __call__(self, text: str, *args: Any, **kwds: Any) -> str | Transcript:
|
|
387
395
|
"""Allow a glitchling to be invoked directly like a callable."""
|
|
388
396
|
return self.corrupt(text, *args, **kwds)
|
|
389
397
|
|
|
@@ -426,7 +434,7 @@ class Gaggle(Glitchling):
|
|
|
426
434
|
seed: Master seed used to derive per-glitchling seeds.
|
|
427
435
|
|
|
428
436
|
"""
|
|
429
|
-
super().__init__("Gaggle", self.
|
|
437
|
+
super().__init__("Gaggle", self._corrupt_text, AttackWave.DOCUMENT, seed=seed)
|
|
430
438
|
self._clones_by_index: list[Glitchling] = []
|
|
431
439
|
for idx, glitchling in enumerate(glitchlings):
|
|
432
440
|
clone = glitchling.clone()
|
|
@@ -528,17 +536,47 @@ class Gaggle(Glitchling):
|
|
|
528
536
|
|
|
529
537
|
return descriptors
|
|
530
538
|
|
|
531
|
-
def
|
|
532
|
-
"""Apply each glitchling to
|
|
539
|
+
def _corrupt_text(self, text: str) -> str:
|
|
540
|
+
"""Apply each glitchling to string input sequentially."""
|
|
533
541
|
master_seed = self.seed
|
|
534
542
|
descriptors = self._pipeline_descriptors()
|
|
535
|
-
if
|
|
543
|
+
if (
|
|
544
|
+
master_seed is not None
|
|
545
|
+
and descriptors is not None
|
|
546
|
+
and _compose_glitchlings_rust is not None
|
|
547
|
+
):
|
|
536
548
|
try:
|
|
537
|
-
return _compose_glitchlings_rust(text, descriptors, master_seed)
|
|
538
|
-
except
|
|
549
|
+
return cast(str, _compose_glitchlings_rust(text, descriptors, master_seed))
|
|
550
|
+
except (
|
|
551
|
+
TypeError,
|
|
552
|
+
ValueError,
|
|
553
|
+
RuntimeError,
|
|
554
|
+
AttributeError,
|
|
555
|
+
): # pragma: no cover - fall back to Python execution
|
|
539
556
|
log.debug("Rust pipeline failed; falling back", exc_info=True)
|
|
540
557
|
|
|
541
558
|
corrupted = text
|
|
542
559
|
for glitchling in self.apply_order:
|
|
543
|
-
|
|
560
|
+
next_value = glitchling.corrupt(corrupted)
|
|
561
|
+
if not isinstance(next_value, str):
|
|
562
|
+
message = "Glitchling pipeline produced non-string output for string input"
|
|
563
|
+
raise TypeError(message)
|
|
564
|
+
corrupted = next_value
|
|
565
|
+
|
|
544
566
|
return corrupted
|
|
567
|
+
|
|
568
|
+
def corrupt(self, text: str | Transcript) -> str | Transcript:
    """Run the gaggle over a string, or over the last turn of a transcript.

    A plain string goes straight to ``_corrupt_text``. For a transcript, every
    turn is shallow-copied and only the final turn's ``content`` is corrupted,
    and only when that content is a string.

    Raises:
        TypeError: If ``text`` is neither a string nor a transcript.
    """
    if isinstance(text, str):
        return self._corrupt_text(text)

    if not _is_transcript(text):
        message = f"Unsupported text type for Gaggle corruption: {type(text)!r}"
        raise TypeError(message)

    # Copy each turn so the caller's dictionaries are never mutated.
    copied_turns: Transcript = [dict(turn) for turn in text]
    if copied_turns:
        final_turn = copied_turns[-1]
        body = final_turn.get("content")
        if isinstance(body, str):
            final_turn["content"] = self._corrupt_text(body)
    return copied_turns
|
glitchlings/zoo/jargoyle.py
CHANGED
|
@@ -2,20 +2,29 @@ import random
|
|
|
2
2
|
import re
|
|
3
3
|
from collections.abc import Iterable
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
+
from types import ModuleType
|
|
5
6
|
from typing import Any, Literal, cast
|
|
6
7
|
|
|
7
8
|
from glitchlings.lexicon import Lexicon, get_default_lexicon
|
|
8
9
|
|
|
10
|
+
from ._rate import resolve_rate
|
|
11
|
+
from .core import AttackWave, Glitchling
|
|
12
|
+
|
|
13
|
+
_wordnet_module: ModuleType | None
|
|
14
|
+
|
|
9
15
|
try: # pragma: no cover - optional WordNet dependency
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
import glitchlings.lexicon.wordnet as _wordnet_module
|
|
17
|
+
except (
|
|
18
|
+
ImportError,
|
|
19
|
+
ModuleNotFoundError,
|
|
20
|
+
AttributeError,
|
|
21
|
+
): # pragma: no cover - triggered when nltk unavailable
|
|
22
|
+
_wordnet_module = None
|
|
23
|
+
|
|
24
|
+
_wordnet_runtime: ModuleType | None = _wordnet_module
|
|
25
|
+
|
|
26
|
+
WordNetLexicon: type[Lexicon] | None
|
|
27
|
+
if _wordnet_runtime is None:
|
|
19
28
|
|
|
20
29
|
def _lexicon_dependencies_available() -> bool:
|
|
21
30
|
return False
|
|
@@ -26,9 +35,12 @@ except Exception: # pragma: no cover - triggered when nltk unavailable
|
|
|
26
35
|
"and download its WordNet corpus manually if you need legacy synonyms."
|
|
27
36
|
)
|
|
28
37
|
|
|
38
|
+
WordNetLexicon = None
|
|
39
|
+
else:
|
|
40
|
+
WordNetLexicon = cast(type[Lexicon], _wordnet_runtime.WordNetLexicon)
|
|
41
|
+
_lexicon_dependencies_available = _wordnet_runtime.dependencies_available
|
|
42
|
+
_lexicon_ensure_wordnet = _wordnet_runtime.ensure_wordnet
|
|
29
43
|
|
|
30
|
-
from ._rate import resolve_rate
|
|
31
|
-
from .core import AttackWave, Glitchling
|
|
32
44
|
|
|
33
45
|
ensure_wordnet = _lexicon_ensure_wordnet
|
|
34
46
|
|
|
@@ -41,7 +53,7 @@ def dependencies_available() -> bool:
|
|
|
41
53
|
try:
|
|
42
54
|
# Fall back to the configured default lexicon (typically the bundled vector cache).
|
|
43
55
|
get_default_lexicon(seed=None)
|
|
44
|
-
except
|
|
56
|
+
except (RuntimeError, ImportError, ModuleNotFoundError, AttributeError):
|
|
45
57
|
return False
|
|
46
58
|
return True
|
|
47
59
|
|
|
@@ -169,34 +181,36 @@ def substitute_random_synonyms(
|
|
|
169
181
|
candidate_indices: list[int] = []
|
|
170
182
|
candidate_metadata: dict[int, CandidateInfo] = {}
|
|
171
183
|
for idx, tok in enumerate(tokens):
|
|
172
|
-
if idx % 2
|
|
173
|
-
|
|
174
|
-
if not core_word:
|
|
175
|
-
continue
|
|
176
|
-
|
|
177
|
-
chosen_pos: str | None = None
|
|
178
|
-
synonyms: list[str] = []
|
|
184
|
+
if idx % 2 != 0 or not tok or tok.isspace():
|
|
185
|
+
continue
|
|
179
186
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
synonyms = active_lexicon.get_synonyms(core_word, pos=pos)
|
|
184
|
-
if synonyms:
|
|
185
|
-
chosen_pos = pos
|
|
186
|
-
break
|
|
187
|
+
prefix, core_word, suffix = _split_token(tok)
|
|
188
|
+
if not core_word:
|
|
189
|
+
continue
|
|
187
190
|
|
|
188
|
-
|
|
189
|
-
|
|
191
|
+
chosen_pos: str | None = None
|
|
192
|
+
synonyms: list[str] = []
|
|
190
193
|
|
|
194
|
+
for tag in target_pos:
|
|
195
|
+
if not active_lexicon.supports_pos(tag):
|
|
196
|
+
continue
|
|
197
|
+
synonyms = active_lexicon.get_synonyms(core_word, pos=tag)
|
|
191
198
|
if synonyms:
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
199
|
+
chosen_pos = tag
|
|
200
|
+
break
|
|
201
|
+
|
|
202
|
+
if not synonyms and active_lexicon.supports_pos(None):
|
|
203
|
+
synonyms = active_lexicon.get_synonyms(core_word, pos=None)
|
|
204
|
+
|
|
205
|
+
if synonyms:
|
|
206
|
+
candidate_indices.append(idx)
|
|
207
|
+
candidate_metadata[idx] = CandidateInfo(
|
|
208
|
+
prefix=prefix,
|
|
209
|
+
core_word=core_word,
|
|
210
|
+
suffix=suffix,
|
|
211
|
+
part_of_speech=chosen_pos,
|
|
212
|
+
synonyms=synonyms,
|
|
213
|
+
)
|
|
200
214
|
|
|
201
215
|
if not candidate_indices:
|
|
202
216
|
return text
|
glitchlings/zoo/redactyl.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import random
|
|
2
2
|
import re
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any, cast
|
|
4
4
|
|
|
5
5
|
from ._rate import resolve_rate
|
|
6
|
+
from ._rust_extensions import get_rust_operation
|
|
6
7
|
from ._sampling import weighted_sample_without_replacement
|
|
7
8
|
from ._text_utils import (
|
|
8
9
|
WordToken,
|
|
@@ -13,11 +14,8 @@ from .core import AttackWave, Glitchling
|
|
|
13
14
|
|
|
14
15
|
FULL_BLOCK = "█"
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
from glitchlings._zoo_rust import redact_words as _redact_words_rust
|
|
19
|
-
except ImportError: # pragma: no cover - compiled extension not present
|
|
20
|
-
_redact_words_rust = None
|
|
17
|
+
# Load Rust-accelerated operation if available
|
|
18
|
+
_redact_words_rust = get_rust_operation("redact_words")
|
|
21
19
|
|
|
22
20
|
|
|
23
21
|
def _python_redact_words(
|
|
@@ -119,13 +117,17 @@ def redact_words(
|
|
|
119
117
|
use_rust = _redact_words_rust is not None and isinstance(merge_adjacent, bool)
|
|
120
118
|
|
|
121
119
|
if use_rust:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
120
|
+
assert _redact_words_rust is not None # Type narrowing for mypy
|
|
121
|
+
return cast(
|
|
122
|
+
str,
|
|
123
|
+
_redact_words_rust(
|
|
124
|
+
text,
|
|
125
|
+
replacement_char,
|
|
126
|
+
clamped_rate,
|
|
127
|
+
merge_adjacent,
|
|
128
|
+
unweighted_flag,
|
|
129
|
+
rng,
|
|
130
|
+
),
|
|
129
131
|
)
|
|
130
132
|
|
|
131
133
|
return _python_redact_words(
|
glitchlings/zoo/reduple.py
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import random
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any, cast
|
|
3
3
|
|
|
4
4
|
from ._rate import resolve_rate
|
|
5
|
+
from ._rust_extensions import get_rust_operation
|
|
5
6
|
from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
|
|
6
7
|
from .core import AttackWave, Glitchling
|
|
7
8
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
except ImportError: # pragma: no cover - compiled extension not present
|
|
11
|
-
_reduplicate_words_rust = None
|
|
9
|
+
# Load Rust-accelerated operation if available
|
|
10
|
+
_reduplicate_words_rust = get_rust_operation("reduplicate_words")
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
def _python_reduplicate_words(
|
|
@@ -94,7 +93,7 @@ def reduplicate_words(
|
|
|
94
93
|
unweighted_flag = bool(unweighted)
|
|
95
94
|
|
|
96
95
|
if _reduplicate_words_rust is not None:
|
|
97
|
-
return _reduplicate_words_rust(text, clamped_rate, unweighted_flag, rng)
|
|
96
|
+
return cast(str, _reduplicate_words_rust(text, clamped_rate, unweighted_flag, rng))
|
|
98
97
|
|
|
99
98
|
return _python_reduplicate_words(
|
|
100
99
|
text,
|
glitchlings/zoo/rushmore.py
CHANGED
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
import math
|
|
2
2
|
import random
|
|
3
3
|
import re
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any, cast
|
|
5
5
|
|
|
6
6
|
from ._rate import resolve_rate
|
|
7
|
+
from ._rust_extensions import get_rust_operation
|
|
7
8
|
from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
|
|
8
9
|
from .core import AttackWave, Glitchling
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
except ImportError: # pragma: no cover - compiled extension not present
|
|
13
|
-
_delete_random_words_rust = None
|
|
11
|
+
# Load Rust-accelerated operation if available
|
|
12
|
+
_delete_random_words_rust = get_rust_operation("delete_random_words")
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
def _python_delete_random_words(
|
|
@@ -97,7 +96,7 @@ def delete_random_words(
|
|
|
97
96
|
unweighted_flag = bool(unweighted)
|
|
98
97
|
|
|
99
98
|
if _delete_random_words_rust is not None:
|
|
100
|
-
return _delete_random_words_rust(text, clamped_rate, unweighted_flag, rng)
|
|
99
|
+
return cast(str, _delete_random_words_rust(text, clamped_rate, unweighted_flag, rng))
|
|
101
100
|
|
|
102
101
|
return _python_delete_random_words(
|
|
103
102
|
text,
|
glitchlings/zoo/scannequin.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
import random
|
|
2
2
|
import re
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any, cast
|
|
4
4
|
|
|
5
5
|
from ._ocr_confusions import load_confusion_table
|
|
6
6
|
from ._rate import resolve_rate
|
|
7
|
+
from ._rust_extensions import get_rust_operation
|
|
7
8
|
from .core import AttackOrder, AttackWave, Glitchling
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
except ImportError: # pragma: no cover - compiled extension not present
|
|
12
|
-
_ocr_artifacts_rust = None
|
|
10
|
+
# Load Rust-accelerated operation if available
|
|
11
|
+
_ocr_artifacts_rust = get_rust_operation("ocr_artifacts")
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
def _python_ocr_artifacts(
|
|
@@ -126,7 +125,7 @@ def ocr_artifacts(
|
|
|
126
125
|
clamped_rate = max(0.0, effective_rate)
|
|
127
126
|
|
|
128
127
|
if _ocr_artifacts_rust is not None:
|
|
129
|
-
return _ocr_artifacts_rust(text, clamped_rate, rng)
|
|
128
|
+
return cast(str, _ocr_artifacts_rust(text, clamped_rate, rng))
|
|
130
129
|
|
|
131
130
|
return _python_ocr_artifacts(text, rate=clamped_rate, rng=rng)
|
|
132
131
|
|
glitchlings/zoo/typogre.py
CHANGED
|
@@ -2,16 +2,15 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import math
|
|
4
4
|
import random
|
|
5
|
-
from typing import Any, Optional
|
|
5
|
+
from typing import Any, Optional, cast
|
|
6
6
|
|
|
7
7
|
from ..util import KEYNEIGHBORS
|
|
8
8
|
from ._rate import resolve_rate
|
|
9
|
+
from ._rust_extensions import get_rust_operation
|
|
9
10
|
from .core import AttackOrder, AttackWave, Glitchling
|
|
10
11
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
except ImportError: # pragma: no cover - compiled extension not present
|
|
14
|
-
_fatfinger_rust = None
|
|
12
|
+
# Load Rust-accelerated operation if available
|
|
13
|
+
_fatfinger_rust = get_rust_operation("fatfinger")
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
def _python_unichar(text: str, rng: random.Random) -> str:
|
|
@@ -168,7 +167,10 @@ def fatfinger(
|
|
|
168
167
|
layout = getattr(KEYNEIGHBORS, keyboard)
|
|
169
168
|
|
|
170
169
|
if _fatfinger_rust is not None:
|
|
171
|
-
return
|
|
170
|
+
return cast(
|
|
171
|
+
str,
|
|
172
|
+
_fatfinger_rust(text, max_change_rate=clamped_rate, layout=layout, rng=rng),
|
|
173
|
+
)
|
|
172
174
|
|
|
173
175
|
return _fatfinger_python(text, rate=clamped_rate, layout=layout, rng=rng)
|
|
174
176
|
|
glitchlings/zoo/zeedub.py
CHANGED
|
@@ -3,15 +3,14 @@ from __future__ import annotations
|
|
|
3
3
|
import math
|
|
4
4
|
import random
|
|
5
5
|
from collections.abc import Sequence
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, cast
|
|
7
7
|
|
|
8
8
|
from ._rate import resolve_rate
|
|
9
|
+
from ._rust_extensions import get_rust_operation
|
|
9
10
|
from .core import AttackOrder, AttackWave, Glitchling
|
|
10
11
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
except ImportError: # pragma: no cover - compiled extension not present
|
|
14
|
-
_inject_zero_widths_rust = None
|
|
12
|
+
# Load Rust-accelerated operation if available
|
|
13
|
+
_inject_zero_widths_rust = get_rust_operation("inject_zero_widths")
|
|
15
14
|
|
|
16
15
|
_DEFAULT_ZERO_WIDTH_CHARACTERS: tuple[str, ...] = (
|
|
17
16
|
"\u200b", # ZERO WIDTH SPACE
|
|
@@ -115,7 +114,10 @@ def insert_zero_widths(
|
|
|
115
114
|
if hasattr(rng, "getstate"):
|
|
116
115
|
python_state = rng.getstate()
|
|
117
116
|
rng.setstate(state)
|
|
118
|
-
rust_result =
|
|
117
|
+
rust_result = cast(
|
|
118
|
+
str,
|
|
119
|
+
_inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng),
|
|
120
|
+
)
|
|
119
121
|
if rust_result == python_result:
|
|
120
122
|
return rust_result
|
|
121
123
|
if python_state is not None and hasattr(rng, "setstate"):
|