glitchlings 0.4.4__cp313-cp313-win_amd64.whl → 0.5.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +4 -0
- glitchlings/_zoo_rust.cp313-win_amd64.pyd +0 -0
- glitchlings/compat.py +2 -4
- glitchlings/config.py +14 -28
- glitchlings/dev/__init__.py +5 -0
- glitchlings/dev/sync_assets.py +153 -0
- glitchlings/dlc/_shared.py +6 -6
- glitchlings/dlc/huggingface.py +6 -6
- glitchlings/dlc/prime.py +1 -1
- glitchlings/dlc/pytorch.py +3 -3
- glitchlings/dlc/pytorch_lightning.py +4 -10
- glitchlings/lexicon/_cache.py +3 -5
- glitchlings/lexicon/vector.py +6 -5
- glitchlings/lexicon/wordnet.py +4 -8
- glitchlings/util/hokey_generator.py +144 -0
- glitchlings/util/stretch_locator.py +140 -0
- glitchlings/util/stretchability.py +370 -0
- glitchlings/zoo/__init__.py +5 -1
- glitchlings/zoo/_ocr_confusions.py +3 -3
- glitchlings/zoo/_text_utils.py +10 -9
- glitchlings/zoo/adjax.py +3 -18
- glitchlings/zoo/apostrofae.py +2 -5
- glitchlings/zoo/assets/__init__.py +54 -0
- glitchlings/zoo/assets/hokey_assets.json +193 -0
- glitchlings/zoo/hokey.py +173 -0
- glitchlings/zoo/jargoyle.py +2 -16
- glitchlings/zoo/mim1c.py +2 -17
- glitchlings/zoo/redactyl.py +3 -17
- glitchlings/zoo/reduple.py +3 -17
- glitchlings/zoo/rushmore.py +3 -20
- glitchlings/zoo/scannequin.py +3 -20
- glitchlings/zoo/typogre.py +2 -19
- glitchlings/zoo/zeedub.py +2 -13
- {glitchlings-0.4.4.dist-info → glitchlings-0.5.0.dist-info}/METADATA +29 -6
- glitchlings-0.5.0.dist-info/RECORD +53 -0
- glitchlings/zoo/_rate.py +0 -131
- glitchlings-0.4.4.dist-info/RECORD +0 -47
- /glitchlings/zoo/{ocr_confusions.tsv → assets/ocr_confusions.tsv} +0 -0
- {glitchlings-0.4.4.dist-info → glitchlings-0.5.0.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.4.dist-info → glitchlings-0.5.0.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.4.dist-info → glitchlings-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.4.dist-info → glitchlings-0.5.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from hashlib import blake2b
|
|
5
|
+
from importlib import resources
|
|
6
|
+
from importlib.resources.abc import Traversable
|
|
7
|
+
from typing import Any, BinaryIO, TextIO, cast
|
|
8
|
+
|
|
9
|
+
_DEFAULT_DIGEST_SIZE = 32
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _asset(name: str) -> Traversable:
    """Locate the bundled asset ``name`` inside this package.

    Parameters
    ----------
    name : str
        File name of the asset, joined onto this package's resource root.

    Returns
    -------
    Traversable
        Handle to the asset, suitable for ``open``/``read_text`` calls.

    Raises
    ------
    FileNotFoundError
        If ``name`` does not resolve to a regular file.
    """
    asset = resources.files(__name__).joinpath(name)
    if not asset.is_file():  # pragma: no cover - defensive guard for packaging issues
        raise FileNotFoundError(f"Asset '{name}' not found at {asset}")
    return asset
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def read_text(name: str, *, encoding: str = "utf-8") -> str:
    """Return the decoded contents of a bundled text asset.

    Parameters
    ----------
    name : str
        File name of the asset within this package.
    encoding : str, optional
        Text encoding used to decode the asset. Defaults to ``"utf-8"``.

    Raises
    ------
    FileNotFoundError
        Propagated from ``_asset`` when the asset is missing.
    """
    # ``cast`` only narrows the type for static checkers; it is a no-op at runtime.
    return cast(str, _asset(name).read_text(encoding=encoding))
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def open_text(name: str, *, encoding: str = "utf-8") -> TextIO:
    """Open a bundled text asset for reading.

    The caller owns the returned handle and is responsible for closing it
    (typically via a ``with`` statement).

    Parameters
    ----------
    name : str
        File name of the asset within this package.
    encoding : str, optional
        Text encoding used to decode the asset. Defaults to ``"utf-8"``.

    Raises
    ------
    FileNotFoundError
        Propagated from ``_asset`` when the asset is missing.
    """
    return cast(TextIO, _asset(name).open("r", encoding=encoding))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def open_binary(name: str) -> BinaryIO:
    """Open a bundled binary asset for reading.

    The caller owns the returned handle and is responsible for closing it
    (typically via a ``with`` statement).

    Parameters
    ----------
    name : str
        File name of the asset within this package.

    Raises
    ------
    FileNotFoundError
        Propagated from ``_asset`` when the asset is missing.
    """
    return cast(BinaryIO, _asset(name).open("rb"))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def load_json(name: str, *, encoding: str = "utf-8") -> Any:
    """Deserialize a JSON asset using the shared loader helpers.

    Parameters
    ----------
    name : str
        File name of the JSON asset within this package.
    encoding : str, optional
        Text encoding used to decode the asset. Defaults to ``"utf-8"``.

    Returns
    -------
    Any
        Whatever ``json.load`` produces for the document.
    """
    # The context manager guarantees the handle is closed even if parsing fails.
    with open_text(name, encoding=encoding) as handle:
        return json.load(handle)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def hash_asset(name: str) -> str:
    """Return a BLAKE2b digest for the bundled asset ``name``.

    The asset is streamed in fixed-size chunks so it is never loaded into
    memory in one piece.

    Parameters
    ----------
    name : str
        File name of the asset within this package.

    Returns
    -------
    str
        Hexadecimal digest computed with ``_DEFAULT_DIGEST_SIZE`` bytes of output.
    """
    digest = blake2b(digest_size=_DEFAULT_DIGEST_SIZE)
    with open_binary(name) as handle:
        # ``read`` returns ``b""`` at end of file, which ends the loop.
        while chunk := handle.read(8192):
            digest.update(chunk)
    return digest.hexdigest()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
__all__ = ["read_text", "open_text", "open_binary", "load_json", "hash_asset"]
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
{
|
|
2
|
+
"lexical_prior": {
|
|
3
|
+
"so": 0.92,
|
|
4
|
+
"no": 0.89,
|
|
5
|
+
"go": 0.72,
|
|
6
|
+
"yeah": 0.86,
|
|
7
|
+
"yay": 0.81,
|
|
8
|
+
"ya": 0.7,
|
|
9
|
+
"hey": 0.66,
|
|
10
|
+
"okay": 0.68,
|
|
11
|
+
"ok": 0.64,
|
|
12
|
+
"cool": 0.78,
|
|
13
|
+
"omg": 0.74,
|
|
14
|
+
"wow": 0.88,
|
|
15
|
+
"wee": 0.62,
|
|
16
|
+
"woo": 0.69,
|
|
17
|
+
"woohoo": 0.74,
|
|
18
|
+
"whoa": 0.71,
|
|
19
|
+
"woah": 0.7,
|
|
20
|
+
"yayyy": 0.75,
|
|
21
|
+
"yayyyy": 0.76,
|
|
22
|
+
"yas": 0.79,
|
|
23
|
+
"yass": 0.8,
|
|
24
|
+
"yaaas": 0.82,
|
|
25
|
+
"please": 0.53,
|
|
26
|
+
"pleaseee": 0.57,
|
|
27
|
+
"pleaseeee": 0.6,
|
|
28
|
+
"pleaseeeee": 0.63,
|
|
29
|
+
"lol": 0.83,
|
|
30
|
+
"lmao": 0.65,
|
|
31
|
+
"omggg": 0.75,
|
|
32
|
+
"omgggg": 0.76,
|
|
33
|
+
"squee": 0.64,
|
|
34
|
+
"hahaha": 0.6,
|
|
35
|
+
"haha": 0.56,
|
|
36
|
+
"really": 0.58,
|
|
37
|
+
"very": 0.49,
|
|
38
|
+
"love": 0.55,
|
|
39
|
+
"cute": 0.52,
|
|
40
|
+
"nice": 0.47,
|
|
41
|
+
"sweet": 0.45,
|
|
42
|
+
"yayness": 0.44,
|
|
43
|
+
"ugh": 0.5,
|
|
44
|
+
"aww": 0.61,
|
|
45
|
+
"yess": 0.81,
|
|
46
|
+
"yes": 0.9,
|
|
47
|
+
"pls": 0.48,
|
|
48
|
+
"pleeeease": 0.62,
|
|
49
|
+
"nooo": 0.88,
|
|
50
|
+
"noooo": 0.89,
|
|
51
|
+
"dang": 0.41,
|
|
52
|
+
"geez": 0.39,
|
|
53
|
+
"danggg": 0.44,
|
|
54
|
+
"dangit": 0.38,
|
|
55
|
+
"sick": 0.35,
|
|
56
|
+
"epic": 0.37,
|
|
57
|
+
"rad": 0.5,
|
|
58
|
+
"goal": 0.56,
|
|
59
|
+
"great": 0.46,
|
|
60
|
+
"awesome": 0.51,
|
|
61
|
+
"amazing": 0.52,
|
|
62
|
+
"perfect": 0.49,
|
|
63
|
+
"fantastic": 0.5,
|
|
64
|
+
"stellar": 0.48,
|
|
65
|
+
"yippee": 0.67,
|
|
66
|
+
"stoked": 0.48,
|
|
67
|
+
"yikes": 0.43,
|
|
68
|
+
"gosh": 0.41,
|
|
69
|
+
"heck": 0.36
|
|
70
|
+
},
|
|
71
|
+
"interjections": [
|
|
72
|
+
"wow",
|
|
73
|
+
"omg",
|
|
74
|
+
"hey",
|
|
75
|
+
"ugh",
|
|
76
|
+
"yay",
|
|
77
|
+
"yayyy",
|
|
78
|
+
"yayyyy",
|
|
79
|
+
"woo",
|
|
80
|
+
"woohoo",
|
|
81
|
+
"whoa",
|
|
82
|
+
"woah",
|
|
83
|
+
"whooo",
|
|
84
|
+
"ah",
|
|
85
|
+
"aw",
|
|
86
|
+
"aww",
|
|
87
|
+
"hmm",
|
|
88
|
+
"huh",
|
|
89
|
+
"yo",
|
|
90
|
+
"yikes",
|
|
91
|
+
"gah",
|
|
92
|
+
"phew",
|
|
93
|
+
"sheesh"
|
|
94
|
+
],
|
|
95
|
+
"intensifiers": [
|
|
96
|
+
"so",
|
|
97
|
+
"very",
|
|
98
|
+
"really",
|
|
99
|
+
"super",
|
|
100
|
+
"mega",
|
|
101
|
+
"ultra",
|
|
102
|
+
"too",
|
|
103
|
+
"way",
|
|
104
|
+
"crazy",
|
|
105
|
+
"insanely",
|
|
106
|
+
"totally",
|
|
107
|
+
"extremely",
|
|
108
|
+
"seriously",
|
|
109
|
+
"absolutely",
|
|
110
|
+
"completely",
|
|
111
|
+
"entirely",
|
|
112
|
+
"utterly",
|
|
113
|
+
"hella",
|
|
114
|
+
"wicked",
|
|
115
|
+
"truly"
|
|
116
|
+
],
|
|
117
|
+
"evaluatives": [
|
|
118
|
+
"cool",
|
|
119
|
+
"great",
|
|
120
|
+
"awesome",
|
|
121
|
+
"amazing",
|
|
122
|
+
"perfect",
|
|
123
|
+
"nice",
|
|
124
|
+
"sweet",
|
|
125
|
+
"lovely",
|
|
126
|
+
"loving",
|
|
127
|
+
"silly",
|
|
128
|
+
"wild",
|
|
129
|
+
"fun",
|
|
130
|
+
"funny",
|
|
131
|
+
"adorable",
|
|
132
|
+
"cute",
|
|
133
|
+
"fantastic",
|
|
134
|
+
"fabulous",
|
|
135
|
+
"brilliant",
|
|
136
|
+
"stellar",
|
|
137
|
+
"rad",
|
|
138
|
+
"epic",
|
|
139
|
+
"delightful",
|
|
140
|
+
"gorgeous"
|
|
141
|
+
],
|
|
142
|
+
"positive_lexicon": [
|
|
143
|
+
"love",
|
|
144
|
+
"loved",
|
|
145
|
+
"loving",
|
|
146
|
+
"like",
|
|
147
|
+
"liked",
|
|
148
|
+
"awesome",
|
|
149
|
+
"amazing",
|
|
150
|
+
"yay",
|
|
151
|
+
"great",
|
|
152
|
+
"good",
|
|
153
|
+
"fun",
|
|
154
|
+
"funny",
|
|
155
|
+
"blessed",
|
|
156
|
+
"excited",
|
|
157
|
+
"cool",
|
|
158
|
+
"best",
|
|
159
|
+
"beautiful",
|
|
160
|
+
"happy",
|
|
161
|
+
"happiest",
|
|
162
|
+
"joy",
|
|
163
|
+
"joyful",
|
|
164
|
+
"thrilled",
|
|
165
|
+
"ecstatic",
|
|
166
|
+
"stoked",
|
|
167
|
+
"pumped",
|
|
168
|
+
"glad"
|
|
169
|
+
],
|
|
170
|
+
"negative_lexicon": [
|
|
171
|
+
"bad",
|
|
172
|
+
"sad",
|
|
173
|
+
"angry",
|
|
174
|
+
"annoyed",
|
|
175
|
+
"mad",
|
|
176
|
+
"terrible",
|
|
177
|
+
"awful",
|
|
178
|
+
"hate",
|
|
179
|
+
"hated",
|
|
180
|
+
"crying",
|
|
181
|
+
"hurt",
|
|
182
|
+
"tired",
|
|
183
|
+
"worst",
|
|
184
|
+
"ugh",
|
|
185
|
+
"nope",
|
|
186
|
+
"upset",
|
|
187
|
+
"frustrated",
|
|
188
|
+
"drained",
|
|
189
|
+
"exhausted",
|
|
190
|
+
"bummed",
|
|
191
|
+
"grumpy"
|
|
192
|
+
]
|
|
193
|
+
}
|
glitchlings/zoo/hokey.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Hokey glitchling that performs expressive lengthening."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
|
|
8
|
+
from ..util.hokey_generator import HokeyConfig, HokeyGenerator, StretchEvent
|
|
9
|
+
from ..util.stretchability import StretchabilityAnalyzer
|
|
10
|
+
from ._rust_extensions import get_rust_operation
|
|
11
|
+
from .core import AttackOrder, AttackWave, Gaggle
|
|
12
|
+
from .core import Glitchling as GlitchlingBase
|
|
13
|
+
|
|
14
|
+
_hokey_rust = get_rust_operation("hokey")
|
|
15
|
+
_ANALYZER = StretchabilityAnalyzer()
|
|
16
|
+
_GENERATOR = HokeyGenerator(analyzer=_ANALYZER)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _python_extend_vowels(
    text: str,
    *,
    rate: float,
    extension_min: int,
    extension_max: int,
    word_length_threshold: int,
    base_p: float,
    rng: random.Random,
    return_trace: bool = False,
) -> str | tuple[str, list[StretchEvent]]:
    """Run the pure-Python stretching pipeline on ``text``.

    Bundles the tuning parameters into a ``HokeyConfig`` and delegates to the
    module-level ``_GENERATOR``.

    Returns
    -------
    str or tuple[str, list[StretchEvent]]
        The transformed text, paired with the generator's stretch events when
        ``return_trace`` is true.
    """
    config = HokeyConfig(
        rate=rate,
        extension_min=extension_min,
        extension_max=extension_max,
        word_length_threshold=word_length_threshold,
        base_p=base_p,
    )
    result, events = _GENERATOR.generate(text, rng=rng, config=config)
    return (result, events) if return_trace else result
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def extend_vowels(
    text: str,
    rate: float = 0.3,
    extension_min: int = 2,
    extension_max: int = 5,
    word_length_threshold: int = 6,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    return_trace: bool = False,
    base_p: float | None = None,
) -> str | tuple[str, list[StretchEvent]]:
    """Extend expressive segments of words for emphasis.

    Parameters
    ----------
    text : str
        Input text to transform.
    rate : float, optional
        Global selection rate for candidate words.
    extension_min : int, optional
        Minimum number of extra repetitions for the stretch unit.
    extension_max : int, optional
        Maximum number of extra repetitions for the stretch unit.
    word_length_threshold : int, optional
        Preferred maximum alphabetic length; longer words are de-emphasised but not
        excluded.
    seed : int, optional
        Deterministic seed when ``rng`` is not supplied.
    rng : random.Random, optional
        Random number generator to drive sampling.
    return_trace : bool, optional
        When ``True`` also return the stretch events for introspection.
    base_p : float, optional
        Base probability for the negative-binomial sampler (heavier tails for smaller
        values). Defaults to ``0.45``.

    Returns
    -------
    str or tuple[str, list[StretchEvent]]
        The transformed text, or ``(text, events)`` when ``return_trace`` is true.
        Empty input is returned unchanged (with an empty event list when tracing).
    """
    # Nothing to stretch: answer before touching the RNG or either backend.
    if not text:
        empty_trace: list[StretchEvent] = []
        return (text, empty_trace) if return_trace else text

    if rng is None:
        rng = random.Random(seed)
    base_probability = base_p if base_p is not None else 0.45

    # The Python path is used whenever a trace is requested, and as the
    # fallback when the Rust operation is unavailable.
    if return_trace or _hokey_rust is None:
        return _python_extend_vowels(
            text,
            rate=rate,
            extension_min=extension_min,
            extension_max=extension_max,
            word_length_threshold=word_length_threshold,
            base_p=base_probability,
            rng=rng,
            return_trace=return_trace,
        )

    return cast(
        str,
        _hokey_rust(
            text,
            rate,
            extension_min,
            extension_max,
            word_length_threshold,
            base_probability,
            rng,
        ),
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class Hokey(GlitchlingBase):
    """Glitchling that stretches words using linguistic heuristics."""

    seed: int | None

    def __init__(
        self,
        *,
        rate: float = 0.3,
        extension_min: int = 2,
        extension_max: int = 5,
        word_length_threshold: int = 6,
        base_p: float = 0.45,
        seed: int | None = None,
    ) -> None:
        """Configure the glitchling; every argument is forwarded to the base class.

        Parameters mirror those of ``extend_vowels``.
        """
        # Remember the seed the caller supplied, separately from the seed the
        # base class ends up storing.
        self._master_seed: int | None = seed

        def _corruption_wrapper(text: str, **kwargs: Any) -> str:
            # ``extend_vowels`` may return ``(text, events)``; the corruption
            # function must always hand back a plain string.
            result = extend_vowels(text, **kwargs)
            return result if isinstance(result, str) else result[0]

        super().__init__(
            name="Hokey",
            corruption_function=_corruption_wrapper,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.FIRST,
            seed=seed,
            rate=rate,
            extension_min=extension_min,
            extension_max=extension_max,
            word_length_threshold=word_length_threshold,
            base_p=base_p,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return the ``"hokey"`` operation descriptor built from ``self.kwargs``.

        Missing entries fall back to the same defaults as ``__init__``.
        """
        return {
            "type": "hokey",
            "rate": self.kwargs.get("rate", 0.3),
            "extension_min": self.kwargs.get("extension_min", 2),
            "extension_max": self.kwargs.get("extension_max", 5),
            "word_length_threshold": self.kwargs.get("word_length_threshold", 6),
            "base_p": self.kwargs.get("base_p", 0.45),
        }

    def reset_rng(self, seed: int | None = None) -> None:
        """Reset the RNG, deriving a glitchling-specific seed when one is given.

        With an explicit ``seed`` the base reset runs first; unless that leaves
        ``self.seed`` unset, the seed is replaced by
        ``Gaggle.derive_seed(seed, self.name, 0)`` and both ``self.rng`` and
        ``self.kwargs["seed"]`` are updated to match. Without a seed the call
        simply defers to the base implementation.
        """
        if seed is not None:
            self._master_seed = seed
            super().reset_rng(seed)
            if self.seed is None:
                return
            derived = Gaggle.derive_seed(int(seed), self.name, 0)
            self.seed = int(derived)
            self.rng = random.Random(self.seed)
            self.kwargs["seed"] = self.seed
        else:
            super().reset_rng(None)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
hokey = Hokey()
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
__all__ = ["Hokey", "hokey", "extend_vowels"]
|
glitchlings/zoo/jargoyle.py
CHANGED
|
@@ -7,7 +7,6 @@ from typing import Any, Literal, cast
|
|
|
7
7
|
|
|
8
8
|
from glitchlings.lexicon import Lexicon, get_default_lexicon
|
|
9
9
|
|
|
10
|
-
from ._rate import resolve_rate
|
|
11
10
|
from .core import AttackWave, Glitchling
|
|
12
11
|
|
|
13
12
|
_wordnet_module: ModuleType | None
|
|
@@ -119,7 +118,6 @@ def substitute_random_synonyms(
|
|
|
119
118
|
seed: int | None = None,
|
|
120
119
|
rng: random.Random | None = None,
|
|
121
120
|
*,
|
|
122
|
-
replacement_rate: float | None = None,
|
|
123
121
|
lexicon: Lexicon | None = None,
|
|
124
122
|
) -> str:
|
|
125
123
|
"""Replace words with random lexicon-driven synonyms.
|
|
@@ -144,12 +142,7 @@ def substitute_random_synonyms(
|
|
|
144
142
|
deterministic subsets per word and part-of-speech using the active seed.
|
|
145
143
|
|
|
146
144
|
"""
|
|
147
|
-
effective_rate = resolve_rate(
|
|
148
|
-
rate=rate,
|
|
149
|
-
legacy_value=replacement_rate,
|
|
150
|
-
default=0.1,
|
|
151
|
-
legacy_name="replacement_rate",
|
|
152
|
-
)
|
|
145
|
+
effective_rate = 0.1 if rate is None else rate
|
|
153
146
|
|
|
154
147
|
active_rng: random.Random
|
|
155
148
|
if rng is not None:
|
|
@@ -258,23 +251,16 @@ class Jargoyle(Glitchling):
|
|
|
258
251
|
self,
|
|
259
252
|
*,
|
|
260
253
|
rate: float | None = None,
|
|
261
|
-
replacement_rate: float | None = None,
|
|
262
254
|
part_of_speech: PartOfSpeechInput = "n",
|
|
263
255
|
seed: int | None = None,
|
|
264
256
|
lexicon: Lexicon | None = None,
|
|
265
257
|
) -> None:
|
|
266
|
-
self._param_aliases = {"replacement_rate": "rate"}
|
|
267
258
|
self._owns_lexicon = lexicon is None
|
|
268
259
|
self._external_lexicon_original_seed = (
|
|
269
260
|
lexicon.seed if isinstance(lexicon, Lexicon) else None
|
|
270
261
|
)
|
|
271
262
|
self._initializing = True
|
|
272
|
-
effective_rate = resolve_rate(
|
|
273
|
-
rate=rate,
|
|
274
|
-
legacy_value=replacement_rate,
|
|
275
|
-
default=0.01,
|
|
276
|
-
legacy_name="replacement_rate",
|
|
277
|
-
)
|
|
263
|
+
effective_rate = 0.01 if rate is None else rate
|
|
278
264
|
prepared_lexicon = lexicon or get_default_lexicon(seed=seed)
|
|
279
265
|
if lexicon is not None and seed is not None:
|
|
280
266
|
prepared_lexicon.reseed(seed)
|
glitchlings/zoo/mim1c.py
CHANGED
|
@@ -4,7 +4,6 @@ from typing import Literal
|
|
|
4
4
|
|
|
5
5
|
from confusable_homoglyphs import confusables
|
|
6
6
|
|
|
7
|
-
from ._rate import resolve_rate
|
|
8
7
|
from .core import AttackOrder, AttackWave, Glitchling
|
|
9
8
|
|
|
10
9
|
|
|
@@ -15,8 +14,6 @@ def swap_homoglyphs(
|
|
|
15
14
|
banned_characters: Collection[str] | None = None,
|
|
16
15
|
seed: int | None = None,
|
|
17
16
|
rng: random.Random | None = None,
|
|
18
|
-
*,
|
|
19
|
-
replacement_rate: float | None = None,
|
|
20
17
|
) -> str:
|
|
21
18
|
"""Replace characters with visually confusable homoglyphs.
|
|
22
19
|
|
|
@@ -37,12 +34,7 @@ def swap_homoglyphs(
|
|
|
37
34
|
- Maintains determinism by shuffling candidates and sampling via the provided RNG.
|
|
38
35
|
|
|
39
36
|
"""
|
|
40
|
-
effective_rate = resolve_rate(
|
|
41
|
-
rate=rate,
|
|
42
|
-
legacy_value=replacement_rate,
|
|
43
|
-
default=0.02,
|
|
44
|
-
legacy_name="replacement_rate",
|
|
45
|
-
)
|
|
37
|
+
effective_rate = 0.02 if rate is None else rate
|
|
46
38
|
|
|
47
39
|
if rng is None:
|
|
48
40
|
rng = random.Random(seed)
|
|
@@ -79,18 +71,11 @@ class Mim1c(Glitchling):
|
|
|
79
71
|
self,
|
|
80
72
|
*,
|
|
81
73
|
rate: float | None = None,
|
|
82
|
-
replacement_rate: float | None = None,
|
|
83
74
|
classes: list[str] | Literal["all"] | None = None,
|
|
84
75
|
banned_characters: Collection[str] | None = None,
|
|
85
76
|
seed: int | None = None,
|
|
86
77
|
) -> None:
|
|
87
|
-
|
|
88
|
-
effective_rate = resolve_rate(
|
|
89
|
-
rate=rate,
|
|
90
|
-
legacy_value=replacement_rate,
|
|
91
|
-
default=0.02,
|
|
92
|
-
legacy_name="replacement_rate",
|
|
93
|
-
)
|
|
78
|
+
effective_rate = 0.02 if rate is None else rate
|
|
94
79
|
super().__init__(
|
|
95
80
|
name="Mim1c",
|
|
96
81
|
corruption_function=swap_homoglyphs,
|
glitchlings/zoo/redactyl.py
CHANGED
|
@@ -2,7 +2,6 @@ import random
|
|
|
2
2
|
import re
|
|
3
3
|
from typing import Any, cast
|
|
4
4
|
|
|
5
|
-
from ._rate import resolve_rate
|
|
6
5
|
from ._rust_extensions import get_rust_operation
|
|
7
6
|
from ._sampling import weighted_sample_without_replacement
|
|
8
7
|
from ._text_utils import (
|
|
@@ -97,16 +96,10 @@ def redact_words(
|
|
|
97
96
|
seed: int = 151,
|
|
98
97
|
rng: random.Random | None = None,
|
|
99
98
|
*,
|
|
100
|
-
redaction_rate: float | None = None,
|
|
101
99
|
unweighted: bool = False,
|
|
102
100
|
) -> str:
|
|
103
101
|
"""Redact random words by replacing their characters."""
|
|
104
|
-
effective_rate = resolve_rate(
|
|
105
|
-
rate=rate,
|
|
106
|
-
legacy_value=redaction_rate,
|
|
107
|
-
default=0.025,
|
|
108
|
-
legacy_name="redaction_rate",
|
|
109
|
-
)
|
|
102
|
+
effective_rate = 0.025 if rate is None else rate
|
|
110
103
|
|
|
111
104
|
if rng is None:
|
|
112
105
|
rng = random.Random(seed)
|
|
@@ -148,18 +141,11 @@ class Redactyl(Glitchling):
|
|
|
148
141
|
*,
|
|
149
142
|
replacement_char: str = FULL_BLOCK,
|
|
150
143
|
rate: float | None = None,
|
|
151
|
-
redaction_rate: float | None = None,
|
|
152
144
|
merge_adjacent: bool = False,
|
|
153
145
|
seed: int = 151,
|
|
154
146
|
unweighted: bool = False,
|
|
155
147
|
) -> None:
|
|
156
|
-
|
|
157
|
-
effective_rate = resolve_rate(
|
|
158
|
-
rate=rate,
|
|
159
|
-
legacy_value=redaction_rate,
|
|
160
|
-
default=0.025,
|
|
161
|
-
legacy_name="redaction_rate",
|
|
162
|
-
)
|
|
148
|
+
effective_rate = 0.025 if rate is None else rate
|
|
163
149
|
super().__init__(
|
|
164
150
|
name="Redactyl",
|
|
165
151
|
corruption_function=redact_words,
|
|
@@ -181,7 +167,7 @@ class Redactyl(Glitchling):
|
|
|
181
167
|
return {
|
|
182
168
|
"type": "redact",
|
|
183
169
|
"replacement_char": str(replacement_char),
|
|
184
|
-
"
|
|
170
|
+
"rate": float(rate),
|
|
185
171
|
"merge_adjacent": bool(merge_adjacent),
|
|
186
172
|
"unweighted": unweighted,
|
|
187
173
|
}
|
glitchlings/zoo/reduple.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import random
|
|
2
2
|
from typing import Any, cast
|
|
3
3
|
|
|
4
|
-
from ._rate import resolve_rate
|
|
5
4
|
from ._rust_extensions import get_rust_operation
|
|
6
5
|
from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
|
|
7
6
|
from .core import AttackWave, Glitchling
|
|
@@ -71,7 +70,6 @@ def reduplicate_words(
|
|
|
71
70
|
seed: int | None = None,
|
|
72
71
|
rng: random.Random | None = None,
|
|
73
72
|
*,
|
|
74
|
-
reduplication_rate: float | None = None,
|
|
75
73
|
unweighted: bool = False,
|
|
76
74
|
) -> str:
|
|
77
75
|
"""Randomly reduplicate words in the text.
|
|
@@ -79,12 +77,7 @@ def reduplicate_words(
|
|
|
79
77
|
Falls back to the Python implementation when the optional Rust
|
|
80
78
|
extension is unavailable.
|
|
81
79
|
"""
|
|
82
|
-
effective_rate = resolve_rate(
|
|
83
|
-
rate=rate,
|
|
84
|
-
legacy_value=reduplication_rate,
|
|
85
|
-
default=0.01,
|
|
86
|
-
legacy_name="reduplication_rate",
|
|
87
|
-
)
|
|
80
|
+
effective_rate = 0.01 if rate is None else rate
|
|
88
81
|
|
|
89
82
|
if rng is None:
|
|
90
83
|
rng = random.Random(seed)
|
|
@@ -110,17 +103,10 @@ class Reduple(Glitchling):
|
|
|
110
103
|
self,
|
|
111
104
|
*,
|
|
112
105
|
rate: float | None = None,
|
|
113
|
-
reduplication_rate: float | None = None,
|
|
114
106
|
seed: int | None = None,
|
|
115
107
|
unweighted: bool = False,
|
|
116
108
|
) -> None:
|
|
117
|
-
|
|
118
|
-
effective_rate = resolve_rate(
|
|
119
|
-
rate=rate,
|
|
120
|
-
legacy_value=reduplication_rate,
|
|
121
|
-
default=0.01,
|
|
122
|
-
legacy_name="reduplication_rate",
|
|
123
|
-
)
|
|
109
|
+
effective_rate = 0.01 if rate is None else rate
|
|
124
110
|
super().__init__(
|
|
125
111
|
name="Reduple",
|
|
126
112
|
corruption_function=reduplicate_words,
|
|
@@ -137,7 +123,7 @@ class Reduple(Glitchling):
|
|
|
137
123
|
unweighted = bool(self.kwargs.get("unweighted", False))
|
|
138
124
|
return {
|
|
139
125
|
"type": "reduplicate",
|
|
140
|
-
"
|
|
126
|
+
"rate": float(rate),
|
|
141
127
|
"unweighted": unweighted,
|
|
142
128
|
}
|
|
143
129
|
|
glitchlings/zoo/rushmore.py
CHANGED
|
@@ -3,7 +3,6 @@ import random
|
|
|
3
3
|
import re
|
|
4
4
|
from typing import Any, cast
|
|
5
5
|
|
|
6
|
-
from ._rate import resolve_rate
|
|
7
6
|
from ._rust_extensions import get_rust_operation
|
|
8
7
|
from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
|
|
9
8
|
from .core import AttackWave, Glitchling
|
|
@@ -74,20 +73,13 @@ def delete_random_words(
|
|
|
74
73
|
rate: float | None = None,
|
|
75
74
|
seed: int | None = None,
|
|
76
75
|
rng: random.Random | None = None,
|
|
77
|
-
*,
|
|
78
|
-
max_deletion_rate: float | None = None,
|
|
79
76
|
unweighted: bool = False,
|
|
80
77
|
) -> str:
|
|
81
78
|
"""Delete random words from the input text.
|
|
82
79
|
|
|
83
80
|
Uses the optional Rust implementation when available.
|
|
84
81
|
"""
|
|
85
|
-
effective_rate = resolve_rate(
|
|
86
|
-
rate=rate,
|
|
87
|
-
legacy_value=max_deletion_rate,
|
|
88
|
-
default=0.01,
|
|
89
|
-
legacy_name="max_deletion_rate",
|
|
90
|
-
)
|
|
82
|
+
effective_rate = 0.01 if rate is None else rate
|
|
91
83
|
|
|
92
84
|
if rng is None:
|
|
93
85
|
rng = random.Random(seed)
|
|
@@ -113,17 +105,10 @@ class Rushmore(Glitchling):
|
|
|
113
105
|
self,
|
|
114
106
|
*,
|
|
115
107
|
rate: float | None = None,
|
|
116
|
-
max_deletion_rate: float | None = None,
|
|
117
108
|
seed: int | None = None,
|
|
118
109
|
unweighted: bool = False,
|
|
119
110
|
) -> None:
|
|
120
|
-
|
|
121
|
-
effective_rate = resolve_rate(
|
|
122
|
-
rate=rate,
|
|
123
|
-
legacy_value=max_deletion_rate,
|
|
124
|
-
default=0.01,
|
|
125
|
-
legacy_name="max_deletion_rate",
|
|
126
|
-
)
|
|
111
|
+
effective_rate = 0.01 if rate is None else rate
|
|
127
112
|
super().__init__(
|
|
128
113
|
name="Rushmore",
|
|
129
114
|
corruption_function=delete_random_words,
|
|
@@ -135,14 +120,12 @@ class Rushmore(Glitchling):
|
|
|
135
120
|
|
|
136
121
|
def pipeline_operation(self) -> dict[str, Any] | None:
|
|
137
122
|
rate = self.kwargs.get("rate")
|
|
138
|
-
if rate is None:
|
|
139
|
-
rate = self.kwargs.get("max_deletion_rate")
|
|
140
123
|
if rate is None:
|
|
141
124
|
return None
|
|
142
125
|
unweighted = bool(self.kwargs.get("unweighted", False))
|
|
143
126
|
return {
|
|
144
127
|
"type": "delete",
|
|
145
|
-
"
|
|
128
|
+
"rate": float(rate),
|
|
146
129
|
"unweighted": unweighted,
|
|
147
130
|
}
|
|
148
131
|
|