glitchlings 0.2.1__cp312-cp312-win_amd64.whl → 0.2.3__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
- glitchlings/dlc/prime.py +44 -22
- glitchlings/main.py +17 -39
- glitchlings/util/__init__.py +30 -0
- glitchlings/zoo/__init__.py +96 -19
- glitchlings/zoo/_ocr_confusions.py +34 -0
- glitchlings/zoo/_rate.py +21 -0
- glitchlings/zoo/core.py +56 -52
- glitchlings/zoo/jargoyle.py +77 -16
- glitchlings/zoo/mim1c.py +24 -5
- glitchlings/zoo/ocr_confusions.tsv +30 -0
- glitchlings/zoo/redactyl.py +46 -9
- glitchlings/zoo/reduple.py +36 -8
- glitchlings/zoo/rushmore.py +40 -8
- glitchlings/zoo/scannequin.py +42 -37
- glitchlings/zoo/typogre.py +36 -8
- {glitchlings-0.2.1.dist-info → glitchlings-0.2.3.dist-info}/METADATA +28 -61
- glitchlings-0.2.3.dist-info/RECORD +26 -0
- glitchlings/_typogre_rust.cp312-win_amd64.pyd +0 -0
- glitchlings-0.2.1.dist-info/RECORD +0 -24
- {glitchlings-0.2.1.dist-info → glitchlings-0.2.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.2.1.dist-info → glitchlings-0.2.3.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.2.1.dist-info → glitchlings-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.2.1.dist-info → glitchlings-0.2.3.dist-info}/top_level.txt +0 -0
Binary file
|
glitchlings/dlc/prime.py
CHANGED
@@ -79,8 +79,8 @@ def tutorial_level(
|
|
79
79
|
) -> vf.Environment:
|
80
80
|
"""Create a low-corruption environment using tuned defaults."""
|
81
81
|
|
82
|
-
tuned_mim1c = Mim1c(
|
83
|
-
tuned_typogre = Typogre(
|
82
|
+
tuned_mim1c = Mim1c(rate=0.01 * difficulty.value)
|
83
|
+
tuned_typogre = Typogre(rate=0.025 * difficulty.value)
|
84
84
|
|
85
85
|
return load_environment(
|
86
86
|
env,
|
@@ -220,32 +220,54 @@ def echo_chamber(
|
|
220
220
|
"Specify which split to use when the dataset loads as a DatasetDict."
|
221
221
|
)
|
222
222
|
|
223
|
-
|
224
|
-
|
223
|
+
filtered_dataset = hf_dataset.filter(
|
224
|
+
lambda row: row.get(column) is not None,
|
225
|
+
load_from_cache_file=False,
|
226
|
+
)
|
225
227
|
|
226
|
-
|
227
|
-
value = row.get(column)
|
228
|
-
if value is None:
|
229
|
-
continue
|
228
|
+
source_column_names = list(filtered_dataset.column_names)
|
230
229
|
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
answers.append(text)
|
230
|
+
def _build_prompt(row: dict[str, Any]) -> dict[str, Any]:
|
231
|
+
text = str(row[column])
|
232
|
+
prompt = [
|
233
|
+
{"role": "system", "content": instructions},
|
234
|
+
{"role": "user", "content": f"Corrupted text:\n{text}"},
|
235
|
+
]
|
236
|
+
return {"prompt": prompt, "answer": text}
|
239
237
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
238
|
+
base_dataset = filtered_dataset.map(
|
239
|
+
_build_prompt,
|
240
|
+
remove_columns=source_column_names,
|
241
|
+
load_from_cache_file=False,
|
242
|
+
)
|
244
243
|
|
245
|
-
|
244
|
+
try:
|
245
|
+
dataset_length = len(base_dataset) # type: ignore[arg-type]
|
246
|
+
except TypeError:
|
247
|
+
preview_rows: list[dict[str, Any]]
|
248
|
+
take_fn = getattr(base_dataset, "take", None)
|
249
|
+
if callable(take_fn):
|
250
|
+
preview_rows = list(take_fn(1))
|
251
|
+
else:
|
252
|
+
iterator = iter(base_dataset)
|
253
|
+
try:
|
254
|
+
first_row = next(iterator)
|
255
|
+
except StopIteration:
|
256
|
+
preview_rows = []
|
257
|
+
else:
|
258
|
+
preview_rows = [first_row]
|
259
|
+
if not preview_rows:
|
260
|
+
raise ValueError(
|
261
|
+
f"Column '{column}' did not yield any textual entries in dataset '{dataset_id}'."
|
262
|
+
)
|
263
|
+
else:
|
264
|
+
if dataset_length == 0:
|
265
|
+
raise ValueError(
|
266
|
+
f"Column '{column}' did not yield any textual entries in dataset '{dataset_id}'."
|
267
|
+
)
|
246
268
|
|
247
269
|
gaggle = _as_gaggle(glitchlings, seed=seed)
|
248
|
-
glitched_dataset = gaggle.corrupt_dataset(
|
270
|
+
glitched_dataset = gaggle.corrupt_dataset(base_dataset, ["prompt"])
|
249
271
|
|
250
272
|
rubric_func = reward_function or symmetric_damerau_levenshtein_similarity
|
251
273
|
rubric = vf.Rubric(funcs=[rubric_func], weights=[1.0])
|
glitchlings/main.py
CHANGED
@@ -11,31 +11,12 @@ from . import SAMPLE_TEXT
|
|
11
11
|
from .zoo import (
|
12
12
|
Glitchling,
|
13
13
|
Gaggle,
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
reduple,
|
18
|
-
rushmore,
|
19
|
-
redactyl,
|
20
|
-
scannequin,
|
14
|
+
BUILTIN_GLITCHLINGS,
|
15
|
+
DEFAULT_GLITCHLING_NAMES,
|
16
|
+
parse_glitchling_spec,
|
21
17
|
summon,
|
22
18
|
)
|
23
19
|
|
24
|
-
|
25
|
-
BUILTIN_GLITCHLINGS: dict[str, Glitchling] = {
|
26
|
-
g.name.lower(): g
|
27
|
-
for g in [
|
28
|
-
typogre,
|
29
|
-
mim1c,
|
30
|
-
jargoyle,
|
31
|
-
reduple,
|
32
|
-
rushmore,
|
33
|
-
redactyl,
|
34
|
-
scannequin,
|
35
|
-
]
|
36
|
-
}
|
37
|
-
|
38
|
-
DEFAULT_GLITCHLING_NAMES: list[str] = list(BUILTIN_GLITCHLINGS.keys())
|
39
20
|
MAX_NAME_WIDTH = max(len(glitchling.name) for glitchling in BUILTIN_GLITCHLINGS.values())
|
40
21
|
|
41
22
|
|
@@ -62,8 +43,11 @@ def build_parser() -> argparse.ArgumentParser:
|
|
62
43
|
"--glitchling",
|
63
44
|
dest="glitchlings",
|
64
45
|
action="append",
|
65
|
-
metavar="
|
66
|
-
help=
|
46
|
+
metavar="SPEC",
|
47
|
+
help=(
|
48
|
+
"Glitchling to apply, optionally with parameters like "
|
49
|
+
"Typogre(rate=0.05). Repeat for multiples; defaults to all built-ins."
|
50
|
+
),
|
67
51
|
)
|
68
52
|
parser.add_argument(
|
69
53
|
"-s",
|
@@ -147,23 +131,16 @@ def read_text(args: argparse.Namespace, parser: argparse.ArgumentParser) -> str:
|
|
147
131
|
def summon_glitchlings(
|
148
132
|
names: list[str] | None, parser: argparse.ArgumentParser, seed: int
|
149
133
|
) -> Gaggle:
|
150
|
-
"""Instantiate the requested glitchlings and bundle them in a ``Gaggle``.
|
151
|
-
|
152
|
-
Args:
|
153
|
-
names: Optional list of glitchling names provided by the user.
|
154
|
-
parser: The argument parser used for emitting user-facing errors.
|
155
|
-
seed: Master seed controlling deterministic corruption order.
|
156
|
-
|
157
|
-
Returns:
|
158
|
-
Gaggle: A ready-to-use collection of glitchlings.
|
159
|
-
|
160
|
-
Raises:
|
161
|
-
SystemExit: Raised indirectly via ``parser.error`` when a provided glitchling
|
162
|
-
name is invalid.
|
163
|
-
"""
|
134
|
+
"""Instantiate the requested glitchlings and bundle them in a ``Gaggle``."""
|
164
135
|
|
165
136
|
if names:
|
166
|
-
normalized
|
137
|
+
normalized: list[str | Glitchling] = []
|
138
|
+
for specification in names:
|
139
|
+
try:
|
140
|
+
normalized.append(parse_glitchling_spec(specification))
|
141
|
+
except ValueError as exc:
|
142
|
+
parser.error(str(exc))
|
143
|
+
raise AssertionError("parser.error should exit")
|
167
144
|
else:
|
168
145
|
normalized = DEFAULT_GLITCHLING_NAMES
|
169
146
|
|
@@ -174,6 +151,7 @@ def summon_glitchlings(
|
|
174
151
|
raise AssertionError("parser.error should exit")
|
175
152
|
|
176
153
|
|
154
|
+
|
177
155
|
def show_diff(original: str, corrupted: str) -> None:
|
178
156
|
"""Display a unified diff between the original and corrupted text."""
|
179
157
|
|
glitchlings/util/__init__.py
CHANGED
@@ -141,6 +141,36 @@ _register_layout(
|
|
141
141
|
),
|
142
142
|
)
|
143
143
|
|
144
|
+
_register_layout(
|
145
|
+
"QWERTZ",
|
146
|
+
(
|
147
|
+
"^1234567890ß´",
|
148
|
+
" qwertzuiopü+",
|
149
|
+
" asdfghjklöä#",
|
150
|
+
" yxcvbnm,.-",
|
151
|
+
),
|
152
|
+
)
|
153
|
+
|
154
|
+
_register_layout(
|
155
|
+
"SPANISH_QWERTY",
|
156
|
+
(
|
157
|
+
"º1234567890'¡",
|
158
|
+
" qwertyuiop´+",
|
159
|
+
" asdfghjklñ´",
|
160
|
+
" <zxcvbnm,.-",
|
161
|
+
),
|
162
|
+
)
|
163
|
+
|
164
|
+
_register_layout(
|
165
|
+
"SWEDISH_QWERTY",
|
166
|
+
(
|
167
|
+
"§1234567890+´",
|
168
|
+
" qwertyuiopå¨",
|
169
|
+
" asdfghjklöä'",
|
170
|
+
" <zxcvbnm,.-",
|
171
|
+
),
|
172
|
+
)
|
173
|
+
|
144
174
|
|
145
175
|
class KeyNeighbors:
|
146
176
|
def __init__(self) -> None:
|
glitchlings/zoo/__init__.py
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import ast
|
4
|
+
from typing import Any
|
5
|
+
|
1
6
|
from .typogre import Typogre, typogre
|
2
7
|
from .mim1c import Mim1c, mim1c
|
3
|
-
from .jargoyle import Jargoyle, jargoyle
|
8
|
+
from .jargoyle import Jargoyle, jargoyle, dependencies_available as _jargoyle_available
|
4
9
|
from .reduple import Reduple, reduple
|
5
10
|
from .rushmore import Rushmore, rushmore
|
6
11
|
from .redactyl import Redactyl, redactyl
|
@@ -25,33 +30,105 @@ __all__ = [
|
|
25
30
|
"Glitchling",
|
26
31
|
"Gaggle",
|
27
32
|
"summon",
|
33
|
+
"BUILTIN_GLITCHLINGS",
|
34
|
+
"DEFAULT_GLITCHLING_NAMES",
|
35
|
+
"parse_glitchling_spec",
|
28
36
|
]
|
29
37
|
|
38
|
+
_HAS_JARGOYLE = _jargoyle_available()
|
39
|
+
|
40
|
+
_BUILTIN_GLITCHLING_LIST: list[Glitchling] = [typogre, mim1c]
|
41
|
+
if _HAS_JARGOYLE:
|
42
|
+
_BUILTIN_GLITCHLING_LIST.append(jargoyle)
|
43
|
+
_BUILTIN_GLITCHLING_LIST.extend([reduple, rushmore, redactyl, scannequin])
|
44
|
+
|
45
|
+
BUILTIN_GLITCHLINGS: dict[str, Glitchling] = {
|
46
|
+
glitchling.name.lower(): glitchling for glitchling in _BUILTIN_GLITCHLING_LIST
|
47
|
+
}
|
48
|
+
|
49
|
+
_BUILTIN_GLITCHLING_TYPES: dict[str, type[Glitchling]] = {
|
50
|
+
typogre.name.lower(): Typogre,
|
51
|
+
mim1c.name.lower(): Mim1c,
|
52
|
+
reduple.name.lower(): Reduple,
|
53
|
+
rushmore.name.lower(): Rushmore,
|
54
|
+
redactyl.name.lower(): Redactyl,
|
55
|
+
scannequin.name.lower(): Scannequin,
|
56
|
+
}
|
57
|
+
if _HAS_JARGOYLE:
|
58
|
+
_BUILTIN_GLITCHLING_TYPES[jargoyle.name.lower()] = Jargoyle
|
59
|
+
|
60
|
+
DEFAULT_GLITCHLING_NAMES: list[str] = list(BUILTIN_GLITCHLINGS.keys())
|
61
|
+
|
62
|
+
|
63
|
+
def parse_glitchling_spec(specification: str) -> Glitchling:
|
64
|
+
"""Return a glitchling instance configured according to ``specification``."""
|
65
|
+
|
66
|
+
text = specification.strip()
|
67
|
+
if not text:
|
68
|
+
raise ValueError("Glitchling specification cannot be empty.")
|
69
|
+
|
70
|
+
if "(" not in text:
|
71
|
+
glitchling = BUILTIN_GLITCHLINGS.get(text.lower())
|
72
|
+
if glitchling is None:
|
73
|
+
raise ValueError(f"Glitchling '{text}' not found.")
|
74
|
+
return glitchling
|
75
|
+
|
76
|
+
if not text.endswith(")"):
|
77
|
+
raise ValueError(f"Invalid parameter syntax for glitchling '{text}'.")
|
78
|
+
|
79
|
+
name_part, arg_source = text[:-1].split("(", 1)
|
80
|
+
name = name_part.strip()
|
81
|
+
if not name:
|
82
|
+
raise ValueError(f"Invalid glitchling specification '{text}'.")
|
83
|
+
|
84
|
+
lower_name = name.lower()
|
85
|
+
glitchling_type = _BUILTIN_GLITCHLING_TYPES.get(lower_name)
|
86
|
+
if glitchling_type is None:
|
87
|
+
raise ValueError(f"Glitchling '{name}' not found.")
|
88
|
+
|
89
|
+
try:
|
90
|
+
call_expr = ast.parse(f"_({arg_source})", mode="eval").body
|
91
|
+
except SyntaxError as exc:
|
92
|
+
raise ValueError(
|
93
|
+
f"Invalid parameter syntax for glitchling '{name}': {exc.msg}"
|
94
|
+
) from exc
|
95
|
+
|
96
|
+
if not isinstance(call_expr, ast.Call) or call_expr.args:
|
97
|
+
raise ValueError(
|
98
|
+
f"Glitchling '{name}' parameters must be provided as keyword arguments."
|
99
|
+
)
|
100
|
+
|
101
|
+
kwargs: dict[str, Any] = {}
|
102
|
+
for keyword in call_expr.keywords:
|
103
|
+
if keyword.arg is None:
|
104
|
+
raise ValueError(
|
105
|
+
f"Glitchling '{name}' does not support unpacking arbitrary keyword arguments."
|
106
|
+
)
|
107
|
+
try:
|
108
|
+
kwargs[keyword.arg] = ast.literal_eval(keyword.value)
|
109
|
+
except (ValueError, SyntaxError) as exc:
|
110
|
+
raise ValueError(
|
111
|
+
f"Failed to parse value for parameter '{keyword.arg}' on glitchling '{name}': {exc}"
|
112
|
+
) from exc
|
113
|
+
|
114
|
+
try:
|
115
|
+
return glitchling_type(**kwargs)
|
116
|
+
except TypeError as exc:
|
117
|
+
raise ValueError(f"Failed to instantiate glitchling '{name}': {exc}") from exc
|
118
|
+
|
30
119
|
|
31
120
|
def summon(glitchlings: list[str | Glitchling], seed: int = 151) -> Gaggle:
|
32
121
|
"""Summon glitchlings by name (using defaults) or instance (to change parameters)."""
|
33
|
-
|
34
|
-
|
35
|
-
for g in [
|
36
|
-
typogre,
|
37
|
-
mim1c,
|
38
|
-
jargoyle,
|
39
|
-
reduple,
|
40
|
-
rushmore,
|
41
|
-
redactyl,
|
42
|
-
scannequin,
|
43
|
-
]
|
44
|
-
}
|
45
|
-
summoned = []
|
122
|
+
|
123
|
+
summoned: list[Glitchling] = []
|
46
124
|
for entry in glitchlings:
|
47
125
|
if isinstance(entry, Glitchling):
|
48
126
|
summoned.append(entry)
|
49
127
|
continue
|
50
128
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
raise ValueError(f"Glitchling '{entry}' not found.")
|
129
|
+
try:
|
130
|
+
summoned.append(parse_glitchling_spec(entry))
|
131
|
+
except ValueError as exc:
|
132
|
+
raise ValueError(str(exc)) from exc
|
56
133
|
|
57
134
|
return Gaggle(summoned, seed=seed)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from importlib import resources
|
4
|
+
|
5
|
+
_CONFUSION_TABLE: list[tuple[str, list[str]]] | None = None
|
6
|
+
|
7
|
+
|
8
|
+
def load_confusion_table() -> list[tuple[str, list[str]]]:
|
9
|
+
"""Load the OCR confusion table shared by Python and Rust implementations."""
|
10
|
+
global _CONFUSION_TABLE
|
11
|
+
if _CONFUSION_TABLE is not None:
|
12
|
+
return _CONFUSION_TABLE
|
13
|
+
|
14
|
+
data = resources.files(__package__) / "ocr_confusions.tsv"
|
15
|
+
text = data.read_text(encoding="utf-8")
|
16
|
+
indexed_entries: list[tuple[int, tuple[str, list[str]]]] = []
|
17
|
+
for line_number, line in enumerate(text.splitlines()):
|
18
|
+
stripped = line.strip()
|
19
|
+
if not stripped or stripped.startswith("#"):
|
20
|
+
continue
|
21
|
+
parts = stripped.split()
|
22
|
+
if len(parts) < 2:
|
23
|
+
continue
|
24
|
+
source, *replacements = parts
|
25
|
+
indexed_entries.append((line_number, (source, replacements)))
|
26
|
+
|
27
|
+
# Sort longer patterns first to avoid overlapping matches, mirroring the
|
28
|
+
# behaviour of the Rust `confusion_table` helper.
|
29
|
+
indexed_entries.sort(
|
30
|
+
key=lambda item: (-len(item[1][0]), item[0])
|
31
|
+
)
|
32
|
+
entries = [entry for _, entry in indexed_entries]
|
33
|
+
_CONFUSION_TABLE = entries
|
34
|
+
return entries
|
glitchlings/zoo/_rate.py
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
|
4
|
+
def resolve_rate(
|
5
|
+
*,
|
6
|
+
rate: float | None,
|
7
|
+
legacy_value: float | None,
|
8
|
+
default: float,
|
9
|
+
legacy_name: str,
|
10
|
+
) -> float:
|
11
|
+
"""Return the effective rate while enforcing mutual exclusivity."""
|
12
|
+
|
13
|
+
if rate is not None and legacy_value is not None:
|
14
|
+
raise ValueError(
|
15
|
+
f"Specify either 'rate' or '{legacy_name}', not both."
|
16
|
+
)
|
17
|
+
if rate is not None:
|
18
|
+
return rate
|
19
|
+
if legacy_value is not None:
|
20
|
+
return legacy_value
|
21
|
+
return default
|
glitchlings/zoo/core.py
CHANGED
@@ -107,6 +107,7 @@ class Glitchling:
|
|
107
107
|
scope: AttackWave,
|
108
108
|
order: AttackOrder = AttackOrder.NORMAL,
|
109
109
|
seed: int | None = None,
|
110
|
+
pipeline_operation: Callable[["Glitchling"], dict[str, Any] | None] | None = None,
|
110
111
|
**kwargs: Any,
|
111
112
|
) -> None:
|
112
113
|
"""Initialize a glitchling.
|
@@ -128,31 +129,76 @@ class Glitchling:
|
|
128
129
|
self.corruption_function: CorruptionCallable = corruption_function
|
129
130
|
self.level: AttackWave = scope
|
130
131
|
self.order: AttackOrder = order
|
132
|
+
self._pipeline_descriptor_factory = pipeline_operation
|
131
133
|
self.kwargs: dict[str, Any] = {}
|
134
|
+
self._cached_rng_callable: CorruptionCallable | None = None
|
135
|
+
self._cached_rng_expectation: bool | None = None
|
132
136
|
for kw, val in kwargs.items():
|
133
137
|
self.set_param(kw, val)
|
134
138
|
|
135
139
|
def set_param(self, key: str, value: Any) -> None:
|
136
140
|
"""Persist a parameter for use by the corruption callable."""
|
137
141
|
|
138
|
-
|
139
|
-
|
140
|
-
|
142
|
+
aliases = getattr(self, "_param_aliases", {})
|
143
|
+
canonical = aliases.get(key, key)
|
144
|
+
|
145
|
+
# Drop stale alias keys so we only forward canonical kwargs.
|
146
|
+
self.kwargs.pop(key, None)
|
147
|
+
for alias, target in aliases.items():
|
148
|
+
if target == canonical:
|
149
|
+
self.kwargs.pop(alias, None)
|
150
|
+
|
151
|
+
self.kwargs[canonical] = value
|
152
|
+
setattr(self, canonical, value)
|
153
|
+
|
154
|
+
if canonical == "seed":
|
141
155
|
self.reset_rng(value)
|
142
156
|
|
143
|
-
|
144
|
-
|
157
|
+
for alias, target in aliases.items():
|
158
|
+
if target == canonical:
|
159
|
+
setattr(self, alias, value)
|
145
160
|
|
146
|
-
|
161
|
+
def pipeline_operation(self) -> dict[str, Any] | None:
|
162
|
+
"""Return the Rust pipeline operation descriptor for this glitchling."""
|
163
|
+
|
164
|
+
factory = self._pipeline_descriptor_factory
|
165
|
+
if factory is None:
|
166
|
+
return None
|
167
|
+
|
168
|
+
return factory(self)
|
169
|
+
|
170
|
+
def _corruption_expects_rng(self) -> bool:
|
171
|
+
"""Return `True` when the corruption function accepts an rng keyword."""
|
172
|
+
|
173
|
+
cached_callable = self._cached_rng_callable
|
174
|
+
cached_expectation = self._cached_rng_expectation
|
175
|
+
corruption_function = self.corruption_function
|
176
|
+
|
177
|
+
if (
|
178
|
+
cached_callable is corruption_function
|
179
|
+
and cached_expectation is not None
|
180
|
+
):
|
181
|
+
return cached_expectation
|
182
|
+
|
183
|
+
expects_rng = False
|
147
184
|
try:
|
148
|
-
signature = inspect.signature(
|
185
|
+
signature = inspect.signature(corruption_function)
|
149
186
|
except (TypeError, ValueError):
|
150
187
|
signature = None
|
151
188
|
|
152
|
-
expects_rng = False
|
153
189
|
if signature is not None:
|
154
190
|
expects_rng = "rng" in signature.parameters
|
155
191
|
|
192
|
+
self._cached_rng_callable = corruption_function
|
193
|
+
self._cached_rng_expectation = expects_rng
|
194
|
+
return expects_rng
|
195
|
+
|
196
|
+
def __corrupt(self, text: str, *args: Any, **kwargs: Any) -> str:
|
197
|
+
"""Execute the corruption callable, injecting the RNG when required."""
|
198
|
+
|
199
|
+
# Pass rng to underlying corruption function if it expects it.
|
200
|
+
expects_rng = self._corruption_expects_rng()
|
201
|
+
|
156
202
|
if expects_rng:
|
157
203
|
corrupted = self.corruption_function(text, *args, rng=self.rng, **kwargs)
|
158
204
|
else:
|
@@ -231,53 +277,14 @@ class Glitchling:
|
|
231
277
|
self.corruption_function,
|
232
278
|
self.level,
|
233
279
|
self.order,
|
280
|
+
pipeline_operation=self._pipeline_descriptor_factory,
|
234
281
|
**filtered_kwargs,
|
235
282
|
)
|
236
283
|
|
237
284
|
return cls(**filtered_kwargs)
|
238
285
|
|
239
286
|
|
240
|
-
def _pipeline_operation_reduplicate(glitchling: "Glitchling") -> dict[str, Any] | None:
|
241
|
-
rate = glitchling.kwargs.get("reduplication_rate")
|
242
|
-
if rate is None:
|
243
|
-
return None
|
244
|
-
return {"type": "reduplicate", "reduplication_rate": float(rate)}
|
245
|
-
|
246
287
|
|
247
|
-
def _pipeline_operation_delete(glitchling: "Glitchling") -> dict[str, Any] | None:
|
248
|
-
rate = glitchling.kwargs.get("max_deletion_rate")
|
249
|
-
if rate is None:
|
250
|
-
return None
|
251
|
-
return {"type": "delete", "max_deletion_rate": float(rate)}
|
252
|
-
|
253
|
-
|
254
|
-
def _pipeline_operation_redact(glitchling: "Glitchling") -> dict[str, Any] | None:
|
255
|
-
replacement_char = glitchling.kwargs.get("replacement_char")
|
256
|
-
redaction_rate = glitchling.kwargs.get("redaction_rate")
|
257
|
-
merge_adjacent = glitchling.kwargs.get("merge_adjacent")
|
258
|
-
if replacement_char is None or redaction_rate is None or merge_adjacent is None:
|
259
|
-
return None
|
260
|
-
return {
|
261
|
-
"type": "redact",
|
262
|
-
"replacement_char": str(replacement_char),
|
263
|
-
"redaction_rate": float(redaction_rate),
|
264
|
-
"merge_adjacent": bool(merge_adjacent),
|
265
|
-
}
|
266
|
-
|
267
|
-
|
268
|
-
def _pipeline_operation_ocr(glitchling: "Glitchling") -> dict[str, Any] | None:
|
269
|
-
error_rate = glitchling.kwargs.get("error_rate")
|
270
|
-
if error_rate is None:
|
271
|
-
return None
|
272
|
-
return {"type": "ocr", "error_rate": float(error_rate)}
|
273
|
-
|
274
|
-
|
275
|
-
_PIPELINE_OPERATION_BUILDERS: dict[str, Callable[["Glitchling"], dict[str, Any] | None]] = {
|
276
|
-
"Reduple": _pipeline_operation_reduplicate,
|
277
|
-
"Rushmore": _pipeline_operation_delete,
|
278
|
-
"Redactyl": _pipeline_operation_redact,
|
279
|
-
"Scannequin": _pipeline_operation_ocr,
|
280
|
-
}
|
281
288
|
|
282
289
|
|
283
290
|
class Gaggle(Glitchling):
|
@@ -359,10 +366,7 @@ class Gaggle(Glitchling):
|
|
359
366
|
|
360
367
|
descriptors: list[dict[str, Any]] = []
|
361
368
|
for glitchling in self.apply_order:
|
362
|
-
|
363
|
-
if builder is None:
|
364
|
-
return None
|
365
|
-
operation = builder(glitchling)
|
369
|
+
operation = glitchling.pipeline_operation()
|
366
370
|
if operation is None:
|
367
371
|
return None
|
368
372
|
|