glitchlings 0.4.4__cp313-cp313-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (47) hide show
  1. glitchlings/__init__.py +67 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust.cpython-313-x86_64-linux-gnu.so +0 -0
  4. glitchlings/compat.py +284 -0
  5. glitchlings/config.py +388 -0
  6. glitchlings/config.toml +3 -0
  7. glitchlings/dlc/__init__.py +7 -0
  8. glitchlings/dlc/_shared.py +153 -0
  9. glitchlings/dlc/huggingface.py +81 -0
  10. glitchlings/dlc/prime.py +254 -0
  11. glitchlings/dlc/pytorch.py +166 -0
  12. glitchlings/dlc/pytorch_lightning.py +215 -0
  13. glitchlings/lexicon/__init__.py +192 -0
  14. glitchlings/lexicon/_cache.py +110 -0
  15. glitchlings/lexicon/data/default_vector_cache.json +82 -0
  16. glitchlings/lexicon/metrics.py +162 -0
  17. glitchlings/lexicon/vector.py +651 -0
  18. glitchlings/lexicon/wordnet.py +232 -0
  19. glitchlings/main.py +364 -0
  20. glitchlings/util/__init__.py +195 -0
  21. glitchlings/util/adapters.py +27 -0
  22. glitchlings/zoo/__init__.py +168 -0
  23. glitchlings/zoo/_ocr_confusions.py +32 -0
  24. glitchlings/zoo/_rate.py +131 -0
  25. glitchlings/zoo/_rust_extensions.py +143 -0
  26. glitchlings/zoo/_sampling.py +54 -0
  27. glitchlings/zoo/_text_utils.py +100 -0
  28. glitchlings/zoo/adjax.py +128 -0
  29. glitchlings/zoo/apostrofae.py +127 -0
  30. glitchlings/zoo/assets/__init__.py +0 -0
  31. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  32. glitchlings/zoo/core.py +582 -0
  33. glitchlings/zoo/jargoyle.py +335 -0
  34. glitchlings/zoo/mim1c.py +109 -0
  35. glitchlings/zoo/ocr_confusions.tsv +30 -0
  36. glitchlings/zoo/redactyl.py +193 -0
  37. glitchlings/zoo/reduple.py +148 -0
  38. glitchlings/zoo/rushmore.py +153 -0
  39. glitchlings/zoo/scannequin.py +171 -0
  40. glitchlings/zoo/typogre.py +231 -0
  41. glitchlings/zoo/zeedub.py +185 -0
  42. glitchlings-0.4.4.dist-info/METADATA +627 -0
  43. glitchlings-0.4.4.dist-info/RECORD +47 -0
  44. glitchlings-0.4.4.dist-info/WHEEL +5 -0
  45. glitchlings-0.4.4.dist-info/entry_points.txt +2 -0
  46. glitchlings-0.4.4.dist-info/licenses/LICENSE +201 -0
  47. glitchlings-0.4.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,148 @@
1
+ import random
2
+ from typing import Any, cast
3
+
4
+ from ._rate import resolve_rate
5
+ from ._rust_extensions import get_rust_operation
6
+ from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
7
+ from .core import AttackWave, Glitchling
8
+
9
# Look up the optional Rust-accelerated operation once, at import time.
# `reduplicate_words` checks this for None and uses the pure-Python
# implementation when no Rust operation was returned.
_reduplicate_words_rust = get_rust_operation("reduplicate_words")
11
+
12
+
13
def _python_reduplicate_words(
    text: str,
    *,
    rate: float,
    rng: random.Random,
    unweighted: bool = False,
) -> str:
    """Duplicate randomly selected words in ``text`` (pure-Python path).

    Parameters
    ----------
    - text: Input text.
    - rate: Base per-word duplication probability. Values <= 0.0 leave every
      word untouched; values >= 1.0 duplicate every word.
    - rng: RNG used for the per-word sampling draws.
    - unweighted: When True every word has the same weight; otherwise a
      word's weight is ``1 / core_length``.

    Returns
    -------
    The re-joined token stream, with each selected word rewritten as
    ``prefix + core + " " + core + suffix``.

    Notes
    -----
    - A word's probability is ``rate * weight / mean_weight``, capped at 1.0.
    - Once sampling starts, exactly one ``rng.random()`` draw is made per
      word token, in token order, so results are reproducible for a given
      RNG state.

    """
    pieces = split_preserving_whitespace(text)

    # (token position, sampling weight, token) for every word token.
    candidates: list[tuple[int, float, WordToken]] = [
        (word.index, 1.0 if unweighted else 1.0 / float(word.core_length), word)
        for word in collect_word_tokens(pieces)
    ]
    if not candidates:
        return "".join(pieces)

    floor_rate = max(rate, 0.0)
    if floor_rate <= 0.0:
        return "".join(pieces)

    average_weight = sum(entry[1] for entry in candidates) / len(candidates)
    always = floor_rate >= 1.0

    for position, weight, word in candidates:
        if always:
            chance = 1.0
        elif average_weight <= 0.0:
            chance = floor_rate
        else:
            chance = min(1.0, floor_rate * (weight / average_weight))

        # The draw happens even when chance is 1.0 so the RNG stream advances
        # by one step per word regardless of the rate.
        if rng.random() >= chance:
            continue

        pieces[position] = f"{word.prefix}{word.core} {word.core}{word.suffix}"

    return "".join(pieces)
66
+
67
+
68
def reduplicate_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    reduplication_rate: float | None = None,
    unweighted: bool = False,
) -> str:
    """Randomly reduplicate words in the text.

    The rate is resolved by ``resolve_rate`` from ``rate`` or the legacy
    ``reduplication_rate`` keyword (default 0.01) and negative results are
    raised to 0.0. When ``rng`` is None a ``random.Random(seed)`` is created.
    The Rust operation is used when it was loaded; otherwise the Python
    implementation runs.
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=reduplication_rate,
        default=0.01,
        legacy_name="reduplication_rate",
    )

    generator = random.Random(seed) if rng is None else rng
    non_negative_rate = max(0.0, resolved)
    uniform = bool(unweighted)

    rust_impl = _reduplicate_words_rust
    if rust_impl is None:
        return _python_reduplicate_words(
            text,
            rate=non_negative_rate,
            rng=generator,
            unweighted=uniform,
        )

    # The Rust operation takes its arguments positionally.
    return cast(str, rust_impl(text, non_negative_rate, uniform, generator))
104
+
105
+
106
class Reduple(Glitchling):
    """Glitchling that repeats words to simulate stuttering speech."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        reduplication_rate: float | None = None,
        seed: int | None = None,
        unweighted: bool = False,
    ) -> None:
        """Create the glitchling.

        Parameters
        ----------
        - rate: Reduplication rate; resolved together with the legacy
          ``reduplication_rate`` by ``resolve_rate`` (default 0.01).
        - reduplication_rate: Legacy name for ``rate``.
        - seed: Seed forwarded to the base class.
        - unweighted: Forwarded to ``reduplicate_words``.
        """
        # Assigned before the base initialiser runs.
        # NOTE(review): presumably the base class reads this to map the legacy
        # keyword onto ``rate`` - confirm against Glitchling.
        self._param_aliases = {"reduplication_rate": "rate"}
        effective_rate = resolve_rate(
            rate=rate,
            legacy_value=reduplication_rate,
            default=0.01,
            legacy_name="reduplication_rate",
        )
        super().__init__(
            name="Reduple",
            corruption_function=reduplicate_words,
            scope=AttackWave.WORD,
            seed=seed,
            rate=effective_rate,
            unweighted=unweighted,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return a plain-dict descriptor of this operation.

        Returns None when neither ``rate`` nor the legacy
        ``reduplication_rate`` is present in ``self.kwargs``.
        """
        rate = self.kwargs.get("rate")
        if rate is None:
            # Same legacy-name fallback the sibling glitchlings apply, so a
            # value stored under the old keyword still yields a descriptor.
            rate = self.kwargs.get("reduplication_rate")
            if rate is None:
                return None
        unweighted = bool(self.kwargs.get("unweighted", False))
        return {
            "type": "reduplicate",
            "reduplication_rate": float(rate),
            "unweighted": unweighted,
        }
143
+
144
+
145
# Ready-to-use module-level instance built with the default arguments.
reduple = Reduple()


# Names exported by a star-import of this module.
__all__ = ["Reduple", "reduple"]
@@ -0,0 +1,153 @@
1
+ import math
2
+ import random
3
+ import re
4
+ from typing import Any, cast
5
+
6
+ from ._rate import resolve_rate
7
+ from ._rust_extensions import get_rust_operation
8
+ from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
9
+ from .core import AttackWave, Glitchling
10
+
11
# Look up the optional Rust-accelerated operation once, at import time.
# `delete_random_words` checks this for None and uses the pure-Python
# implementation when no Rust operation was returned.
_delete_random_words_rust = get_rust_operation("delete_random_words")
13
+
14
+
15
def _python_delete_random_words(
    text: str,
    *,
    rate: float,
    rng: random.Random,
    unweighted: bool = False,
) -> str:
    """Delete randomly selected words from ``text`` (pure-Python path).

    Parameters
    ----------
    - text: Input text.
    - rate: Base per-word deletion probability; also bounds the number of
      deletions at ``floor(candidate_count * rate)``.
    - rng: RNG used for the per-word sampling draws.
    - unweighted: When True every word has the same weight; otherwise a
      word's weight is ``1 / core_length``.

    Notes
    -----
    - Candidates are collected with ``skip_first_word=True``.
    - A deleted word keeps only its stripped prefix and suffix.
    - If the rate is <= 0, there are no candidates, or the deletion budget
      is zero, ``text`` is returned untouched.
    - Otherwise the re-joined text is normalised: whitespace before any of
      ``.,;:`` is removed, runs of two or more whitespace characters become
      a single space, and the result is stripped.

    """
    floor_rate = max(rate, 0.0)
    if floor_rate <= 0.0:
        return text

    pieces = split_preserving_whitespace(text)

    # (token position, sampling weight, token) for every eligible word token.
    candidates: list[tuple[int, float, WordToken]] = [
        (word.index, 1.0 if unweighted else 1.0 / float(word.core_length), word)
        for word in collect_word_tokens(pieces, skip_first_word=True)
    ]
    if not candidates:
        return text

    budget = min(len(candidates), math.floor(len(candidates) * floor_rate))
    if budget <= 0:
        return text

    average_weight = sum(entry[1] for entry in candidates) / len(candidates)
    always = floor_rate >= 1.0

    remaining = budget
    for position, weight, word in candidates:
        if remaining <= 0:
            break

        if always:
            chance = 1.0
        elif average_weight <= 0.0:
            chance = floor_rate
        else:
            chance = min(1.0, floor_rate * (weight / average_weight))

        if rng.random() >= chance:
            continue

        # Drop the word core; keep surrounding punctuation only.
        pieces[position] = f"{word.prefix.strip()}{word.suffix.strip()}"
        remaining -= 1

    joined = "".join(pieces)
    tightened = re.sub(r"\s+([.,;:])", r"\1", joined)
    return re.sub(r"\s{2,}", " ", tightened).strip()
70
+
71
+
72
def delete_random_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    max_deletion_rate: float | None = None,
    unweighted: bool = False,
) -> str:
    """Delete random words from the input text.

    The rate is resolved by ``resolve_rate`` from ``rate`` or the legacy
    ``max_deletion_rate`` keyword (default 0.01) and negative results are
    raised to 0.0. When ``rng`` is None a ``random.Random(seed)`` is created.
    The Rust operation is used when it was loaded; otherwise the Python
    implementation runs.
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=max_deletion_rate,
        default=0.01,
        legacy_name="max_deletion_rate",
    )

    generator = random.Random(seed) if rng is None else rng
    non_negative_rate = max(0.0, resolved)
    uniform = bool(unweighted)

    rust_impl = _delete_random_words_rust
    if rust_impl is None:
        return _python_delete_random_words(
            text,
            rate=non_negative_rate,
            rng=generator,
            unweighted=uniform,
        )

    # The Rust operation takes its arguments positionally.
    return cast(str, rust_impl(text, non_negative_rate, uniform, generator))
107
+
108
+
109
class Rushmore(Glitchling):
    """Glitchling that deletes words to simulate missing information."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        max_deletion_rate: float | None = None,
        seed: int | None = None,
        unweighted: bool = False,
    ) -> None:
        """Create the glitchling.

        ``rate`` and the legacy ``max_deletion_rate`` are resolved by
        ``resolve_rate`` (default 0.01); ``seed`` and ``unweighted`` are
        forwarded to the base class unchanged.
        """
        # Assigned before the base initialiser runs.
        self._param_aliases = {"max_deletion_rate": "rate"}
        super().__init__(
            name="Rushmore",
            corruption_function=delete_random_words,
            scope=AttackWave.WORD,
            seed=seed,
            rate=resolve_rate(
                rate=rate,
                legacy_value=max_deletion_rate,
                default=0.01,
                legacy_name="max_deletion_rate",
            ),
            unweighted=unweighted,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return a plain-dict descriptor of this operation.

        Returns None when neither ``rate`` nor the legacy
        ``max_deletion_rate`` is present in ``self.kwargs``.
        """
        options = self.kwargs
        configured = options.get("rate")
        if configured is None:
            configured = options.get("max_deletion_rate")
        if configured is None:
            return None

        descriptor: dict[str, Any] = {"type": "delete"}
        descriptor["max_deletion_rate"] = float(configured)
        descriptor["unweighted"] = bool(options.get("unweighted", False))
        return descriptor
148
+
149
+
150
# Ready-to-use module-level instance built with the default arguments.
rushmore = Rushmore()


# Names exported by a star-import of this module.
__all__ = ["Rushmore", "rushmore"]
@@ -0,0 +1,171 @@
1
+ import random
2
+ import re
3
+ from typing import Any, cast
4
+
5
+ from ._ocr_confusions import load_confusion_table
6
+ from ._rate import resolve_rate
7
+ from ._rust_extensions import get_rust_operation
8
+ from .core import AttackOrder, AttackWave, Glitchling
9
+
10
# Look up the optional Rust-accelerated operation once, at import time.
# `ocr_artifacts` checks this for None and uses the pure-Python
# implementation when no Rust operation was returned.
_ocr_artifacts_rust = get_rust_operation("ocr_artifacts")
12
+
13
+
14
def _python_ocr_artifacts(
    text: str,
    *,
    rate: float,
    rng: random.Random,
) -> str:
    """Replace a random subset of OCR-confusable substrings (pure-Python path).

    Parameters
    ----------
    - text: Input text to corrupt; empty text is returned unchanged.
    - rate: Fraction of candidate matches to replace. The replacement budget
      is ``int(candidate_count * rate)``; a budget of zero leaves the text
      unchanged.
    - rng: RNG used to shuffle the candidates and pick each replacement.

    Notes
    -----
    - Confusion pairs come from ``load_confusion_table()``; each entry is a
      source substring plus a list of replacement choices.
    - Every literal occurrence of every source substring is a candidate, so
      overlapping matches all count toward the budget. Overlaps are rejected
      only afterwards, while walking the shuffled candidates.
    - Accepted edits are sorted by start offset and spliced in a single
      left-to-right pass over the original text, so replacements that change
      the length do not shift the offsets of later edits.

    """
    if not text:
        return text

    # The table lives in a shared data file so the Python and Rust
    # implementations stay in sync.
    table = load_confusion_table()

    # Longest sources first; the sort is stable, so equal-length sources keep
    # their table order and the pre-shuffle candidate order is well defined.
    ordered_pairs = sorted(table, key=lambda pair: len(pair[0]), reverse=True)

    matches: list[tuple[int, int, list[str]]] = []
    for source, options in ordered_pairs:
        matches.extend(
            (found.start(), found.end(), options)
            for found in re.finditer(re.escape(source), text)
        )
    if not matches:
        return text

    budget = int(len(matches) * rate)
    if budget <= 0:
        return text

    rng.shuffle(matches)

    # Walk the shuffled candidates, keeping those that do not overlap an
    # already accepted span, until the budget is used up.
    accepted: list[tuple[int, int, str]] = []
    for begin, stop, options in matches:
        if len(accepted) >= budget:
            break
        clashes = any(
            stop > taken_begin and taken_stop > begin
            for taken_begin, taken_stop, _ in accepted
        )
        if clashes:
            continue
        accepted.append((begin, stop, rng.choice(options)))

    if not accepted:
        return text

    pieces: list[str] = []
    cursor = 0
    for begin, stop, replacement in sorted(accepted, key=lambda edit: edit[0]):
        pieces.append(text[cursor:begin])
        pieces.append(replacement)
        cursor = stop
    pieces.append(text[cursor:])

    return "".join(pieces)
98
+
99
+
100
def ocr_artifacts(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    error_rate: float | None = None,
) -> str:
    """Introduce OCR-like artifacts into text.

    Empty text is returned immediately. Otherwise the rate is resolved by
    ``resolve_rate`` from ``rate`` or the legacy ``error_rate`` keyword
    (default 0.02), negative results are raised to 0.0, and a
    ``random.Random(seed)`` is created when ``rng`` is None. The Rust
    operation is used when it was loaded; otherwise the Python
    implementation runs.
    """
    if not text:
        return text

    resolved = resolve_rate(
        rate=rate,
        legacy_value=error_rate,
        default=0.02,
        legacy_name="error_rate",
    )

    generator = random.Random(seed) if rng is None else rng
    non_negative_rate = max(0.0, resolved)

    rust_impl = _ocr_artifacts_rust
    if rust_impl is None:
        return _python_ocr_artifacts(text, rate=non_negative_rate, rng=generator)

    # The Rust operation takes its arguments positionally.
    return cast(str, rust_impl(text, non_negative_rate, generator))
131
+
132
+
133
class Scannequin(Glitchling):
    """Glitchling that simulates OCR artifacts using common confusions."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        error_rate: float | None = None,
        seed: int | None = None,
    ) -> None:
        """Create the glitchling.

        ``rate`` and the legacy ``error_rate`` are resolved by
        ``resolve_rate`` (default 0.02); ``seed`` is forwarded to the base
        class unchanged.
        """
        # Assigned before the base initialiser runs.
        self._param_aliases = {"error_rate": "rate"}
        super().__init__(
            name="Scannequin",
            corruption_function=ocr_artifacts,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.LATE,
            seed=seed,
            rate=resolve_rate(
                rate=rate,
                legacy_value=error_rate,
                default=0.02,
                legacy_name="error_rate",
            ),
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return a plain-dict descriptor of this operation.

        Returns None when neither ``rate`` nor the legacy ``error_rate`` is
        present in ``self.kwargs``.
        """
        options = self.kwargs
        configured = options.get("rate")
        if configured is None:
            configured = options.get("error_rate")
        if configured is None:
            return None
        return {"type": "ocr", "error_rate": float(configured)}
166
+
167
+
168
# Ready-to-use module-level instance built with the default arguments.
scannequin = Scannequin()


# Names exported by a star-import of this module.
__all__ = ["Scannequin", "scannequin"]
@@ -0,0 +1,231 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import random
5
+ from typing import Any, Optional, cast
6
+
7
+ from ..util import KEYNEIGHBORS
8
+ from ._rate import resolve_rate
9
+ from ._rust_extensions import get_rust_operation
10
+ from .core import AttackOrder, AttackWave, Glitchling
11
+
12
# Look up the optional Rust-accelerated operation once, at import time.
# `fatfinger` checks this for None and uses the pure-Python implementation
# when no Rust operation was returned.
_fatfinger_rust = get_rust_operation("fatfinger")
14
+
15
+
16
def _python_unichar(text: str, rng: random.Random) -> str:
    """Collapse one randomly chosen doubled character to a single occurrence.

    A pair qualifies when the same character appears twice in a row and is
    immediately followed by a word character (e.g. 'ee' in 'seed', but not
    the trailing 'ee' in 'tree'). The text is returned unchanged when no
    pair qualifies.
    """
    import re

    doubled_spans = [found.span(1) for found in re.finditer(r"((.)\2)(?=\w)", text)]
    if not doubled_spans:
        return text

    first, stop = rng.choice(doubled_spans)
    # Keep the first character of the pair and skip the second.
    return text[: first + 1] + text[stop:]
25
+
26
+
27
def _python_skipped_space(text: str, rng: random.Random) -> str:
    """Remove one randomly chosen space character from ``text``.

    Only the literal space character counts; the text is returned unchanged
    when it contains none.
    """
    gaps = [offset for offset, char in enumerate(text) if char == " "]
    if gaps:
        cut = rng.choice(gaps)
        return text[:cut] + text[cut + 1 :]
    return text
35
+
36
+
37
def _python_random_space(text: str, rng: random.Random) -> str:
    """Insert a single space at a random interior position of ``text``.

    The insertion point is never before the first character or after the
    last one; text shorter than two characters is returned unchanged.
    """
    length = len(text)
    if length >= 2:
        cut = rng.randrange(1, length)
        return f"{text[:cut]} {text[cut:]}"
    return text
42
+
43
+
44
def _python_repeated_char(text: str, rng: random.Random) -> str:
    """Double one randomly chosen non-whitespace character of ``text``.

    The text is returned unchanged when it has no non-whitespace character.
    """
    printable = [offset for offset, char in enumerate(text) if not char.isspace()]
    if not printable:
        return text
    pick = rng.choice(printable)
    # Everything up to and including the pick, then the pick again onwards.
    return text[: pick + 1] + text[pick:]
50
+
51
+
52
def _python_is_word_char(c: str) -> bool:
    """Return True when ``c`` is an underscore or is alphanumeric."""
    if c == "_":
        return True
    return c.isalnum()
54
+
55
+
56
def _python_eligible_idx(s: str, i: int) -> bool:
    """Return True when ``s[i]`` is a word character with word characters on both sides.

    Out-of-range indices, the first position and the last position are never
    eligible.
    """
    in_range = 0 <= i < len(s)
    if not (in_range and _python_is_word_char(s[i])):
        return False

    has_left = i > 0 and _python_is_word_char(s[i - 1])
    if not has_left:
        return False
    return i + 1 < len(s) and _python_is_word_char(s[i + 1])
64
+
65
+
66
def _python_draw_eligible_index(rng: random.Random, s: str, max_tries: int = 16) -> Optional[int]:
    """Pick an index of ``s`` that ``_python_eligible_idx`` accepts.

    Up to ``max_tries`` uniformly random indices are tried first. If none is
    eligible, one more random start index is drawn and every index is checked
    once, scanning forward from it and wrapping around. Returns None for an
    empty string or when no index is eligible.
    """
    length = len(s)
    if not length:
        return None

    for _ in range(max_tries):
        guess = rng.randrange(length)
        if _python_eligible_idx(s, guess):
            return guess

    # Fallback: a full circular scan guarantees an eligible index is found
    # whenever one exists.
    origin = rng.randrange(length)
    for step in range(length):
        candidate = (origin + step) % length
        if _python_eligible_idx(s, candidate):
            return candidate
    return None
84
+
85
+
86
def _fatfinger_python(
    text: str,
    *,
    rate: float,
    layout: dict[str, list[str]],
    rng: random.Random,
) -> str:
    """Apply random typing-error edits to ``text`` (pure-Python path).

    Parameters
    ----------
    - text: Input text.
    - rate: Edit rate; ``ceil(len(text) * rate)`` actions are attempted. A
      rate <= 0.0 returns the text unchanged.
    - layout: Maps a lowercase character to its neighbouring keys.
    - rng: RNG used for every random decision.

    Notes
    -----
    - All actions are drawn up front, before any edit is applied, so the RNG
      is consumed in a fixed order.
    - Positional actions (swap, delete, insert neighbour, replace with
      neighbour) act on an index from ``_python_draw_eligible_index`` and are
      skipped when no index is eligible.
    - The remaining actions operate on the whole string via the
      ``_python_*`` helpers.

    """
    if rate <= 0.0:
        return text

    budget = math.ceil(len(text) * rate)
    if budget == 0:
        return text

    positional_actions = ("char_swap", "missing_char", "extra_char", "nearby_char")
    whole_text_edits = {
        "skipped_space": _python_skipped_space,
        "random_space": _python_random_space,
        "unichar": _python_unichar,
        "repeated_char": _python_repeated_char,
    }
    # Positional names first, then the whole-text names in insertion order.
    action_pool = positional_actions + tuple(whole_text_edits)

    plan = [rng.choice(action_pool) for _ in range(budget)]

    current = text
    for action in plan:
        whole_text_edit = whole_text_edits.get(action)
        if whole_text_edit is not None:
            current = whole_text_edit(current, rng)
            continue

        at = _python_draw_eligible_index(rng, current)
        if at is None:
            continue

        here = current[at]
        head = current[:at]
        if action == "char_swap":
            # An eligible index always has a right-hand neighbour to swap with.
            current = head + current[at + 1] + here + current[at + 2 :]
        elif action == "missing_char":
            if _python_eligible_idx(current, at):
                current = head + current[at + 1 :]
        elif action == "extra_char":
            nearby = layout.get(here.lower(), []) or [here]
            inserted = rng.choice(nearby) or here
            current = head + inserted + current[at:]
        elif action == "nearby_char":
            nearby = layout.get(here.lower(), [])
            if nearby:
                substitute = rng.choice(nearby)
                current = head + substitute + current[at + 1 :]
    return current
139
+
140
+
141
def fatfinger(
    text: str,
    rate: float | None = None,
    keyboard: str = "CURATOR_QWERTY",
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    max_change_rate: float | None = None,
) -> str:
    """Introduce character-level "fat finger" edits with a Rust fast path.

    The rate is resolved by ``resolve_rate`` from ``rate`` or the legacy
    ``max_change_rate`` keyword (default 0.02). Empty text yields ``""`` and
    a rate of zero (after raising negatives to 0.0) returns the text
    unchanged. ``keyboard`` names the ``KEYNEIGHBORS`` attribute that holds
    the neighbour layout.
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=max_change_rate,
        default=0.02,
        legacy_name="max_change_rate",
    )

    generator = random.Random(seed) if rng is None else rng
    if not text:
        return ""

    non_negative_rate = max(0.0, resolved)
    if non_negative_rate == 0.0:
        return text

    neighbours = getattr(KEYNEIGHBORS, keyboard)

    rust_impl = _fatfinger_rust
    if rust_impl is None:
        return _fatfinger_python(
            text,
            rate=non_negative_rate,
            layout=neighbours,
            rng=generator,
        )

    # The Rust operation still uses the legacy keyword name for the rate.
    result = rust_impl(
        text,
        max_change_rate=non_negative_rate,
        layout=neighbours,
        rng=generator,
    )
    return cast(str, result)
176
+
177
+
178
class Typogre(Glitchling):
    """Glitchling that introduces deterministic keyboard-typing errors."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        max_change_rate: float | None = None,
        keyboard: str = "CURATOR_QWERTY",
        seed: int | None = None,
    ) -> None:
        """Create the glitchling.

        ``rate`` and the legacy ``max_change_rate`` are resolved by
        ``resolve_rate`` (default 0.02); ``keyboard`` and ``seed`` are
        forwarded to the base class unchanged.
        """
        # Assigned before the base initialiser runs.
        self._param_aliases = {"max_change_rate": "rate"}
        super().__init__(
            name="Typogre",
            corruption_function=fatfinger,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.EARLY,
            seed=seed,
            rate=resolve_rate(
                rate=rate,
                legacy_value=max_change_rate,
                default=0.02,
                legacy_name="max_change_rate",
            ),
            keyboard=keyboard,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return a plain-dict descriptor of this operation.

        Returns None when no rate is present in ``self.kwargs`` (under
        ``rate`` or the legacy ``max_change_rate``) or when ``KEYNEIGHBORS``
        has no attribute named after the configured keyboard.
        """
        options = self.kwargs
        configured = options.get("rate")
        if configured is None:
            configured = options.get("max_change_rate")
        if configured is None:
            return None

        keyboard_name = str(options.get("keyboard", "CURATOR_QWERTY"))
        neighbours = getattr(KEYNEIGHBORS, keyboard_name, None)
        if neighbours is None:
            return None

        # Copy each neighbour collection into a plain list for the descriptor.
        plain_layout = {}
        for key, adjacent in neighbours.items():
            plain_layout[key] = list(adjacent)

        return {
            "type": "typo",
            "rate": float(configured),
            "keyboard": keyboard_name,
            "layout": plain_layout,
        }
226
+
227
+
228
# Ready-to-use module-level instance built with the default arguments.
typogre = Typogre()


# Names exported by a star-import of this module.
__all__ = ["Typogre", "typogre"]