glitchlings 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/zoo/core.py +45 -5
- glitchlings/zoo/redactyl.py +46 -9
- glitchlings/zoo/reduple.py +35 -8
- glitchlings/zoo/rushmore.py +48 -25
- glitchlings/zoo/scannequin.py +33 -7
- glitchlings/zoo/typogre.py +71 -118
- {glitchlings-0.1.2.dist-info → glitchlings-0.1.3.dist-info}/METADATA +1 -1
- {glitchlings-0.1.2.dist-info → glitchlings-0.1.3.dist-info}/RECORD +11 -11
- {glitchlings-0.1.2.dist-info → glitchlings-0.1.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.1.2.dist-info → glitchlings-0.1.3.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.1.2.dist-info → glitchlings-0.1.3.dist-info}/licenses/LICENSE +0 -0
glitchlings/zoo/core.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
"""Core data structures used to model glitchlings and their interactions."""
|
2
2
|
|
3
|
-
|
4
|
-
from datasets import Dataset
|
3
|
+
import inspect
|
5
4
|
import random
|
5
|
+
from enum import IntEnum, auto
|
6
|
+
from hashlib import blake2s
|
6
7
|
from typing import Any, Protocol
|
7
8
|
|
9
|
+
from datasets import Dataset
|
10
|
+
|
8
11
|
|
9
12
|
class CorruptionCallable(Protocol):
|
10
13
|
"""Protocol describing a callable capable of corrupting text."""
|
@@ -77,12 +80,23 @@ class Glitchling:
|
|
77
80
|
|
78
81
|
setattr(self, key, value)
|
79
82
|
self.kwargs[key] = value
|
83
|
+
if key == "seed":
|
84
|
+
self.reset_rng(value)
|
80
85
|
|
81
86
|
def __corrupt(self, text: str, *args: Any, **kwargs: Any) -> str:
|
82
87
|
"""Execute the corruption callable, injecting the RNG when required."""
|
83
88
|
|
84
89
|
# Pass rng to underlying corruption function if it expects it.
|
85
|
-
|
90
|
+
try:
|
91
|
+
signature = inspect.signature(self.corruption_function)
|
92
|
+
except (TypeError, ValueError):
|
93
|
+
signature = None
|
94
|
+
|
95
|
+
expects_rng = False
|
96
|
+
if signature is not None:
|
97
|
+
expects_rng = "rng" in signature.parameters
|
98
|
+
|
99
|
+
if expects_rng:
|
86
100
|
corrupted = self.corruption_function(text, *args, rng=self.rng, **kwargs)
|
87
101
|
else:
|
88
102
|
corrupted = self.corruption_function(text, *args, **kwargs)
|
@@ -104,7 +118,11 @@ class Glitchling:
|
|
104
118
|
def __corrupt_row(row: dict[str, Any]) -> dict[str, Any]:
|
105
119
|
row = dict(row)
|
106
120
|
for column in columns:
|
107
|
-
|
121
|
+
value = row[column]
|
122
|
+
if isinstance(value, list):
|
123
|
+
row[column] = [self.corrupt(item) for item in value]
|
124
|
+
else:
|
125
|
+
row[column] = self.corrupt(value)
|
108
126
|
return row
|
109
127
|
|
110
128
|
return dataset.with_transform(__corrupt_row)
|
@@ -170,7 +188,29 @@ class Gaggle(Glitchling):
|
|
170
188
|
@staticmethod
|
171
189
|
def derive_seed(master_seed: int, glitchling_name: str, index: int) -> int:
|
172
190
|
"""Derive a deterministic seed for a glitchling based on the master seed."""
|
173
|
-
|
191
|
+
def _int_to_bytes(value: int) -> bytes:
|
192
|
+
if value == 0:
|
193
|
+
return b"\x00"
|
194
|
+
|
195
|
+
abs_value = abs(value)
|
196
|
+
length = max(1, (abs_value.bit_length() + 7) // 8)
|
197
|
+
|
198
|
+
if value < 0:
|
199
|
+
while True:
|
200
|
+
try:
|
201
|
+
return value.to_bytes(length, "big", signed=True)
|
202
|
+
except OverflowError:
|
203
|
+
length += 1
|
204
|
+
|
205
|
+
return abs_value.to_bytes(length, "big", signed=False)
|
206
|
+
|
207
|
+
hasher = blake2s(digest_size=8)
|
208
|
+
hasher.update(_int_to_bytes(master_seed))
|
209
|
+
hasher.update(b"\x00")
|
210
|
+
hasher.update(glitchling_name.encode("utf-8"))
|
211
|
+
hasher.update(b"\x00")
|
212
|
+
hasher.update(_int_to_bytes(index))
|
213
|
+
return int.from_bytes(hasher.digest(), "big")
|
174
214
|
|
175
215
|
def sort_glitchlings(self) -> None:
|
176
216
|
"""Sort glitchlings by wave then order to produce application order."""
|
glitchlings/zoo/redactyl.py
CHANGED
@@ -1,17 +1,24 @@
|
|
1
1
|
import re
|
2
2
|
import random
|
3
|
+
|
3
4
|
from .core import Glitchling, AttackWave
|
4
5
|
|
5
6
|
FULL_BLOCK = "█"
|
6
7
|
|
7
8
|
|
8
|
-
|
9
|
+
try:
|
10
|
+
from glitchlings._zoo_rust import redact_words as _redact_words_rust
|
11
|
+
except ImportError: # pragma: no cover - compiled extension not present
|
12
|
+
_redact_words_rust = None
|
13
|
+
|
14
|
+
|
15
|
+
def _python_redact_words(
|
9
16
|
text: str,
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
rng: random.Random
|
17
|
+
*,
|
18
|
+
replacement_char: str,
|
19
|
+
redaction_rate: float,
|
20
|
+
merge_adjacent: bool,
|
21
|
+
rng: random.Random,
|
15
22
|
) -> str:
|
16
23
|
"""Redact random words by replacing their characters.
|
17
24
|
|
@@ -23,12 +30,11 @@ def redact_words(
|
|
23
30
|
- seed: Seed used if `rng` not provided (default 151).
|
24
31
|
- rng: Optional RNG; overrides seed.
|
25
32
|
"""
|
26
|
-
if rng is None:
|
27
|
-
rng = random.Random(seed)
|
28
|
-
|
29
33
|
# Preserve exact spacing and punctuation by using regex
|
30
34
|
tokens = re.split(r"(\s+)", text)
|
31
35
|
word_indices = [i for i, token in enumerate(tokens) if i % 2 == 0 and token.strip()]
|
36
|
+
if not word_indices:
|
37
|
+
raise ValueError("Cannot redact words because the input text contains no redactable words.")
|
32
38
|
num_to_redact = max(1, int(len(word_indices) * redaction_rate))
|
33
39
|
|
34
40
|
# Sample from the indices of actual words
|
@@ -63,6 +69,37 @@ def redact_words(
|
|
63
69
|
return text
|
64
70
|
|
65
71
|
|
72
|
+
def redact_words(
|
73
|
+
text: str,
|
74
|
+
replacement_char: str = FULL_BLOCK,
|
75
|
+
redaction_rate: float = 0.05,
|
76
|
+
merge_adjacent: bool = False,
|
77
|
+
seed: int = 151,
|
78
|
+
rng: random.Random | None = None,
|
79
|
+
) -> str:
|
80
|
+
"""Redact random words by replacing their characters."""
|
81
|
+
|
82
|
+
if rng is None:
|
83
|
+
rng = random.Random(seed)
|
84
|
+
|
85
|
+
if _redact_words_rust is not None:
|
86
|
+
return _redact_words_rust(
|
87
|
+
text,
|
88
|
+
replacement_char,
|
89
|
+
redaction_rate,
|
90
|
+
merge_adjacent,
|
91
|
+
rng,
|
92
|
+
)
|
93
|
+
|
94
|
+
return _python_redact_words(
|
95
|
+
text,
|
96
|
+
replacement_char=replacement_char,
|
97
|
+
redaction_rate=redaction_rate,
|
98
|
+
merge_adjacent=merge_adjacent,
|
99
|
+
rng=rng,
|
100
|
+
)
|
101
|
+
|
102
|
+
|
66
103
|
class Redactyl(Glitchling):
|
67
104
|
"""Glitchling that redacts words with block characters."""
|
68
105
|
|
glitchlings/zoo/reduple.py
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
import re
|
2
2
|
import random
|
3
|
+
|
3
4
|
from .core import Glitchling, AttackWave
|
4
5
|
|
6
|
+
try:
|
7
|
+
from glitchlings._zoo_rust import reduplicate_words as _reduplicate_words_rust
|
8
|
+
except ImportError: # pragma: no cover - compiled extension not present
|
9
|
+
_reduplicate_words_rust = None
|
5
10
|
|
6
|
-
|
11
|
+
|
12
|
+
def _python_reduplicate_words(
|
7
13
|
text: str,
|
8
|
-
|
9
|
-
|
10
|
-
rng: random.Random
|
14
|
+
*,
|
15
|
+
reduplication_rate: float,
|
16
|
+
rng: random.Random,
|
11
17
|
) -> str:
|
12
18
|
"""Randomly reduplicate words in the text.
|
13
19
|
|
@@ -21,9 +27,6 @@ def reduplicate_words(
|
|
21
27
|
- Preserves spacing and punctuation by tokenizing with separators.
|
22
28
|
- Deterministic when run with a fixed seed or via Gaggle.
|
23
29
|
"""
|
24
|
-
if rng is None:
|
25
|
-
rng = random.Random(seed)
|
26
|
-
|
27
30
|
# Preserve exact spacing and punctuation by using regex
|
28
31
|
tokens = re.split(r"(\s+)", text) # Split but keep separators
|
29
32
|
|
@@ -45,10 +48,34 @@ def reduplicate_words(
|
|
45
48
|
tokens[i] = f"{prefix}{core} {core}{suffix}"
|
46
49
|
else:
|
47
50
|
tokens[i] = f"{word} {word}"
|
48
|
-
|
49
51
|
return "".join(tokens)
|
50
52
|
|
51
53
|
|
54
|
+
def reduplicate_words(
|
55
|
+
text: str,
|
56
|
+
reduplication_rate: float = 0.05,
|
57
|
+
seed: int | None = None,
|
58
|
+
rng: random.Random | None = None,
|
59
|
+
) -> str:
|
60
|
+
"""Randomly reduplicate words in the text.
|
61
|
+
|
62
|
+
Falls back to the Python implementation when the optional Rust
|
63
|
+
extension is unavailable.
|
64
|
+
"""
|
65
|
+
|
66
|
+
if rng is None:
|
67
|
+
rng = random.Random(seed)
|
68
|
+
|
69
|
+
if _reduplicate_words_rust is not None:
|
70
|
+
return _reduplicate_words_rust(text, reduplication_rate, rng)
|
71
|
+
|
72
|
+
return _python_reduplicate_words(
|
73
|
+
text,
|
74
|
+
reduplication_rate=reduplication_rate,
|
75
|
+
rng=rng,
|
76
|
+
)
|
77
|
+
|
78
|
+
|
52
79
|
class Reduple(Glitchling):
|
53
80
|
"""Glitchling that repeats words to simulate stuttering speech."""
|
54
81
|
|
glitchlings/zoo/rushmore.py
CHANGED
@@ -1,41 +1,40 @@
|
|
1
|
+
import math
|
1
2
|
import random
|
2
3
|
import re
|
4
|
+
|
3
5
|
from .core import Glitchling, AttackWave
|
4
6
|
|
7
|
+
try:
|
8
|
+
from glitchlings._zoo_rust import delete_random_words as _delete_random_words_rust
|
9
|
+
except ImportError: # pragma: no cover - compiled extension not present
|
10
|
+
_delete_random_words_rust = None
|
5
11
|
|
6
|
-
|
12
|
+
|
13
|
+
def _python_delete_random_words(
|
7
14
|
text: str,
|
8
|
-
|
9
|
-
|
10
|
-
rng: random.Random
|
15
|
+
*,
|
16
|
+
max_deletion_rate: float,
|
17
|
+
rng: random.Random,
|
11
18
|
) -> str:
|
12
|
-
"""Delete random words from the input text.
|
19
|
+
"""Delete random words from the input text while preserving whitespace."""
|
13
20
|
|
14
|
-
|
15
|
-
- text: The input text.
|
16
|
-
- max_deletion_rate: The maximum proportion of words to delete (default 0.01).
|
17
|
-
- seed: Optional seed if `rng` not provided.
|
18
|
-
- rng: Optional RNG; overrides seed.
|
19
|
-
"""
|
20
|
-
if rng is None:
|
21
|
-
rng = random.Random(seed)
|
22
|
-
|
23
|
-
# Preserve exact spacing and punctuation by using regex
|
24
|
-
tokens = re.split(r"(\s+)", text) # Split but keep separators
|
25
|
-
|
26
|
-
for i in range(
|
27
|
-
2, len(tokens), 2
|
28
|
-
): # Every other token is a word, but skip the first word
|
29
|
-
if i >= len(tokens):
|
30
|
-
break
|
21
|
+
tokens = re.split(r"(\s+)", text) # Split but keep separators for later rejoin
|
31
22
|
|
23
|
+
candidate_indices: list[int] = []
|
24
|
+
for i in range(2, len(tokens), 2): # Every other token is a word, skip the first word
|
32
25
|
word = tokens[i]
|
33
|
-
if not word or word.isspace():
|
26
|
+
if not word or word.isspace():
|
34
27
|
continue
|
35
28
|
|
36
|
-
|
29
|
+
candidate_indices.append(i)
|
30
|
+
|
31
|
+
allowed_deletions = math.floor(len(candidate_indices) * max_deletion_rate)
|
32
|
+
if allowed_deletions <= 0:
|
33
|
+
return text
|
34
|
+
|
35
|
+
for i in candidate_indices:
|
37
36
|
if rng.random() < max_deletion_rate:
|
38
|
-
|
37
|
+
word = tokens[i]
|
39
38
|
match = re.match(r"^(\W*)(.*?)(\W*)$", word)
|
40
39
|
if match:
|
41
40
|
prefix, _, suffix = match.groups()
|
@@ -50,6 +49,30 @@ def delete_random_words(
|
|
50
49
|
return text
|
51
50
|
|
52
51
|
|
52
|
+
def delete_random_words(
|
53
|
+
text: str,
|
54
|
+
max_deletion_rate: float = 0.01,
|
55
|
+
seed: int | None = None,
|
56
|
+
rng: random.Random | None = None,
|
57
|
+
) -> str:
|
58
|
+
"""Delete random words from the input text.
|
59
|
+
|
60
|
+
Uses the optional Rust implementation when available.
|
61
|
+
"""
|
62
|
+
|
63
|
+
if rng is None:
|
64
|
+
rng = random.Random(seed)
|
65
|
+
|
66
|
+
if _delete_random_words_rust is not None:
|
67
|
+
return _delete_random_words_rust(text, max_deletion_rate, rng)
|
68
|
+
|
69
|
+
return _python_delete_random_words(
|
70
|
+
text,
|
71
|
+
max_deletion_rate=max_deletion_rate,
|
72
|
+
rng=rng,
|
73
|
+
)
|
74
|
+
|
75
|
+
|
53
76
|
class Rushmore(Glitchling):
|
54
77
|
"""Glitchling that deletes words to simulate missing information."""
|
55
78
|
|
glitchlings/zoo/scannequin.py
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
import re
|
2
2
|
import random
|
3
|
+
|
3
4
|
from .core import Glitchling, AttackWave, AttackOrder
|
4
5
|
|
6
|
+
try:
|
7
|
+
from glitchlings._zoo_rust import ocr_artifacts as _ocr_artifacts_rust
|
8
|
+
except ImportError: # pragma: no cover - compiled extension not present
|
9
|
+
_ocr_artifacts_rust = None
|
5
10
|
|
6
|
-
|
11
|
+
|
12
|
+
def _python_ocr_artifacts(
|
7
13
|
text: str,
|
8
|
-
|
9
|
-
|
10
|
-
rng: random.Random
|
14
|
+
*,
|
15
|
+
error_rate: float,
|
16
|
+
rng: random.Random,
|
11
17
|
) -> str:
|
12
18
|
"""Introduce OCR-like artifacts into text.
|
13
19
|
|
@@ -27,9 +33,6 @@ def ocr_artifacts(
|
|
27
33
|
if not text:
|
28
34
|
return text
|
29
35
|
|
30
|
-
if rng is None:
|
31
|
-
rng = random.Random(seed)
|
32
|
-
|
33
36
|
# map: source -> list of possible replacements
|
34
37
|
# Keep patterns small and specific; longer patterns first avoid overmatching
|
35
38
|
confusion_table: list[tuple[str, list[str]]] = [
|
@@ -115,6 +118,29 @@ def ocr_artifacts(
|
|
115
118
|
return "".join(out_parts)
|
116
119
|
|
117
120
|
|
121
|
+
def ocr_artifacts(
|
122
|
+
text: str,
|
123
|
+
error_rate: float = 0.02,
|
124
|
+
seed: int | None = None,
|
125
|
+
rng: random.Random | None = None,
|
126
|
+
) -> str:
|
127
|
+
"""Introduce OCR-like artifacts into text.
|
128
|
+
|
129
|
+
Prefers the Rust implementation when available.
|
130
|
+
"""
|
131
|
+
|
132
|
+
if not text:
|
133
|
+
return text
|
134
|
+
|
135
|
+
if rng is None:
|
136
|
+
rng = random.Random(seed)
|
137
|
+
|
138
|
+
if _ocr_artifacts_rust is not None:
|
139
|
+
return _ocr_artifacts_rust(text, error_rate, rng)
|
140
|
+
|
141
|
+
return _python_ocr_artifacts(text, error_rate=error_rate, rng=rng)
|
142
|
+
|
143
|
+
|
118
144
|
class Scannequin(Glitchling):
|
119
145
|
"""Glitchling that simulates OCR artifacts using common confusions."""
|
120
146
|
|
glitchlings/zoo/typogre.py
CHANGED
@@ -1,118 +1,81 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import random
|
4
|
+
from typing import Optional
|
5
|
+
|
1
6
|
from .core import Glitchling, AttackWave, AttackOrder
|
2
7
|
from ..util import KEYNEIGHBORS
|
3
|
-
import random
|
4
|
-
import re
|
5
|
-
from typing import Literal, Optional
|
6
8
|
|
7
|
-
|
9
|
+
try:
|
10
|
+
from glitchlings._typogre_rust import fatfinger as _fatfinger_rust
|
11
|
+
except ImportError: # pragma: no cover - compiled extension not present
|
12
|
+
_fatfinger_rust = None
|
8
13
|
|
9
14
|
|
10
|
-
def
|
15
|
+
def _python_unichar(text: str, rng: random.Random) -> str:
|
11
16
|
"""Collapse one random doubled letter (like 'ee' in 'seed') to a single occurrence."""
|
12
|
-
|
17
|
+
import re
|
18
|
+
|
13
19
|
matches = list(re.finditer(r"((.)\2)(?=\w)", text))
|
14
20
|
if not matches:
|
15
21
|
return text
|
16
|
-
|
17
|
-
start, end = m.span(1)
|
18
|
-
# Replace the doubled pair with a single char
|
22
|
+
start, end = rng.choice(matches).span(1)
|
19
23
|
return text[:start] + text[start] + text[end:]
|
20
24
|
|
21
25
|
|
22
|
-
def
|
23
|
-
|
24
|
-
key_neighbors = getattr(KEYNEIGHBORS, "CURATOR_QWERTY")
|
25
|
-
char = text[index]
|
26
|
-
neighbors = key_neighbors.get(char, [])
|
27
|
-
if not neighbors:
|
28
|
-
return text
|
29
|
-
new_char = rng.choice(neighbors)
|
30
|
-
return text[:index] + new_char + text[index + 1 :]
|
31
|
-
|
26
|
+
def _python_skipped_space(text: str, rng: random.Random) -> str:
|
27
|
+
import re
|
32
28
|
|
33
|
-
|
34
|
-
|
35
|
-
index: int,
|
36
|
-
op: Literal["delete", "insert", "swap"],
|
37
|
-
rng: random.Random,
|
38
|
-
key_neighbors=None,
|
39
|
-
):
|
40
|
-
if key_neighbors is None:
|
41
|
-
key_neighbors = getattr(KEYNEIGHBORS, "CURATOR_QWERTY")
|
42
|
-
if index < 0 or index >= len(text):
|
43
|
-
return text
|
44
|
-
if op == "delete":
|
45
|
-
return text[:index] + text[index + 1 :]
|
46
|
-
if op == "swap":
|
47
|
-
if index >= len(text) - 1:
|
48
|
-
return text
|
49
|
-
return text[:index] + text[index + 1] + text[index] + text[index + 2 :]
|
50
|
-
# insert (choose neighbor of this char) – if none, just duplicate char
|
51
|
-
char = text[index]
|
52
|
-
candidates = key_neighbors.get(char, []) or [char]
|
53
|
-
new_char = rng.choice(candidates)
|
54
|
-
return text[:index] + new_char + text[index:]
|
55
|
-
|
56
|
-
|
57
|
-
def repeated_char(text: str, rng: random.Random) -> str:
|
58
|
-
"""Repeat a random non-space character once (e.g., 'cat' -> 'caat')."""
|
59
|
-
positions = [i for i, c in enumerate(text) if not c.isspace()]
|
60
|
-
if not positions:
|
29
|
+
space_positions = [m.start() for m in re.finditer(r" ", text)]
|
30
|
+
if not space_positions:
|
61
31
|
return text
|
62
|
-
|
63
|
-
return text[:
|
32
|
+
idx = rng.choice(space_positions)
|
33
|
+
return text[:idx] + text[idx + 1 :]
|
64
34
|
|
65
35
|
|
66
|
-
def
|
67
|
-
"""Insert a space at a random boundary between characters (excluding ends)."""
|
36
|
+
def _python_random_space(text: str, rng: random.Random) -> str:
|
68
37
|
if len(text) < 2:
|
69
38
|
return text
|
70
39
|
idx = rng.randrange(1, len(text))
|
71
40
|
return text[:idx] + " " + text[idx:]
|
72
41
|
|
73
42
|
|
74
|
-
def
|
75
|
-
|
76
|
-
|
77
|
-
if not space_positions:
|
43
|
+
def _python_repeated_char(text: str, rng: random.Random) -> str:
|
44
|
+
positions = [i for i, c in enumerate(text) if not c.isspace()]
|
45
|
+
if not positions:
|
78
46
|
return text
|
79
|
-
|
80
|
-
|
81
|
-
return text[:idx] + text[idx + 1 :]
|
47
|
+
i = rng.choice(positions)
|
48
|
+
return text[:i] + text[i] + text[i:]
|
82
49
|
|
83
50
|
|
84
|
-
def
|
51
|
+
def _python_is_word_char(c: str) -> bool:
|
85
52
|
return c.isalnum() or c == "_"
|
86
53
|
|
87
54
|
|
88
|
-
def
|
89
|
-
"""O(1) check whether index i is eligible under preserve_first_last."""
|
55
|
+
def _python_eligible_idx(s: str, i: int) -> bool:
|
90
56
|
if i < 0 or i >= len(s):
|
91
57
|
return False
|
92
|
-
if not
|
58
|
+
if not _python_is_word_char(s[i]):
|
93
59
|
return False
|
94
|
-
|
95
|
-
|
96
|
-
right_ok = i + 1 < len(s) and _is_word_char(s[i + 1])
|
60
|
+
left_ok = i > 0 and _python_is_word_char(s[i - 1])
|
61
|
+
right_ok = i + 1 < len(s) and _python_is_word_char(s[i + 1])
|
97
62
|
return left_ok and right_ok
|
98
63
|
|
99
64
|
|
100
|
-
def
|
65
|
+
def _python_draw_eligible_index(
|
101
66
|
rng: random.Random, s: str, max_tries: int = 16
|
102
67
|
) -> Optional[int]:
|
103
|
-
"""Try a few uniform draws; if none hit, do a single wraparound scan."""
|
104
68
|
n = len(s)
|
105
69
|
if n == 0:
|
106
70
|
return None
|
107
71
|
for _ in range(max_tries):
|
108
72
|
i = rng.randrange(n)
|
109
|
-
if
|
73
|
+
if _python_eligible_idx(s, i):
|
110
74
|
return i
|
111
|
-
# Fallback: linear scan starting from a random point (rare path)
|
112
75
|
start = rng.randrange(n)
|
113
76
|
i = start
|
114
77
|
while True:
|
115
|
-
if
|
78
|
+
if _python_eligible_idx(s, i):
|
116
79
|
return i
|
117
80
|
i += 1
|
118
81
|
if i == n:
|
@@ -121,89 +84,78 @@ def _draw_eligible_index(
|
|
121
84
|
return None
|
122
85
|
|
123
86
|
|
124
|
-
def
|
87
|
+
def _fatfinger_python(
|
125
88
|
text: str,
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
rng: random.Random
|
89
|
+
*,
|
90
|
+
max_change_rate: float,
|
91
|
+
layout: dict[str, list[str]],
|
92
|
+
rng: random.Random,
|
130
93
|
) -> str:
|
131
|
-
"""Introduce character-level "fat finger" edits.
|
132
|
-
|
133
|
-
Parameters
|
134
|
-
- text: Input string to corrupt.
|
135
|
-
- max_change_rate: Max proportion of characters to edit (default 0.02).
|
136
|
-
- keyboard: Name of keyboard neighbor map from util.KEYNEIGHBORS to use (default "CURATOR_QWERTY").
|
137
|
-
- seed: Optional seed used if `rng` is not provided; creates a dedicated Random.
|
138
|
-
- rng: Optional random.Random to use; if provided, overrides `seed`.
|
139
|
-
|
140
|
-
Notes
|
141
|
-
- Chooses indices lazily from the current text after each edit to keep offsets valid.
|
142
|
-
- Uses the glitchling's own RNG for determinism when run via Gaggle/summon.
|
143
|
-
"""
|
144
|
-
if rng is None:
|
145
|
-
rng = random.Random(seed)
|
146
|
-
if not text:
|
147
|
-
return ""
|
148
|
-
|
149
94
|
s = text
|
150
95
|
max_changes = max(1, int(len(s) * max_change_rate))
|
151
96
|
|
152
|
-
# Prebind for speed
|
153
|
-
layout = getattr(KEYNEIGHBORS, keyboard)
|
154
|
-
choose = rng.choice
|
155
|
-
|
156
|
-
# Actions that require a specific index vs. "global" actions
|
157
97
|
positional_actions = ("char_swap", "missing_char", "extra_char", "nearby_char")
|
158
98
|
global_actions = ("skipped_space", "random_space", "unichar", "repeated_char")
|
159
99
|
all_actions = positional_actions + global_actions
|
160
100
|
|
161
|
-
|
162
|
-
actions_drawn = [choose(all_actions) for _ in range(max_changes)]
|
101
|
+
actions_drawn = [rng.choice(all_actions) for _ in range(max_changes)]
|
163
102
|
|
164
103
|
for action in actions_drawn:
|
165
104
|
if action in positional_actions:
|
166
|
-
idx =
|
105
|
+
idx = _python_draw_eligible_index(rng, s)
|
167
106
|
if idx is None:
|
168
|
-
continue
|
169
|
-
|
107
|
+
continue
|
170
108
|
if action == "char_swap":
|
171
|
-
# Try swapping with neighbor while respecting word boundaries
|
172
|
-
|
173
109
|
j = idx + 1
|
174
110
|
s = s[:idx] + s[j] + s[idx] + s[j + 1 :]
|
175
|
-
|
176
111
|
elif action == "missing_char":
|
177
|
-
if
|
112
|
+
if _python_eligible_idx(s, idx):
|
178
113
|
s = s[:idx] + s[idx + 1 :]
|
179
|
-
|
180
114
|
elif action == "extra_char":
|
181
115
|
ch = s[idx]
|
182
116
|
neighbors = layout.get(ch.lower(), []) or [ch]
|
183
|
-
ins =
|
117
|
+
ins = rng.choice(neighbors) or ch
|
184
118
|
s = s[:idx] + ins + s[idx:]
|
185
|
-
|
186
119
|
elif action == "nearby_char":
|
187
120
|
ch = s[idx]
|
188
121
|
neighbors = layout.get(ch.lower(), [])
|
189
122
|
if neighbors:
|
190
|
-
rep =
|
123
|
+
rep = rng.choice(neighbors)
|
191
124
|
s = s[:idx] + rep + s[idx + 1 :]
|
192
|
-
|
193
125
|
else:
|
194
|
-
# "Global" actions that internally pick their own positions
|
195
126
|
if action == "skipped_space":
|
196
|
-
s =
|
127
|
+
s = _python_skipped_space(s, rng)
|
197
128
|
elif action == "random_space":
|
198
|
-
s =
|
129
|
+
s = _python_random_space(s, rng)
|
199
130
|
elif action == "unichar":
|
200
|
-
s =
|
131
|
+
s = _python_unichar(s, rng)
|
201
132
|
elif action == "repeated_char":
|
202
|
-
s =
|
203
|
-
|
133
|
+
s = _python_repeated_char(s, rng)
|
204
134
|
return s
|
205
135
|
|
206
136
|
|
137
|
+
def fatfinger(
|
138
|
+
text: str,
|
139
|
+
max_change_rate: float = 0.02,
|
140
|
+
keyboard: str = "CURATOR_QWERTY",
|
141
|
+
seed: int | None = None,
|
142
|
+
rng: random.Random | None = None,
|
143
|
+
) -> str:
|
144
|
+
"""Introduce character-level "fat finger" edits with a Rust fast path."""
|
145
|
+
|
146
|
+
if rng is None:
|
147
|
+
rng = random.Random(seed)
|
148
|
+
if not text:
|
149
|
+
return ""
|
150
|
+
|
151
|
+
layout = getattr(KEYNEIGHBORS, keyboard)
|
152
|
+
|
153
|
+
if _fatfinger_rust is not None:
|
154
|
+
return _fatfinger_rust(text, max_change_rate=max_change_rate, layout=layout, rng=rng)
|
155
|
+
|
156
|
+
return _fatfinger_python(text, max_change_rate=max_change_rate, layout=layout, rng=rng)
|
157
|
+
|
158
|
+
|
207
159
|
class Typogre(Glitchling):
|
208
160
|
"""Glitchling that introduces deterministic keyboard-typing errors."""
|
209
161
|
|
@@ -229,3 +181,4 @@ typogre = Typogre()
|
|
229
181
|
|
230
182
|
|
231
183
|
__all__ = ["Typogre", "typogre"]
|
184
|
+
|
@@ -5,16 +5,16 @@ glitchlings/dlc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
glitchlings/dlc/prime.py,sha256=WnLIon2WbdPGx_PK4vF6nOwJICXudZ6zKGR1hVES4Oc,1452
|
6
6
|
glitchlings/util/__init__.py,sha256=OCpWFtloU-sATBv2XpBGlkR7UFR6RemUtuCheuRA4yw,4018
|
7
7
|
glitchlings/zoo/__init__.py,sha256=hXQci2tysMoRHXiR6NDkWtGkKgcO0xxsMB91eiM_Llc,1344
|
8
|
-
glitchlings/zoo/core.py,sha256=
|
8
|
+
glitchlings/zoo/core.py,sha256=5f9pWBZZSDADiUSs-xUahIqCEb9EUq-YcR_N5HzBAw0,8021
|
9
9
|
glitchlings/zoo/jargoyle.py,sha256=fvBP4ngqZ9BHLmpIjiLqGedriwAMuZc6ryqKT5GWfPw,6924
|
10
10
|
glitchlings/zoo/mim1c.py,sha256=X4jW4YrNqbyG0IEDx7wXUsPTwrUXGw2vXUO1kC2yY94,2471
|
11
|
-
glitchlings/zoo/redactyl.py,sha256=
|
12
|
-
glitchlings/zoo/reduple.py,sha256=
|
13
|
-
glitchlings/zoo/rushmore.py,sha256=
|
14
|
-
glitchlings/zoo/scannequin.py,sha256=
|
15
|
-
glitchlings/zoo/typogre.py,sha256=
|
16
|
-
glitchlings-0.1.
|
17
|
-
glitchlings-0.1.
|
18
|
-
glitchlings-0.1.
|
19
|
-
glitchlings-0.1.
|
20
|
-
glitchlings-0.1.
|
11
|
+
glitchlings/zoo/redactyl.py,sha256=T0SAAbkva4A-tnQkXsUJ43N6Q33TsKElDvldUz69sMQ,3546
|
12
|
+
glitchlings/zoo/reduple.py,sha256=ML4TLQNfOkSaF7G9Sjy_i9ILB4FIl1I101CIppNGmOw,2773
|
13
|
+
glitchlings/zoo/rushmore.py,sha256=FH-pHnj1XKFzLRRQIHOojTkbkCpipNKnxSfxP9UGYZI,2528
|
14
|
+
glitchlings/zoo/scannequin.py,sha256=4QP_dpReUxno0mk5Hnn2uCfd3B6eDa7ZGePuW1dyqBU,4630
|
15
|
+
glitchlings/zoo/typogre.py,sha256=8aYULO4nvdyFDsknAfrlQYKeWz_Tgh5uXAkF3omHe0o,5358
|
16
|
+
glitchlings-0.1.3.dist-info/METADATA,sha256=fwqJfu1FrQwJfAnc5UQIaaN3L7er_FWek0cMzRFSVuw,24978
|
17
|
+
glitchlings-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
glitchlings-0.1.3.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
|
19
|
+
glitchlings-0.1.3.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
|
20
|
+
glitchlings-0.1.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|