glitchlings 0.4.1__cp311-cp311-macosx_11_0_universal2.whl → 0.4.3__cp311-cp311-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +30 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust.cpython-311-darwin.so +0 -0
- glitchlings/compat.py +284 -0
- glitchlings/config.py +164 -34
- glitchlings/config.toml +1 -1
- glitchlings/dlc/__init__.py +3 -1
- glitchlings/dlc/_shared.py +68 -0
- glitchlings/dlc/huggingface.py +26 -41
- glitchlings/dlc/prime.py +64 -101
- glitchlings/dlc/pytorch.py +216 -0
- glitchlings/dlc/pytorch_lightning.py +233 -0
- glitchlings/lexicon/__init__.py +12 -33
- glitchlings/lexicon/_cache.py +21 -22
- glitchlings/lexicon/data/default_vector_cache.json +80 -14
- glitchlings/lexicon/metrics.py +1 -8
- glitchlings/lexicon/vector.py +109 -49
- glitchlings/lexicon/wordnet.py +89 -49
- glitchlings/main.py +30 -24
- glitchlings/util/__init__.py +18 -4
- glitchlings/util/adapters.py +27 -0
- glitchlings/zoo/__init__.py +26 -15
- glitchlings/zoo/_ocr_confusions.py +1 -3
- glitchlings/zoo/_rate.py +1 -4
- glitchlings/zoo/_sampling.py +0 -1
- glitchlings/zoo/_text_utils.py +1 -5
- glitchlings/zoo/adjax.py +2 -4
- glitchlings/zoo/apostrofae.py +128 -0
- glitchlings/zoo/assets/__init__.py +0 -0
- glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
- glitchlings/zoo/core.py +152 -87
- glitchlings/zoo/jargoyle.py +50 -45
- glitchlings/zoo/mim1c.py +11 -10
- glitchlings/zoo/redactyl.py +16 -16
- glitchlings/zoo/reduple.py +5 -3
- glitchlings/zoo/rushmore.py +4 -10
- glitchlings/zoo/scannequin.py +7 -6
- glitchlings/zoo/typogre.py +8 -9
- glitchlings/zoo/zeedub.py +6 -3
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/METADATA +101 -4
- glitchlings-0.4.3.dist-info/RECORD +46 -0
- glitchlings/lexicon/graph.py +0 -290
- glitchlings-0.4.1.dist-info/RECORD +0 -39
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/top_level.txt +0 -0
glitchlings/zoo/redactyl.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import re
|
|
2
1
|
import random
|
|
3
|
-
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any, cast
|
|
4
4
|
|
|
5
5
|
from ._rate import resolve_rate
|
|
6
6
|
from ._sampling import weighted_sample_without_replacement
|
|
@@ -32,24 +32,22 @@ def _python_redact_words(
|
|
|
32
32
|
"""Redact random words by replacing their characters.
|
|
33
33
|
|
|
34
34
|
Parameters
|
|
35
|
+
----------
|
|
35
36
|
- text: Input text.
|
|
36
37
|
- replacement_char: The character to use for redaction (default FULL_BLOCK).
|
|
37
38
|
- rate: Max proportion of words to redact (default 0.05).
|
|
38
39
|
- merge_adjacent: If True, merges adjacent redactions across intervening non-word chars.
|
|
39
40
|
- rng: RNG used for sampling decisions.
|
|
40
41
|
- unweighted: When True, sample words uniformly instead of by length.
|
|
42
|
+
|
|
41
43
|
"""
|
|
42
44
|
tokens = split_preserving_whitespace(text)
|
|
43
45
|
word_tokens = collect_word_tokens(tokens)
|
|
44
46
|
if not word_tokens:
|
|
45
|
-
raise ValueError(
|
|
46
|
-
"Cannot redact words because the input text contains no redactable words."
|
|
47
|
-
)
|
|
47
|
+
raise ValueError("Cannot redact words because the input text contains no redactable words.")
|
|
48
48
|
|
|
49
49
|
population = [token.index for token in word_tokens]
|
|
50
|
-
weights = [
|
|
51
|
-
1.0 if unweighted else float(token.core_length) for token in word_tokens
|
|
52
|
-
]
|
|
50
|
+
weights = [1.0 if unweighted else float(token.core_length) for token in word_tokens]
|
|
53
51
|
|
|
54
52
|
clamped_rate = max(0.0, min(rate, 1.0))
|
|
55
53
|
raw_quota = len(population) * clamped_rate
|
|
@@ -105,7 +103,6 @@ def redact_words(
|
|
|
105
103
|
unweighted: bool = False,
|
|
106
104
|
) -> str:
|
|
107
105
|
"""Redact random words by replacing their characters."""
|
|
108
|
-
|
|
109
106
|
effective_rate = resolve_rate(
|
|
110
107
|
rate=rate,
|
|
111
108
|
legacy_value=redaction_rate,
|
|
@@ -122,13 +119,16 @@ def redact_words(
|
|
|
122
119
|
use_rust = _redact_words_rust is not None and isinstance(merge_adjacent, bool)
|
|
123
120
|
|
|
124
121
|
if use_rust:
|
|
125
|
-
return _redact_words_rust(
|
|
126
|
-
text,
|
|
127
|
-
replacement_char,
|
|
128
|
-
clamped_rate,
|
|
129
|
-
merge_adjacent,
|
|
130
|
-
unweighted_flag,
|
|
131
|
-
rng,
|
|
122
|
+
return cast(
|
|
123
|
+
str,
|
|
124
|
+
_redact_words_rust(
|
|
125
|
+
text,
|
|
126
|
+
replacement_char,
|
|
127
|
+
clamped_rate,
|
|
128
|
+
merge_adjacent,
|
|
129
|
+
unweighted_flag,
|
|
130
|
+
rng,
|
|
131
|
+
),
|
|
132
132
|
)
|
|
133
133
|
|
|
134
134
|
return _python_redact_words(
|
glitchlings/zoo/reduple.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import random
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any, cast
|
|
3
3
|
|
|
4
4
|
from ._rate import resolve_rate
|
|
5
5
|
from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
|
|
@@ -21,14 +21,17 @@ def _python_reduplicate_words(
|
|
|
21
21
|
"""Randomly reduplicate words in the text.
|
|
22
22
|
|
|
23
23
|
Parameters
|
|
24
|
+
----------
|
|
24
25
|
- text: Input text.
|
|
25
26
|
- rate: Max proportion of words to reduplicate (default 0.05).
|
|
26
27
|
- rng: RNG used for sampling decisions.
|
|
27
28
|
- unweighted: When True, sample words uniformly instead of length-weighted.
|
|
28
29
|
|
|
29
30
|
Notes
|
|
31
|
+
-----
|
|
30
32
|
- Preserves spacing and punctuation by tokenizing with separators.
|
|
31
33
|
- Deterministic when run with a fixed seed or via Gaggle.
|
|
34
|
+
|
|
32
35
|
"""
|
|
33
36
|
tokens = split_preserving_whitespace(text)
|
|
34
37
|
word_tokens = collect_word_tokens(tokens)
|
|
@@ -77,7 +80,6 @@ def reduplicate_words(
|
|
|
77
80
|
Falls back to the Python implementation when the optional Rust
|
|
78
81
|
extension is unavailable.
|
|
79
82
|
"""
|
|
80
|
-
|
|
81
83
|
effective_rate = resolve_rate(
|
|
82
84
|
rate=rate,
|
|
83
85
|
legacy_value=reduplication_rate,
|
|
@@ -92,7 +94,7 @@ def reduplicate_words(
|
|
|
92
94
|
unweighted_flag = bool(unweighted)
|
|
93
95
|
|
|
94
96
|
if _reduplicate_words_rust is not None:
|
|
95
|
-
return _reduplicate_words_rust(text, clamped_rate, unweighted_flag, rng)
|
|
97
|
+
return cast(str, _reduplicate_words_rust(text, clamped_rate, unweighted_flag, rng))
|
|
96
98
|
|
|
97
99
|
return _python_reduplicate_words(
|
|
98
100
|
text,
|
glitchlings/zoo/rushmore.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import math
|
|
2
2
|
import random
|
|
3
3
|
import re
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any, cast
|
|
5
5
|
|
|
6
6
|
from ._rate import resolve_rate
|
|
7
7
|
from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
|
|
@@ -21,7 +21,6 @@ def _python_delete_random_words(
|
|
|
21
21
|
unweighted: bool = False,
|
|
22
22
|
) -> str:
|
|
23
23
|
"""Delete random words from the input text while preserving whitespace."""
|
|
24
|
-
|
|
25
24
|
effective_rate = max(rate, 0.0)
|
|
26
25
|
if effective_rate <= 0.0:
|
|
27
26
|
return text
|
|
@@ -37,15 +36,11 @@ def _python_delete_random_words(
|
|
|
37
36
|
if not weighted_tokens:
|
|
38
37
|
return text
|
|
39
38
|
|
|
40
|
-
allowed_deletions = min(
|
|
41
|
-
len(weighted_tokens), math.floor(len(weighted_tokens) * effective_rate)
|
|
42
|
-
)
|
|
39
|
+
allowed_deletions = min(len(weighted_tokens), math.floor(len(weighted_tokens) * effective_rate))
|
|
43
40
|
if allowed_deletions <= 0:
|
|
44
41
|
return text
|
|
45
42
|
|
|
46
|
-
mean_weight = sum(weight for _, weight, _ in weighted_tokens) / len(
|
|
47
|
-
weighted_tokens
|
|
48
|
-
)
|
|
43
|
+
mean_weight = sum(weight for _, weight, _ in weighted_tokens) / len(weighted_tokens)
|
|
49
44
|
|
|
50
45
|
deletions = 0
|
|
51
46
|
for index, weight, token in weighted_tokens:
|
|
@@ -88,7 +83,6 @@ def delete_random_words(
|
|
|
88
83
|
|
|
89
84
|
Uses the optional Rust implementation when available.
|
|
90
85
|
"""
|
|
91
|
-
|
|
92
86
|
effective_rate = resolve_rate(
|
|
93
87
|
rate=rate,
|
|
94
88
|
legacy_value=max_deletion_rate,
|
|
@@ -103,7 +97,7 @@ def delete_random_words(
|
|
|
103
97
|
unweighted_flag = bool(unweighted)
|
|
104
98
|
|
|
105
99
|
if _delete_random_words_rust is not None:
|
|
106
|
-
return _delete_random_words_rust(text, clamped_rate, unweighted_flag, rng)
|
|
100
|
+
return cast(str, _delete_random_words_rust(text, clamped_rate, unweighted_flag, rng))
|
|
107
101
|
|
|
108
102
|
return _python_delete_random_words(
|
|
109
103
|
text,
|
glitchlings/zoo/scannequin.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import re
|
|
2
1
|
import random
|
|
3
|
-
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any, cast
|
|
4
4
|
|
|
5
5
|
from ._ocr_confusions import load_confusion_table
|
|
6
|
-
from .core import Glitchling, AttackWave, AttackOrder
|
|
7
6
|
from ._rate import resolve_rate
|
|
7
|
+
from .core import AttackOrder, AttackWave, Glitchling
|
|
8
8
|
|
|
9
9
|
try:
|
|
10
10
|
from glitchlings._zoo_rust import ocr_artifacts as _ocr_artifacts_rust
|
|
@@ -21,17 +21,20 @@ def _python_ocr_artifacts(
|
|
|
21
21
|
"""Introduce OCR-like artifacts into text.
|
|
22
22
|
|
|
23
23
|
Parameters
|
|
24
|
+
----------
|
|
24
25
|
- text: Input text to corrupt.
|
|
25
26
|
- rate: Max proportion of eligible confusion matches to replace (default 0.02).
|
|
26
27
|
- seed: Optional seed if `rng` not provided.
|
|
27
28
|
- rng: Optional RNG; overrides seed.
|
|
28
29
|
|
|
29
30
|
Notes
|
|
31
|
+
-----
|
|
30
32
|
- Uses a curated set of common OCR confusions (rn↔m, cl↔d, O↔0, l/I/1, etc.).
|
|
31
33
|
- Collects all non-overlapping candidate spans in reading order, then samples
|
|
32
34
|
a subset deterministically with the provided RNG.
|
|
33
35
|
- Replacements can change length (e.g., m→rn), so edits are applied from left
|
|
34
36
|
to right using precomputed spans to avoid index drift.
|
|
37
|
+
|
|
35
38
|
"""
|
|
36
39
|
if not text:
|
|
37
40
|
return text
|
|
@@ -107,7 +110,6 @@ def ocr_artifacts(
|
|
|
107
110
|
|
|
108
111
|
Prefers the Rust implementation when available.
|
|
109
112
|
"""
|
|
110
|
-
|
|
111
113
|
if not text:
|
|
112
114
|
return text
|
|
113
115
|
|
|
@@ -124,7 +126,7 @@ def ocr_artifacts(
|
|
|
124
126
|
clamped_rate = max(0.0, effective_rate)
|
|
125
127
|
|
|
126
128
|
if _ocr_artifacts_rust is not None:
|
|
127
|
-
return _ocr_artifacts_rust(text, clamped_rate, rng)
|
|
129
|
+
return cast(str, _ocr_artifacts_rust(text, clamped_rate, rng))
|
|
128
130
|
|
|
129
131
|
return _python_ocr_artifacts(text, rate=clamped_rate, rng=rng)
|
|
130
132
|
|
|
@@ -164,7 +166,6 @@ class Scannequin(Glitchling):
|
|
|
164
166
|
return {"type": "ocr", "error_rate": float(rate)}
|
|
165
167
|
|
|
166
168
|
|
|
167
|
-
|
|
168
169
|
scannequin = Scannequin()
|
|
169
170
|
|
|
170
171
|
|
glitchlings/zoo/typogre.py
CHANGED
|
@@ -2,11 +2,11 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import math
|
|
4
4
|
import random
|
|
5
|
-
from typing import Any, Optional
|
|
5
|
+
from typing import Any, Optional, cast
|
|
6
6
|
|
|
7
|
-
from .core import Glitchling, AttackWave, AttackOrder
|
|
8
|
-
from ._rate import resolve_rate
|
|
9
7
|
from ..util import KEYNEIGHBORS
|
|
8
|
+
from ._rate import resolve_rate
|
|
9
|
+
from .core import AttackOrder, AttackWave, Glitchling
|
|
10
10
|
|
|
11
11
|
try:
|
|
12
12
|
from glitchlings._zoo_rust import fatfinger as _fatfinger_rust
|
|
@@ -64,9 +64,7 @@ def _python_eligible_idx(s: str, i: int) -> bool:
|
|
|
64
64
|
return left_ok and right_ok
|
|
65
65
|
|
|
66
66
|
|
|
67
|
-
def _python_draw_eligible_index(
|
|
68
|
-
rng: random.Random, s: str, max_tries: int = 16
|
|
69
|
-
) -> Optional[int]:
|
|
67
|
+
def _python_draw_eligible_index(rng: random.Random, s: str, max_tries: int = 16) -> Optional[int]:
|
|
70
68
|
n = len(s)
|
|
71
69
|
if n == 0:
|
|
72
70
|
return None
|
|
@@ -151,7 +149,6 @@ def fatfinger(
|
|
|
151
149
|
max_change_rate: float | None = None,
|
|
152
150
|
) -> str:
|
|
153
151
|
"""Introduce character-level "fat finger" edits with a Rust fast path."""
|
|
154
|
-
|
|
155
152
|
effective_rate = resolve_rate(
|
|
156
153
|
rate=rate,
|
|
157
154
|
legacy_value=max_change_rate,
|
|
@@ -171,7 +168,10 @@ def fatfinger(
|
|
|
171
168
|
layout = getattr(KEYNEIGHBORS, keyboard)
|
|
172
169
|
|
|
173
170
|
if _fatfinger_rust is not None:
|
|
174
|
-
return _fatfinger_rust(text, max_change_rate=clamped_rate, layout=layout, rng=rng)
|
|
171
|
+
return cast(
|
|
172
|
+
str,
|
|
173
|
+
_fatfinger_rust(text, max_change_rate=clamped_rate, layout=layout, rng=rng),
|
|
174
|
+
)
|
|
175
175
|
|
|
176
176
|
return _fatfinger_python(text, rate=clamped_rate, layout=layout, rng=rng)
|
|
177
177
|
|
|
@@ -230,4 +230,3 @@ typogre = Typogre()
|
|
|
230
230
|
|
|
231
231
|
|
|
232
232
|
__all__ = ["Typogre", "typogre"]
|
|
233
|
-
|
glitchlings/zoo/zeedub.py
CHANGED
|
@@ -3,9 +3,10 @@ from __future__ import annotations
|
|
|
3
3
|
import math
|
|
4
4
|
import random
|
|
5
5
|
from collections.abc import Sequence
|
|
6
|
+
from typing import Any, cast
|
|
6
7
|
|
|
7
|
-
from .core import Glitchling, AttackWave, AttackOrder
|
|
8
8
|
from ._rate import resolve_rate
|
|
9
|
+
from .core import AttackOrder, AttackWave, Glitchling
|
|
9
10
|
|
|
10
11
|
try:
|
|
11
12
|
from glitchlings._zoo_rust import inject_zero_widths as _inject_zero_widths_rust
|
|
@@ -77,7 +78,6 @@ def insert_zero_widths(
|
|
|
77
78
|
characters: Sequence[str] | None = None,
|
|
78
79
|
) -> str:
|
|
79
80
|
"""Inject zero-width characters between non-space character pairs."""
|
|
80
|
-
|
|
81
81
|
effective_rate = resolve_rate(
|
|
82
82
|
rate=rate,
|
|
83
83
|
legacy_value=None,
|
|
@@ -115,7 +115,10 @@ def insert_zero_widths(
|
|
|
115
115
|
if hasattr(rng, "getstate"):
|
|
116
116
|
python_state = rng.getstate()
|
|
117
117
|
rng.setstate(state)
|
|
118
|
-
rust_result = _inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng)
|
|
118
|
+
rust_result = cast(
|
|
119
|
+
str,
|
|
120
|
+
_inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng),
|
|
121
|
+
)
|
|
119
122
|
if rust_result == python_result:
|
|
120
123
|
return rust_result
|
|
121
124
|
if python_state is not None and hasattr(rng, "setstate"):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: glitchlings
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: Monsters for your language games.
|
|
5
5
|
Author: osoleve
|
|
6
6
|
License: Apache License
|
|
@@ -226,19 +226,51 @@ License-File: LICENSE
|
|
|
226
226
|
Requires-Dist: confusable-homoglyphs>=3.3.1
|
|
227
227
|
Requires-Dist: tomli>=2.0.1; python_version < "3.11"
|
|
228
228
|
Requires-Dist: pyyaml>=6.0.0
|
|
229
|
+
Provides-Extra: all
|
|
230
|
+
Requires-Dist: black>=24.4.0; extra == "all"
|
|
231
|
+
Requires-Dist: hypothesis>=6.140.0; extra == "all"
|
|
232
|
+
Requires-Dist: interrogate>=1.5.0; extra == "all"
|
|
233
|
+
Requires-Dist: jellyfish>=1.2.0; extra == "all"
|
|
234
|
+
Requires-Dist: isort>=5.13.0; extra == "all"
|
|
235
|
+
Requires-Dist: mkdocs>=1.6.0; extra == "all"
|
|
236
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == "all"
|
|
237
|
+
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "all"
|
|
238
|
+
Requires-Dist: mkdocstrings-python>=1.10.0; extra == "all"
|
|
239
|
+
Requires-Dist: mypy>=1.8.0; extra == "all"
|
|
240
|
+
Requires-Dist: numpy<=2.0,>=1.24; extra == "all"
|
|
241
|
+
Requires-Dist: pre-commit>=3.8.0; extra == "all"
|
|
242
|
+
Requires-Dist: pytest>=8.0.0; extra == "all"
|
|
243
|
+
Requires-Dist: ruff>=0.6.0; extra == "all"
|
|
244
|
+
Requires-Dist: verifiers>=0.1.3.post0; extra == "all"
|
|
229
245
|
Provides-Extra: hf
|
|
230
246
|
Requires-Dist: datasets>=4.0.0; extra == "hf"
|
|
247
|
+
Provides-Extra: lightning
|
|
248
|
+
Requires-Dist: pytorch_lightning>=2.0.0; extra == "lightning"
|
|
231
249
|
Provides-Extra: vectors
|
|
232
250
|
Requires-Dist: numpy<=2.0,>=1.24; extra == "vectors"
|
|
233
251
|
Requires-Dist: spacy>=3.7.2; extra == "vectors"
|
|
234
252
|
Requires-Dist: gensim>=4.3.2; extra == "vectors"
|
|
253
|
+
Provides-Extra: st
|
|
254
|
+
Requires-Dist: sentence-transformers>=3.0.0; extra == "st"
|
|
235
255
|
Provides-Extra: prime
|
|
236
256
|
Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
|
|
237
257
|
Requires-Dist: jellyfish>=1.2.0; extra == "prime"
|
|
258
|
+
Provides-Extra: torch
|
|
259
|
+
Requires-Dist: torch>=2.0.0; extra == "torch"
|
|
238
260
|
Provides-Extra: dev
|
|
239
261
|
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
240
262
|
Requires-Dist: hypothesis>=6.140.0; extra == "dev"
|
|
241
263
|
Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
|
|
264
|
+
Requires-Dist: mkdocs>=1.6.0; extra == "dev"
|
|
265
|
+
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "dev"
|
|
266
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == "dev"
|
|
267
|
+
Requires-Dist: mkdocstrings-python>=1.10.0; extra == "dev"
|
|
268
|
+
Requires-Dist: interrogate>=1.5.0; extra == "dev"
|
|
269
|
+
Requires-Dist: black>=24.4.0; extra == "dev"
|
|
270
|
+
Requires-Dist: isort>=5.13.0; extra == "dev"
|
|
271
|
+
Requires-Dist: ruff>=0.6.0; extra == "dev"
|
|
272
|
+
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
273
|
+
Requires-Dist: pre-commit>=3.8.0; extra == "dev"
|
|
242
274
|
Dynamic: license-file
|
|
243
275
|
|
|
244
276
|
#
|
|
@@ -297,7 +329,7 @@ print(gaggle(SAMPLE_TEXT))
|
|
|
297
329
|
> Onҽ mھrning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin٠ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
|
|
298
330
|
|
|
299
331
|
Consult the [Glitchlings Usage Guide](docs/index.md)
|
|
300
|
-
for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
|
|
332
|
+
for end-to-end instructions spanning the Python API, CLI, HuggingFace, PyTorch, and Prime Intellect
|
|
301
333
|
integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
|
|
302
334
|
|
|
303
335
|
## Motivation
|
|
@@ -338,10 +370,67 @@ They're horrible little gremlins, but they're not _unreasonable_.
|
|
|
338
370
|
|
|
339
371
|
Keyboard warriors can challenge them directly via the `glitchlings` command:
|
|
340
372
|
|
|
373
|
+
<!-- BEGIN: CLI_USAGE -->
|
|
341
374
|
```bash
|
|
342
375
|
# Discover which glitchlings are currently on the loose.
|
|
343
376
|
glitchlings --list
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
```text
|
|
380
|
+
Typogre — scope: Character, order: early
|
|
381
|
+
Apostrofae — scope: Character, order: normal
|
|
382
|
+
Mim1c — scope: Character, order: last
|
|
383
|
+
Jargoyle — scope: Word, order: normal
|
|
384
|
+
Adjax — scope: Word, order: normal
|
|
385
|
+
Reduple — scope: Word, order: normal
|
|
386
|
+
Rushmore — scope: Word, order: normal
|
|
387
|
+
Redactyl — scope: Word, order: normal
|
|
388
|
+
Scannequin — scope: Character, order: late
|
|
389
|
+
Zeedub — scope: Character, order: last
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
```bash
|
|
393
|
+
# Review the full CLI contract.
|
|
394
|
+
glitchlings --help
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
```text
|
|
398
|
+
usage: glitchlings [-h] [-g SPEC] [-s SEED] [-f FILE] [--sample] [--diff]
|
|
399
|
+
[--list] [-c CONFIG]
|
|
400
|
+
[text]
|
|
401
|
+
|
|
402
|
+
Summon glitchlings to corrupt text. Provide input text as an argument, via
|
|
403
|
+
--file, or pipe it on stdin.
|
|
404
|
+
|
|
405
|
+
positional arguments:
|
|
406
|
+
text Text to corrupt. If omitted, stdin is used or --sample
|
|
407
|
+
provides fallback text.
|
|
408
|
+
|
|
409
|
+
options:
|
|
410
|
+
-h, --help show this help message and exit
|
|
411
|
+
-g SPEC, --glitchling SPEC
|
|
412
|
+
Glitchling to apply, optionally with parameters like
|
|
413
|
+
Typogre(rate=0.05). Repeat for multiples; defaults to
|
|
414
|
+
all built-ins.
|
|
415
|
+
-s SEED, --seed SEED Seed controlling deterministic corruption order
|
|
416
|
+
(default: 151).
|
|
417
|
+
-f FILE, --file FILE Read input text from a file instead of the command
|
|
418
|
+
line argument.
|
|
419
|
+
--sample Use the included SAMPLE_TEXT when no other input is
|
|
420
|
+
provided.
|
|
421
|
+
--diff Show a unified diff between the original and corrupted
|
|
422
|
+
text.
|
|
423
|
+
--list List available glitchlings and exit.
|
|
424
|
+
-c CONFIG, --config CONFIG
|
|
425
|
+
Load glitchlings from a YAML configuration file.
|
|
426
|
+
```
|
|
427
|
+
<!-- END: CLI_USAGE -->
|
|
428
|
+
|
|
429
|
+
Run `python docs/build_cli_reference.py` whenever you tweak the CLI so the README stays in sync with the actual output. The script executes the commands above and replaces the block between the markers automatically.
|
|
344
430
|
|
|
431
|
+
Prefer inline tweaks? You can still configure glitchlings directly in the shell:
|
|
432
|
+
|
|
433
|
+
```bash
|
|
345
434
|
# Run Typogre against the contents of a file and inspect the diff.
|
|
346
435
|
glitchlings -g typogre --file documents/report.txt --diff
|
|
347
436
|
|
|
@@ -355,8 +444,6 @@ echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
|
|
355
444
|
glitchlings --config experiments/chaos.yaml "Let slips the glitchlings of war"
|
|
356
445
|
```
|
|
357
446
|
|
|
358
|
-
Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
|
|
359
|
-
|
|
360
447
|
Attack configurations live in plain YAML files so you can version-control experiments without touching code:
|
|
361
448
|
|
|
362
449
|
```yaml
|
|
@@ -394,6 +481,16 @@ _What a nice word, would be a shame if something happened to it._
|
|
|
394
481
|
> - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
|
|
395
482
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
396
483
|
|
|
484
|
+
### Apostrofae
|
|
485
|
+
|
|
486
|
+
_It looks like you're trying to paste some text. Can I help?_
|
|
487
|
+
|
|
488
|
+
> _**Paperclip Manager.**_ Apostrofae scans for balanced runs of straight quotes, apostrophes, and backticks before replacing them with randomly sampled smart-quote pairs from a curated lookup table. The swap happens in-place so contractions and unpaired glyphs remain untouched.
|
|
489
|
+
>
|
|
490
|
+
> Args
|
|
491
|
+
>
|
|
492
|
+
> - `seed (int)`: Optional seed controlling the deterministic smart-quote sampling (default: 151).
|
|
493
|
+
|
|
397
494
|
### Mim1c
|
|
398
495
|
|
|
399
496
|
_Wait, was that...?_
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
glitchlings/__init__.py,sha256=bkyRgzjC8ssidEO9UL9VpbYXQxTV1Hz3VAPOIqd9uMg,1182
|
|
2
|
+
glitchlings/__main__.py,sha256=f-P4jiVBd7ZpS6QxRpa_6SJgOG03UhZhcWasMDRWLs8,120
|
|
3
|
+
glitchlings/_zoo_rust.cpython-311-darwin.so,sha256=PXMyeT6b3p8dGysMHMoAK2r1rNiTYRQqNMZJmkR8xk4,2586944
|
|
4
|
+
glitchlings/compat.py,sha256=T_5Ia8yCzZvsMdicZ2TCcOgDO53_AjNGkSXWTR_qEnA,8908
|
|
5
|
+
glitchlings/config.py,sha256=ofxDMkoMg4j51CFube54aca1Ky9y_ZeVktXpeUEdWmA,12953
|
|
6
|
+
glitchlings/config.toml,sha256=04-Y_JCdQU68SRmwk2qZqrH_bbX4jEH9uh7URtxdIHA,99
|
|
7
|
+
glitchlings/main.py,sha256=uw8VbDgxov1m-wYHPDl2dP5ItpLB4ZHpb0ChJXzcL0o,10623
|
|
8
|
+
glitchlings/dlc/__init__.py,sha256=qlY4nuagy4AAWuPMwmuhwK2m36ktp-qkeiIxC7OXg34,305
|
|
9
|
+
glitchlings/dlc/_shared.py,sha256=EFSnush3rjjaf4La5QfVaf_KEp0U_l_3-q4PKx0A6NQ,1972
|
|
10
|
+
glitchlings/dlc/huggingface.py,sha256=9lW7TnTHA_bXyo4Is8pymZchrB9BIL1bMCP2p7LCMtg,2576
|
|
11
|
+
glitchlings/dlc/prime.py,sha256=qGFI1d4BiOEIgQZ5v9QnlbYx4J4q-vNlh5tWZng11xs,8607
|
|
12
|
+
glitchlings/dlc/pytorch.py,sha256=tfHEDsDAOUnEvImFgRMjqC7Ig_aNVO8suXKpv24C2cA,7823
|
|
13
|
+
glitchlings/dlc/pytorch_lightning.py,sha256=Om45BHYx8tMoUwYOOTk5B5A5AIjNkh58V37OC2IBFxE,8553
|
|
14
|
+
glitchlings/lexicon/__init__.py,sha256=PLuu63iX6GSRypGI4DxiN_U-QmqmDobk1Xb7B5IrsZg,5951
|
|
15
|
+
glitchlings/lexicon/_cache.py,sha256=aWSUb5Ex162dr3HouO2Ic2O8ck3ViEFWs8-XMLKMeJ0,4086
|
|
16
|
+
glitchlings/lexicon/metrics.py,sha256=VBFfFpxjiEwZtK-jS55H8xP7MTC_0OjY8lQ5zSQ9aTY,4572
|
|
17
|
+
glitchlings/lexicon/vector.py,sha256=yWf-vlN2OEHnTCPu7tgDnJbhm47cmhdrTtjR0RZKkUM,22530
|
|
18
|
+
glitchlings/lexicon/wordnet.py,sha256=YcOliPHuesdlekmGspwAyR4fWDDxZWR_dIt_Nsq7ag0,7608
|
|
19
|
+
glitchlings/lexicon/data/default_vector_cache.json,sha256=3iVH0nX8EqMbqOkKWvORCGYtN0LKHn5G_Snlizsnm1g,997
|
|
20
|
+
glitchlings/util/__init__.py,sha256=vc3EAY8ehRjbOiryFdaqvvljXcyNGtZSPiEp9ok1vVw,4674
|
|
21
|
+
glitchlings/util/adapters.py,sha256=psxQFYSFmh1u7NuqtIrKwQP5FOhOrZoxZzc7X7DDi9U,693
|
|
22
|
+
glitchlings/zoo/__init__.py,sha256=1dWZPCTXuh5J7WdCxHX7ZX9bNd8bakzYndxQRhF43i8,5243
|
|
23
|
+
glitchlings/zoo/_ocr_confusions.py,sha256=Ju2_avXiwsr1p8zWFUTOzMxJ8vT5PpYobuGIn4L_sqI,1204
|
|
24
|
+
glitchlings/zoo/_rate.py,sha256=Vb1_5HAzrqr9eAh_zzngSV-d0zI264zcYspnT3VHPkE,504
|
|
25
|
+
glitchlings/zoo/_sampling.py,sha256=KrWyUSsYXghlvktS5hQBO0bPqywEEyA49A2qDWInB7Q,1586
|
|
26
|
+
glitchlings/zoo/_text_utils.py,sha256=fS5L_eq-foBbBdiv4ymI8-O0D0csc3yDekHpX8bqfV4,2754
|
|
27
|
+
glitchlings/zoo/adjax.py,sha256=TABKGQOwpyj_5czSoN8tPyEinwp8oZHKOBfU78ae9n0,3545
|
|
28
|
+
glitchlings/zoo/apostrofae.py,sha256=m2-VPO-ahp0zAEJTHPItXMwnpD9D8bQIjVyyIRzj46k,3922
|
|
29
|
+
glitchlings/zoo/core.py,sha256=3IHYEo8f2K7q4EbSZBYPb4MQXUVoMPm6B0IgsjiWNXk,20493
|
|
30
|
+
glitchlings/zoo/jargoyle.py,sha256=zGXi6WFSzYA_44UXvyK0aj18CMFHIFL4eQeijEHfZl4,11568
|
|
31
|
+
glitchlings/zoo/mim1c.py,sha256=-fgodKWZq--Xw8L2t1EqNbsh48bwX5jZxmiXdoaQShI,3437
|
|
32
|
+
glitchlings/zoo/ocr_confusions.tsv,sha256=KhtR7vJDTITpfTSGa-I7RHr6CK7LkGi2KjdhEWipI6o,183
|
|
33
|
+
glitchlings/zoo/redactyl.py,sha256=9Rtgkg87LnGt47DHKsD8XW25gtg9pv2aXvrFv46XOTQ,5516
|
|
34
|
+
glitchlings/zoo/reduple.py,sha256=ttHha3Yl0SRzEyAx9SfENbJRO_WhmJYL8ow5LGKn248,4258
|
|
35
|
+
glitchlings/zoo/rushmore.py,sha256=R6dgt4HSvkt31foazNmUhO4wL9PHpjh_7pzJ8vQPgO0,4322
|
|
36
|
+
glitchlings/zoo/scannequin.py,sha256=AQ7JPIxLiPFy4fDV6MgO4OFo34dMShc7sipStUaCG40,4900
|
|
37
|
+
glitchlings/zoo/typogre.py,sha256=AuAtx-KyWrk-zX3uuxjkvjiduLyDwGJNW7XYktnsuos,6712
|
|
38
|
+
glitchlings/zoo/zeedub.py,sha256=3VneZOEeL98Ek1VnZQI4V2o1alv41vvMzZXrKc9Lt1s,4875
|
|
39
|
+
glitchlings/zoo/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
|
+
glitchlings/zoo/assets/apostrofae_pairs.json,sha256=bfjSEaMTI_axGNJ93nI431KXU0IVp7ayO42gGcMgL6U,521
|
|
41
|
+
glitchlings-0.4.3.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
|
|
42
|
+
glitchlings-0.4.3.dist-info/METADATA,sha256=NV0-8T4jx5R2Eswhib6B29vAeoiXBdIDBFOu6KrzqdM,32242
|
|
43
|
+
glitchlings-0.4.3.dist-info/WHEEL,sha256=Tgp8Vc-mmQm0KX-V22BSUoymoX1p0w13bZbX85y8hSs,114
|
|
44
|
+
glitchlings-0.4.3.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
|
|
45
|
+
glitchlings-0.4.3.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
|
|
46
|
+
glitchlings-0.4.3.dist-info/RECORD,,
|