glitchlings 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings-0.2.3/MANIFEST.in +4 -0
- {glitchlings-0.2.1/src/glitchlings.egg-info → glitchlings-0.2.3}/PKG-INFO +28 -61
- {glitchlings-0.2.1 → glitchlings-0.2.3}/README.md +21 -59
- {glitchlings-0.2.1 → glitchlings-0.2.3}/pyproject.toml +11 -8
- {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/Cargo.lock +0 -7
- {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/Cargo.toml +0 -1
- glitchlings-0.2.3/rust/zoo/assets/ocr_confusions.tsv +30 -0
- glitchlings-0.2.3/rust/zoo/build.rs +140 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/glitch_ops.rs +1 -1
- {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/lib.rs +2 -1
- {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/resources.rs +24 -34
- glitchlings-0.2.1/rust/typogre/src/lib.rs → glitchlings-0.2.3/rust/zoo/src/typogre.rs +3 -9
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/dlc/prime.py +44 -22
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/main.py +17 -39
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/util/__init__.py +30 -0
- glitchlings-0.2.3/src/glitchlings/zoo/__init__.py +134 -0
- glitchlings-0.2.3/src/glitchlings/zoo/_ocr_confusions.py +34 -0
- glitchlings-0.2.3/src/glitchlings/zoo/_rate.py +21 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/core.py +56 -52
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/jargoyle.py +77 -16
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/mim1c.py +24 -5
- glitchlings-0.2.3/src/glitchlings/zoo/ocr_confusions.tsv +30 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/redactyl.py +46 -9
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/reduple.py +36 -8
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/rushmore.py +40 -8
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/scannequin.py +42 -37
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/typogre.py +36 -8
- {glitchlings-0.2.1 → glitchlings-0.2.3/src/glitchlings.egg-info}/PKG-INFO +28 -61
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/SOURCES.txt +5 -2
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/requires.txt +3 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_cli.py +29 -1
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_gaggle.py +20 -6
- glitchlings-0.2.3/tests/test_glitchling_core.py +68 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_glitchlings_determinism.py +7 -18
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_jargoyle.py +3 -13
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_keyboard_layouts.py +18 -1
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_parameter_effects.py +16 -11
- glitchlings-0.2.3/tests/test_prime_echo_chamber.py +294 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_property_based.py +3 -3
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_rust_backed_glitchlings.py +71 -31
- glitchlings-0.2.1/MANIFEST.in +0 -4
- glitchlings-0.2.1/rust/typogre/Cargo.toml +0 -14
- glitchlings-0.2.1/rust/zoo/build.rs +0 -60
- glitchlings-0.2.1/src/glitchlings/zoo/__init__.py +0 -57
- glitchlings-0.2.1/tests/test_glitchling_core.py +0 -24
- glitchlings-0.2.1/tests/test_prime_echo_chamber.py +0 -99
- {glitchlings-0.2.1 → glitchlings-0.2.3}/LICENSE +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/Cargo.toml +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/pipeline.rs +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/rng.rs +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/text_buffer.rs +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/setup.cfg +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/__init__.py +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/__main__.py +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/dlc/__init__.py +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/dlc/huggingface.py +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/dependency_links.txt +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/entry_points.txt +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/top_level.txt +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_dataset_corruption.py +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_huggingface_dlc.py +0 -0
- {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_util.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: glitchlings
|
3
|
-
Version: 0.2.1
|
3
|
+
Version: 0.2.3
|
4
4
|
Summary: Monsters for your language games.
|
5
5
|
Author: osoleve
|
6
6
|
License: Apache License
|
@@ -215,6 +215,8 @@ Classifier: Intended Audience :: Developers
|
|
215
215
|
Classifier: License :: OSI Approved :: Apache Software License
|
216
216
|
Classifier: Programming Language :: Python
|
217
217
|
Classifier: Programming Language :: Python :: 3
|
218
|
+
Classifier: Programming Language :: Python :: 3.10
|
219
|
+
Classifier: Programming Language :: Python :: 3.11
|
218
220
|
Classifier: Programming Language :: Python :: 3.12
|
219
221
|
Classifier: Programming Language :: Rust
|
220
222
|
Classifier: Operating System :: MacOS :: MacOS X
|
@@ -223,7 +225,7 @@ Classifier: Operating System :: POSIX :: Linux
|
|
223
225
|
Classifier: Operating System :: OS Independent
|
224
226
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
225
227
|
Classifier: Topic :: Software Development :: Testing
|
226
|
-
Requires-Python: >=3.
|
228
|
+
Requires-Python: >=3.10
|
227
229
|
Description-Content-Type: text/markdown
|
228
230
|
License-File: LICENSE
|
229
231
|
Requires-Dist: confusable-homoglyphs>=3.3.1
|
@@ -232,11 +234,14 @@ Provides-Extra: hf
|
|
232
234
|
Requires-Dist: datasets>=4.0.0; extra == "hf"
|
233
235
|
Provides-Extra: wordnet
|
234
236
|
Requires-Dist: nltk>=3.9.1; extra == "wordnet"
|
237
|
+
Requires-Dist: numpy<=2.0,>=1.24; extra == "wordnet"
|
235
238
|
Provides-Extra: prime
|
236
239
|
Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
|
237
240
|
Provides-Extra: dev
|
238
241
|
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
239
242
|
Requires-Dist: hypothesis>=6.140.0; extra == "dev"
|
243
|
+
Requires-Dist: nltk>=3.9.1; extra == "dev"
|
244
|
+
Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
|
240
245
|
Dynamic: license-file
|
241
246
|
|
242
247
|
#
|
@@ -277,14 +282,16 @@ After all, what good is general intelligence if it can't handle a little chaos?
|
|
277
282
|
pip install -U glitchlings
|
278
283
|
```
|
279
284
|
|
285
|
+
> Glitchlings requires Python 3.10 or newer.
|
286
|
+
|
280
287
|
```python
|
281
288
|
from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Reduple, Rushmore
|
282
289
|
|
283
290
|
gaggle = Gaggle([
|
284
|
-
Typogre(
|
285
|
-
Mim1c(
|
291
|
+
Typogre(rate=0.03),
|
292
|
+
Mim1c(rate=0.02),
|
286
293
|
Reduple(seed=404),
|
287
|
-
Rushmore(
|
294
|
+
Rushmore(rate=0.02),
|
288
295
|
])
|
289
296
|
|
290
297
|
print(gaggle(SAMPLE_TEXT))
|
@@ -292,52 +299,9 @@ print(gaggle(SAMPLE_TEXT))
|
|
292
299
|
|
293
300
|
> Onҽ mھrning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin٠ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
|
294
301
|
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
- **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
|
300
|
-
- **Dataset corruption** – After ``import glitchlings.dlc.huggingface``, call ``Dataset.glitch(...)`` (or a `Gaggle`'s `.corrupt_dataset`) to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
|
301
|
-
|
302
|
-
### Rust pipeline acceleration (opt-in)
|
303
|
-
|
304
|
-
The refactored Rust pipeline can execute multiple glitchlings without
|
305
|
-
bouncing back through Python, but it is gated behind a feature flag so
|
306
|
-
teams can roll it out gradually. After compiling the Rust extension
|
307
|
-
(`python -m cibuildwheel --output-dir dist`) set
|
308
|
-
`GLITCHLINGS_RUST_PIPELINE=1` (or `true`, `yes`, `on`) before importing
|
309
|
-
`glitchlings`. When the flag is set and the extension is available,
|
310
|
-
`Gaggle` automatically batches compatible glitchlings into the Rust
|
311
|
-
pipeline; otherwise it transparently falls back to the legacy Python
|
312
|
-
loop.
|
313
|
-
|
314
|
-
### Prime Intellect environments
|
315
|
-
|
316
|
-
After `pip install -e .[prime]`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
|
317
|
-
|
318
|
-
```python
|
319
|
-
from glitchlings import Mim1c, Typogre
|
320
|
-
from glitchlings.dlc.prime import echo_chamber, load_environment
|
321
|
-
|
322
|
-
env = load_environment(
|
323
|
-
"osoleve/syllabify-en",
|
324
|
-
glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
|
325
|
-
seed=404,
|
326
|
-
)
|
327
|
-
|
328
|
-
# Spin up an echo chamber that corrupts a dataset column and
|
329
|
-
# rewards models for perfectly restoring it
|
330
|
-
practice_env = echo_chamber(
|
331
|
-
"osoleve/clean-room",
|
332
|
-
column="text",
|
333
|
-
glitchlings=["Typogre", "Mim1c"],
|
334
|
-
reward_function=lambda prompt, completion, answer: float(completion == answer),
|
335
|
-
)
|
336
|
-
```
|
337
|
-
|
338
|
-
Skip the `glitchlings` argument to receive an untouched verifier dataset, and
|
339
|
-
override `reward_function` when you want to evaluate completions with a custom
|
340
|
-
scoring routine.
|
302
|
+
Consult the [Glitchlings Usage Guide](docs/index.md)
|
303
|
+
for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
|
304
|
+
integrations, and the feature-flagged Rust pipeline.
|
341
305
|
|
342
306
|
## Motivation
|
343
307
|
|
@@ -353,8 +317,8 @@ Glitchlings are standard Python classes, so you can instantiate them with whatev
|
|
353
317
|
```python
|
354
318
|
from glitchlings import Gaggle, Typogre, Mim1c
|
355
319
|
|
356
|
-
custom_typogre = Typogre(
|
357
|
-
selective_mimic = Mim1c(
|
320
|
+
custom_typogre = Typogre(rate=0.1)
|
321
|
+
selective_mimic = Mim1c(rate=0.05, classes=["LATIN", "GREEK"])
|
358
322
|
|
359
323
|
gaggle = Gaggle([custom_typogre, selective_mimic], seed=99)
|
360
324
|
print(gaggle("Summoned heroes do not fear the glitch."))
|
@@ -384,11 +348,14 @@ glitchlings --list
|
|
384
348
|
# Run Typogre against the contents of a file and inspect the diff.
|
385
349
|
glitchlings -g typogre --file documents/report.txt --diff
|
386
350
|
|
351
|
+
# Configure glitchlings inline by passing keyword arguments.
|
352
|
+
glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
|
353
|
+
|
387
354
|
# Pipe text straight into the CLI for an on-the-fly corruption.
|
388
355
|
echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
389
356
|
```
|
390
357
|
|
391
|
-
Use `--help` for a complete breakdown of available options.
|
358
|
+
Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
|
392
359
|
|
393
360
|
## Development
|
394
361
|
|
@@ -406,7 +373,7 @@ _What a nice word, would be a shame if something happened to it._
|
|
406
373
|
>
|
407
374
|
> Args
|
408
375
|
>
|
409
|
-
> - `
|
376
|
+
> - `rate (float)`: The maximum number of edits to make as a percentage of the length (default: 0.02, 2%).
|
410
377
|
> - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
|
411
378
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
412
379
|
|
@@ -418,7 +385,7 @@ _Wait, was that...?_
|
|
418
385
|
>
|
419
386
|
> Args
|
420
387
|
>
|
421
|
-
> - `
|
388
|
+
> - `rate (float)`: The maximum proportion of characters to replace (default: 0.02, 2%).
|
422
389
|
> - `classes (list[str] | "all")`: Restrict replacements to these Unicode script classes (default: ["LATIN", "GREEK", "CYRILLIC"]).
|
423
390
|
> - `banned_characters (Collection[str])`: Characters that must never appear as replacements (default: none).
|
424
391
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
@@ -431,7 +398,7 @@ _How can a computer need reading glasses?_
|
|
431
398
|
>
|
432
399
|
> Args
|
433
400
|
>
|
434
|
-
> - `
|
401
|
+
> - `rate (float)`: The maximum proportion of eligible confusion spans to replace (default: 0.02, 2%).
|
435
402
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
436
403
|
|
437
404
|
### Jargoyle
|
@@ -442,7 +409,7 @@ _Uh oh. The worst person you know just bought a thesaurus._
|
|
442
409
|
>
|
443
410
|
> Args
|
444
411
|
>
|
445
|
-
> - `
|
412
|
+
> - `rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
|
446
413
|
> - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
|
447
414
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
448
415
|
|
@@ -454,7 +421,7 @@ _Did you say that or did I?_
|
|
454
421
|
>
|
455
422
|
> Args
|
456
423
|
>
|
457
|
-
> - `
|
424
|
+
> - `rate (float)`: The maximum proportion of words to reduplicate (default: 0.05, 5%).
|
458
425
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
459
426
|
|
460
427
|
### Rushmore
|
@@ -465,7 +432,7 @@ _I accidentally an entire word._
|
|
465
432
|
>
|
466
433
|
> Args
|
467
434
|
>
|
468
|
-
> - `
|
435
|
+
> - `rate (float)`: The maximum proportion of words to delete (default: 0.01, 1%).
|
469
436
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
470
437
|
|
471
438
|
### Redactyl
|
@@ -477,7 +444,7 @@ _Oops, that was my black highlighter._
|
|
477
444
|
> ### Args
|
478
445
|
>
|
479
446
|
> - `replacement_char (str)`: The character to use for redaction (default: █).
|
480
|
-
> - `
|
447
|
+
> - `rate (float)`: The maximum proportion of words to redact (default: 0.05, 5%).
|
481
448
|
> - `merge_adjacent (bool)`: Whether to redact the space between adjacent redacted words (default: False).
|
482
449
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
483
450
|
|
@@ -36,14 +36,16 @@ After all, what good is general intelligence if it can't handle a little chaos?
|
|
36
36
|
pip install -U glitchlings
|
37
37
|
```
|
38
38
|
|
39
|
+
> Glitchlings requires Python 3.10 or newer.
|
40
|
+
|
39
41
|
```python
|
40
42
|
from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Reduple, Rushmore
|
41
43
|
|
42
44
|
gaggle = Gaggle([
|
43
|
-
Typogre(
|
44
|
-
Mim1c(
|
45
|
+
Typogre(rate=0.03),
|
46
|
+
Mim1c(rate=0.02),
|
45
47
|
Reduple(seed=404),
|
46
|
-
Rushmore(
|
48
|
+
Rushmore(rate=0.02),
|
47
49
|
])
|
48
50
|
|
49
51
|
print(gaggle(SAMPLE_TEXT))
|
@@ -51,52 +53,9 @@ print(gaggle(SAMPLE_TEXT))
|
|
51
53
|
|
52
54
|
> Onҽ mھrning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin٠ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
|
53
55
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
- **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
|
59
|
-
- **Dataset corruption** – After ``import glitchlings.dlc.huggingface``, call ``Dataset.glitch(...)`` (or a `Gaggle`'s `.corrupt_dataset`) to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
|
60
|
-
|
61
|
-
### Rust pipeline acceleration (opt-in)
|
62
|
-
|
63
|
-
The refactored Rust pipeline can execute multiple glitchlings without
|
64
|
-
bouncing back through Python, but it is gated behind a feature flag so
|
65
|
-
teams can roll it out gradually. After compiling the Rust extension
|
66
|
-
(`python -m cibuildwheel --output-dir dist`) set
|
67
|
-
`GLITCHLINGS_RUST_PIPELINE=1` (or `true`, `yes`, `on`) before importing
|
68
|
-
`glitchlings`. When the flag is set and the extension is available,
|
69
|
-
`Gaggle` automatically batches compatible glitchlings into the Rust
|
70
|
-
pipeline; otherwise it transparently falls back to the legacy Python
|
71
|
-
loop.
|
72
|
-
|
73
|
-
### Prime Intellect environments
|
74
|
-
|
75
|
-
After `pip install -e .[prime]`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
|
76
|
-
|
77
|
-
```python
|
78
|
-
from glitchlings import Mim1c, Typogre
|
79
|
-
from glitchlings.dlc.prime import echo_chamber, load_environment
|
80
|
-
|
81
|
-
env = load_environment(
|
82
|
-
"osoleve/syllabify-en",
|
83
|
-
glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
|
84
|
-
seed=404,
|
85
|
-
)
|
86
|
-
|
87
|
-
# Spin up an echo chamber that corrupts a dataset column and
|
88
|
-
# rewards models for perfectly restoring it
|
89
|
-
practice_env = echo_chamber(
|
90
|
-
"osoleve/clean-room",
|
91
|
-
column="text",
|
92
|
-
glitchlings=["Typogre", "Mim1c"],
|
93
|
-
reward_function=lambda prompt, completion, answer: float(completion == answer),
|
94
|
-
)
|
95
|
-
```
|
96
|
-
|
97
|
-
Skip the `glitchlings` argument to receive an untouched verifier dataset, and
|
98
|
-
override `reward_function` when you want to evaluate completions with a custom
|
99
|
-
scoring routine.
|
56
|
+
Consult the [Glitchlings Usage Guide](docs/index.md)
|
57
|
+
for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
|
58
|
+
integrations, and the feature-flagged Rust pipeline.
|
100
59
|
|
101
60
|
## Motivation
|
102
61
|
|
@@ -112,8 +71,8 @@ Glitchlings are standard Python classes, so you can instantiate them with whatev
|
|
112
71
|
```python
|
113
72
|
from glitchlings import Gaggle, Typogre, Mim1c
|
114
73
|
|
115
|
-
custom_typogre = Typogre(
|
116
|
-
selective_mimic = Mim1c(
|
74
|
+
custom_typogre = Typogre(rate=0.1)
|
75
|
+
selective_mimic = Mim1c(rate=0.05, classes=["LATIN", "GREEK"])
|
117
76
|
|
118
77
|
gaggle = Gaggle([custom_typogre, selective_mimic], seed=99)
|
119
78
|
print(gaggle("Summoned heroes do not fear the glitch."))
|
@@ -143,11 +102,14 @@ glitchlings --list
|
|
143
102
|
# Run Typogre against the contents of a file and inspect the diff.
|
144
103
|
glitchlings -g typogre --file documents/report.txt --diff
|
145
104
|
|
105
|
+
# Configure glitchlings inline by passing keyword arguments.
|
106
|
+
glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
|
107
|
+
|
146
108
|
# Pipe text straight into the CLI for an on-the-fly corruption.
|
147
109
|
echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
148
110
|
```
|
149
111
|
|
150
|
-
Use `--help` for a complete breakdown of available options.
|
112
|
+
Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
|
151
113
|
|
152
114
|
## Development
|
153
115
|
|
@@ -165,7 +127,7 @@ _What a nice word, would be a shame if something happened to it._
|
|
165
127
|
>
|
166
128
|
> Args
|
167
129
|
>
|
168
|
-
> - `
|
130
|
+
> - `rate (float)`: The maximum number of edits to make as a percentage of the length (default: 0.02, 2%).
|
169
131
|
> - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
|
170
132
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
171
133
|
|
@@ -177,7 +139,7 @@ _Wait, was that...?_
|
|
177
139
|
>
|
178
140
|
> Args
|
179
141
|
>
|
180
|
-
> - `
|
142
|
+
> - `rate (float)`: The maximum proportion of characters to replace (default: 0.02, 2%).
|
181
143
|
> - `classes (list[str] | "all")`: Restrict replacements to these Unicode script classes (default: ["LATIN", "GREEK", "CYRILLIC"]).
|
182
144
|
> - `banned_characters (Collection[str])`: Characters that must never appear as replacements (default: none).
|
183
145
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
@@ -190,7 +152,7 @@ _How can a computer need reading glasses?_
|
|
190
152
|
>
|
191
153
|
> Args
|
192
154
|
>
|
193
|
-
> - `
|
155
|
+
> - `rate (float)`: The maximum proportion of eligible confusion spans to replace (default: 0.02, 2%).
|
194
156
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
195
157
|
|
196
158
|
### Jargoyle
|
@@ -201,7 +163,7 @@ _Uh oh. The worst person you know just bought a thesaurus._
|
|
201
163
|
>
|
202
164
|
> Args
|
203
165
|
>
|
204
|
-
> - `
|
166
|
+
> - `rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
|
205
167
|
> - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
|
206
168
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
207
169
|
|
@@ -213,7 +175,7 @@ _Did you say that or did I?_
|
|
213
175
|
>
|
214
176
|
> Args
|
215
177
|
>
|
216
|
-
> - `
|
178
|
+
> - `rate (float)`: The maximum proportion of words to reduplicate (default: 0.05, 5%).
|
217
179
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
218
180
|
|
219
181
|
### Rushmore
|
@@ -224,7 +186,7 @@ _I accidentally an entire word._
|
|
224
186
|
>
|
225
187
|
> Args
|
226
188
|
>
|
227
|
-
> - `
|
189
|
+
> - `rate (float)`: The maximum proportion of words to delete (default: 0.01, 1%).
|
228
190
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
229
191
|
|
230
192
|
### Redactyl
|
@@ -236,7 +198,7 @@ _Oops, that was my black highlighter._
|
|
236
198
|
> ### Args
|
237
199
|
>
|
238
200
|
> - `replacement_char (str)`: The character to use for redaction (default: █).
|
239
|
-
> - `
|
201
|
+
> - `rate (float)`: The maximum proportion of words to redact (default: 0.05, 5%).
|
240
202
|
> - `merge_adjacent (bool)`: Whether to redact the space between adjacent redacted words (default: False).
|
241
203
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
242
204
|
|
@@ -1,9 +1,9 @@
|
|
1
1
|
[project]
|
2
2
|
name = "glitchlings"
|
3
|
-
version = "0.2.1"
|
3
|
+
version = "0.2.3"
|
4
4
|
description = "Monsters for your language games."
|
5
5
|
readme = "README.md"
|
6
|
-
requires-python = ">=3.
|
6
|
+
requires-python = ">=3.10"
|
7
7
|
|
8
8
|
dependencies = [
|
9
9
|
"confusable-homoglyphs>=3.3.1",
|
@@ -22,6 +22,8 @@ classifiers = [
|
|
22
22
|
"License :: OSI Approved :: Apache Software License",
|
23
23
|
"Programming Language :: Python",
|
24
24
|
"Programming Language :: Python :: 3",
|
25
|
+
"Programming Language :: Python :: 3.10",
|
26
|
+
"Programming Language :: Python :: 3.11",
|
25
27
|
"Programming Language :: Python :: 3.12",
|
26
28
|
"Programming Language :: Rust",
|
27
29
|
"Operating System :: MacOS :: MacOS X",
|
@@ -46,11 +48,13 @@ glitchlings = "glitchlings.main:main"
|
|
46
48
|
|
47
49
|
[project.optional-dependencies]
|
48
50
|
hf = ["datasets>=4.0.0"]
|
49
|
-
wordnet = ["nltk>=3.9.1"]
|
51
|
+
wordnet = ["nltk>=3.9.1", "numpy>=1.24,<=2.0"]
|
50
52
|
prime = ["verifiers>=0.1.3.post0"]
|
51
53
|
dev = [
|
52
54
|
"pytest>=8.0.0",
|
53
55
|
"hypothesis>=6.140.0",
|
56
|
+
"nltk>=3.9.1",
|
57
|
+
"numpy>=1.24,<=2.0",
|
54
58
|
]
|
55
59
|
|
56
60
|
[build-system]
|
@@ -59,6 +63,10 @@ build-backend = "setuptools.build_meta"
|
|
59
63
|
|
60
64
|
[tool.setuptools]
|
61
65
|
package-dir = {"" = "src"}
|
66
|
+
include-package-data = true
|
67
|
+
|
68
|
+
[tool.setuptools.package-data]
|
69
|
+
"glitchlings.zoo" = ["ocr_confusions.tsv"]
|
62
70
|
|
63
71
|
[tool.setuptools.packages.find]
|
64
72
|
where = ["src"]
|
@@ -69,11 +77,6 @@ path = "rust/zoo/Cargo.toml"
|
|
69
77
|
binding = "PyO3"
|
70
78
|
debug = false
|
71
79
|
|
72
|
-
[[tool.setuptools-rust.ext-modules]]
|
73
|
-
target = "glitchlings._typogre_rust"
|
74
|
-
path = "rust/typogre/Cargo.toml"
|
75
|
-
binding = "PyO3"
|
76
|
-
debug = false
|
77
80
|
|
78
81
|
[tool.pytest.ini_options]
|
79
82
|
pythonpath = [
|
@@ -316,13 +316,6 @@ version = "1.19.0"
|
|
316
316
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
317
317
|
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
318
318
|
|
319
|
-
[[package]]
|
320
|
-
name = "typogre_rust"
|
321
|
-
version = "0.1.0"
|
322
|
-
dependencies = [
|
323
|
-
"pyo3",
|
324
|
-
]
|
325
|
-
|
326
319
|
[[package]]
|
327
320
|
name = "unicode-ident"
|
328
321
|
version = "1.0.19"
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Source Replacements (space-separated)
|
2
|
+
li h
|
3
|
+
h li
|
4
|
+
rn m
|
5
|
+
m rn
|
6
|
+
cl d
|
7
|
+
d cl
|
8
|
+
I l
|
9
|
+
l I 1
|
10
|
+
1 l I
|
11
|
+
0 O
|
12
|
+
O 0
|
13
|
+
B 8
|
14
|
+
8 B
|
15
|
+
S 5
|
16
|
+
5 S
|
17
|
+
Z 2
|
18
|
+
2 Z
|
19
|
+
G 6
|
20
|
+
6 G
|
21
|
+
“ "
|
22
|
+
” "
|
23
|
+
‘ '
|
24
|
+
’ '
|
25
|
+
— -
|
26
|
+
– -
|
27
|
+
vv w
|
28
|
+
w vv
|
29
|
+
ri n
|
30
|
+
n ri
|
@@ -0,0 +1,140 @@
|
|
1
|
+
use std::env;
|
2
|
+
use std::ffi::{OsStr, OsString};
|
3
|
+
use std::fs;
|
4
|
+
use std::io::{self, ErrorKind};
|
5
|
+
use std::path::PathBuf;
|
6
|
+
use std::process::Command;
|
7
|
+
|
8
|
+
fn main() {
|
9
|
+
prepare_confusion_table().expect("failed to stage OCR confusion table for compilation");
|
10
|
+
pyo3_build_config::add_extension_module_link_args();
|
11
|
+
|
12
|
+
if let Some(python) = configured_python() {
|
13
|
+
link_python(&python);
|
14
|
+
} else if let Some(python) = detect_python() {
|
15
|
+
link_python(&python);
|
16
|
+
}
|
17
|
+
}
|
18
|
+
|
19
|
+
fn configured_python() -> Option<OsString> {
|
20
|
+
std::env::var_os("PYO3_PYTHON")
|
21
|
+
.or_else(|| std::env::var_os("PYTHON"))
|
22
|
+
.filter(|path| !path.is_empty())
|
23
|
+
}
|
24
|
+
|
25
|
+
fn detect_python() -> Option<OsString> {
|
26
|
+
const CANDIDATES: &[&str] = &[
|
27
|
+
"python3.12",
|
28
|
+
"python3.11",
|
29
|
+
"python3.10",
|
30
|
+
"python3",
|
31
|
+
"python",
|
32
|
+
];
|
33
|
+
|
34
|
+
for candidate in CANDIDATES {
|
35
|
+
let status = Command::new(candidate)
|
36
|
+
.arg("-c")
|
37
|
+
.arg("import sys")
|
38
|
+
.output();
|
39
|
+
|
40
|
+
if let Ok(output) = status {
|
41
|
+
if output.status.success() {
|
42
|
+
return Some(OsString::from(candidate));
|
43
|
+
}
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
None
|
48
|
+
}
|
49
|
+
|
50
|
+
fn link_python(python: &OsStr) {
|
51
|
+
if let Some(path) = query_python(
|
52
|
+
python,
|
53
|
+
"import sysconfig; print(sysconfig.get_config_var('LIBDIR') or '')",
|
54
|
+
) {
|
55
|
+
let trimmed = path.trim();
|
56
|
+
if !trimmed.is_empty() {
|
57
|
+
println!("cargo:rustc-link-search=native={trimmed}");
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
if let Some(path) = query_python(
|
62
|
+
python,
|
63
|
+
"import sysconfig; print(sysconfig.get_config_var('LIBPL') or '')",
|
64
|
+
) {
|
65
|
+
let trimmed = path.trim();
|
66
|
+
if !trimmed.is_empty() {
|
67
|
+
println!("cargo:rustc-link-search=native={trimmed}");
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
if let Some(library) = query_python(
|
72
|
+
python,
|
73
|
+
"import sysconfig; print(sysconfig.get_config_var('LDLIBRARY') or '')",
|
74
|
+
) {
|
75
|
+
let name = library.trim();
|
76
|
+
if let Some(stripped) = name.strip_prefix("lib") {
|
77
|
+
let stem = stripped
|
78
|
+
.strip_suffix(".so")
|
79
|
+
.or_else(|| stripped.strip_suffix(".a"))
|
80
|
+
.unwrap_or(stripped);
|
81
|
+
if !stem.is_empty() {
|
82
|
+
println!("cargo:rustc-link-lib={stem}");
|
83
|
+
}
|
84
|
+
}
|
85
|
+
}
|
86
|
+
}
|
87
|
+
|
88
|
+
fn query_python(python: &OsStr, command: &str) -> Option<String> {
|
89
|
+
let output = Command::new(python).arg("-c").arg(command).output().ok()?;
|
90
|
+
if !output.status.success() {
|
91
|
+
return None;
|
92
|
+
}
|
93
|
+
let value = String::from_utf8(output.stdout).ok()?;
|
94
|
+
Some(value)
|
95
|
+
}
|
96
|
+
|
97
|
+
fn prepare_confusion_table() -> io::Result<()> {
|
98
|
+
let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("missing manifest dir"));
|
99
|
+
let out_dir = PathBuf::from(env::var("OUT_DIR").expect("missing OUT_DIR"));
|
100
|
+
|
101
|
+
let repo_path = manifest_dir.join("../../src/glitchlings/zoo/ocr_confusions.tsv");
|
102
|
+
let packaged_path = manifest_dir.join("assets/ocr_confusions.tsv");
|
103
|
+
println!("cargo:rerun-if-changed={}", packaged_path.display());
|
104
|
+
|
105
|
+
let source_path = if repo_path.exists() {
|
106
|
+
println!("cargo:rerun-if-changed={}", repo_path.display());
|
107
|
+
if packaged_path.exists() {
|
108
|
+
let repo_bytes = fs::read(&repo_path)?;
|
109
|
+
let packaged_bytes = fs::read(&packaged_path)?;
|
110
|
+
if repo_bytes != packaged_bytes {
|
111
|
+
return Err(io::Error::new(
|
112
|
+
ErrorKind::Other,
|
113
|
+
format!(
|
114
|
+
"OCR confusion table at {} is out of sync with {}",
|
115
|
+
packaged_path.display(),
|
116
|
+
repo_path.display()
|
117
|
+
),
|
118
|
+
));
|
119
|
+
}
|
120
|
+
}
|
121
|
+
repo_path
|
122
|
+
} else {
|
123
|
+
if !packaged_path.exists() {
|
124
|
+
return Err(io::Error::new(
|
125
|
+
ErrorKind::NotFound,
|
126
|
+
format!(
|
127
|
+
"missing OCR confusion table; looked for {} and {}",
|
128
|
+
repo_path.display(),
|
129
|
+
packaged_path.display()
|
130
|
+
),
|
131
|
+
));
|
132
|
+
}
|
133
|
+
packaged_path
|
134
|
+
};
|
135
|
+
|
136
|
+
fs::create_dir_all(&out_dir)?;
|
137
|
+
fs::copy(&source_path, out_dir.join("ocr_confusions.tsv"))?;
|
138
|
+
Ok(())
|
139
|
+
}
|
140
|
+
|
@@ -500,6 +500,6 @@ mod tests {
|
|
500
500
|
let mut rng = PyRng::new(1);
|
501
501
|
let op = OcrArtifactsOp { error_rate: 1.0 };
|
502
502
|
op.apply(&mut buffer, &mut rng).expect("ocr succeeds");
|
503
|
-
assert_eq!(buffer.to_string(), "Tlie rn
|
503
|
+
assert_eq!(buffer.to_string(), "Tlie rn rri");
|
504
504
|
}
|
505
505
|
}
|
@@ -3,6 +3,7 @@ mod pipeline;
|
|
3
3
|
mod resources;
|
4
4
|
mod rng;
|
5
5
|
mod text_buffer;
|
6
|
+
mod typogre;
|
6
7
|
|
7
8
|
use glitch_ops::{GlitchOp, GlitchRng};
|
8
9
|
use pyo3::prelude::*;
|
@@ -17,7 +18,6 @@ pub use glitch_ops::{
|
|
17
18
|
pub use pipeline::{derive_seed, GlitchDescriptor, Pipeline, PipelineError};
|
18
19
|
pub use rng::{PyRng, PyRngError};
|
19
20
|
pub use text_buffer::{SegmentKind, TextBuffer, TextBufferError, TextSegment, TextSpan};
|
20
|
-
|
21
21
|
struct PythonRngAdapter<'py> {
|
22
22
|
rng: Bound<'py, PyAny>,
|
23
23
|
}
|
@@ -279,5 +279,6 @@ fn _zoo_rust(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|
279
279
|
m.add_function(wrap_pyfunction!(ocr_artifacts, m)?)?;
|
280
280
|
m.add_function(wrap_pyfunction!(redact_words, m)?)?;
|
281
281
|
m.add_function(wrap_pyfunction!(compose_glitchlings, m)?)?;
|
282
|
+
m.add_function(wrap_pyfunction!(typogre::fatfinger, m)?)?;
|
282
283
|
Ok(())
|
283
284
|
}
|