glitchlings 0.4.1__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- {glitchlings-0.4.1 → glitchlings-0.4.3}/MANIFEST.in +1 -1
- {glitchlings-0.4.1 → glitchlings-0.4.3}/PKG-INFO +101 -4
- {glitchlings-0.4.1 → glitchlings-0.4.3}/README.md +68 -3
- glitchlings-0.4.3/pyproject.toml +186 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/Cargo.lock +55 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/Cargo.toml +1 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/build.rs +47 -31
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/src/glitch_ops.rs +154 -2
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/src/lib.rs +14 -2
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/src/resources.rs +35 -1
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/src/text_buffer.rs +2 -8
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/__init__.py +30 -17
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/__main__.py +0 -1
- glitchlings-0.4.3/src/glitchlings/compat.py +284 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/config.py +164 -34
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/config.toml +1 -1
- glitchlings-0.4.3/src/glitchlings/dlc/__init__.py +7 -0
- glitchlings-0.4.3/src/glitchlings/dlc/_shared.py +68 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/dlc/huggingface.py +26 -41
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/dlc/prime.py +64 -101
- glitchlings-0.4.3/src/glitchlings/dlc/pytorch.py +216 -0
- glitchlings-0.4.3/src/glitchlings/dlc/pytorch_lightning.py +233 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/lexicon/__init__.py +12 -33
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/lexicon/_cache.py +21 -22
- glitchlings-0.4.3/src/glitchlings/lexicon/data/default_vector_cache.json +82 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/lexicon/metrics.py +1 -8
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/lexicon/vector.py +109 -49
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/lexicon/wordnet.py +89 -49
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/main.py +30 -24
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/util/__init__.py +18 -4
- glitchlings-0.4.3/src/glitchlings/util/adapters.py +27 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/__init__.py +26 -15
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/_ocr_confusions.py +1 -3
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/_rate.py +1 -4
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/_sampling.py +0 -1
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/_text_utils.py +1 -5
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/adjax.py +2 -4
- glitchlings-0.4.3/src/glitchlings/zoo/apostrofae.py +128 -0
- glitchlings-0.4.3/src/glitchlings/zoo/assets/__init__.py +0 -0
- glitchlings-0.4.3/src/glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/core.py +152 -87
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/jargoyle.py +50 -45
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/mim1c.py +11 -10
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/redactyl.py +16 -16
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/reduple.py +5 -3
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/rushmore.py +4 -10
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/scannequin.py +7 -6
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/typogre.py +8 -9
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/zeedub.py +6 -3
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings.egg-info/PKG-INFO +101 -4
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings.egg-info/SOURCES.txt +8 -24
- glitchlings-0.4.3/src/glitchlings.egg-info/requires.txt +58 -0
- glitchlings-0.4.1/pyproject.toml +0 -87
- glitchlings-0.4.1/src/glitchlings/dlc/__init__.py +0 -5
- glitchlings-0.4.1/src/glitchlings/lexicon/data/default_vector_cache.json +0 -16
- glitchlings-0.4.1/src/glitchlings/lexicon/graph.py +0 -290
- glitchlings-0.4.1/src/glitchlings.egg-info/requires.txt +0 -22
- glitchlings-0.4.1/tests/test_benchmarks.py +0 -137
- glitchlings-0.4.1/tests/test_cli.py +0 -369
- glitchlings-0.4.1/tests/test_config.py +0 -196
- glitchlings-0.4.1/tests/test_dataset_corruption.py +0 -128
- glitchlings-0.4.1/tests/test_gaggle.py +0 -68
- glitchlings-0.4.1/tests/test_glitchling_core.py +0 -68
- glitchlings-0.4.1/tests/test_glitchlings_determinism.py +0 -103
- glitchlings-0.4.1/tests/test_graph_lexicon.py +0 -81
- glitchlings-0.4.1/tests/test_huggingface_dlc.py +0 -78
- glitchlings-0.4.1/tests/test_jargoyle.py +0 -209
- glitchlings-0.4.1/tests/test_keyboard_layouts.py +0 -42
- glitchlings-0.4.1/tests/test_lexicon_backends.py +0 -85
- glitchlings-0.4.1/tests/test_lexicon_config.py +0 -56
- glitchlings-0.4.1/tests/test_lexicon_metrics.py +0 -120
- glitchlings-0.4.1/tests/test_parameter_effects.py +0 -281
- glitchlings-0.4.1/tests/test_pipeline_operations.py +0 -95
- glitchlings-0.4.1/tests/test_prime_echo_chamber.py +0 -390
- glitchlings-0.4.1/tests/test_property_based.py +0 -150
- glitchlings-0.4.1/tests/test_rate_and_sampling.py +0 -51
- glitchlings-0.4.1/tests/test_rust_backed_glitchlings.py +0 -931
- glitchlings-0.4.1/tests/test_text_utils.py +0 -37
- glitchlings-0.4.1/tests/test_util.py +0 -35
- glitchlings-0.4.1/tests/test_vector_lexicon.py +0 -438
- {glitchlings-0.4.1 → glitchlings-0.4.3}/LICENSE +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/Cargo.toml +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/assets/ocr_confusions.tsv +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/src/pipeline.rs +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/src/rng.rs +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/src/typogre.rs +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/rust/zoo/src/zeedub.rs +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/setup.cfg +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings/zoo/ocr_confusions.tsv +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings.egg-info/dependency_links.txt +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings.egg-info/entry_points.txt +0 -0
- {glitchlings-0.4.1 → glitchlings-0.4.3}/src/glitchlings.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: glitchlings
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: Monsters for your language games.
|
|
5
5
|
Author: osoleve
|
|
6
6
|
License: Apache License
|
|
@@ -226,19 +226,51 @@ License-File: LICENSE
|
|
|
226
226
|
Requires-Dist: confusable-homoglyphs>=3.3.1
|
|
227
227
|
Requires-Dist: tomli>=2.0.1; python_version < "3.11"
|
|
228
228
|
Requires-Dist: pyyaml>=6.0.0
|
|
229
|
+
Provides-Extra: all
|
|
230
|
+
Requires-Dist: black>=24.4.0; extra == "all"
|
|
231
|
+
Requires-Dist: hypothesis>=6.140.0; extra == "all"
|
|
232
|
+
Requires-Dist: interrogate>=1.5.0; extra == "all"
|
|
233
|
+
Requires-Dist: jellyfish>=1.2.0; extra == "all"
|
|
234
|
+
Requires-Dist: isort>=5.13.0; extra == "all"
|
|
235
|
+
Requires-Dist: mkdocs>=1.6.0; extra == "all"
|
|
236
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == "all"
|
|
237
|
+
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "all"
|
|
238
|
+
Requires-Dist: mkdocstrings-python>=1.10.0; extra == "all"
|
|
239
|
+
Requires-Dist: mypy>=1.8.0; extra == "all"
|
|
240
|
+
Requires-Dist: numpy<=2.0,>=1.24; extra == "all"
|
|
241
|
+
Requires-Dist: pre-commit>=3.8.0; extra == "all"
|
|
242
|
+
Requires-Dist: pytest>=8.0.0; extra == "all"
|
|
243
|
+
Requires-Dist: ruff>=0.6.0; extra == "all"
|
|
244
|
+
Requires-Dist: verifiers>=0.1.3.post0; extra == "all"
|
|
229
245
|
Provides-Extra: hf
|
|
230
246
|
Requires-Dist: datasets>=4.0.0; extra == "hf"
|
|
247
|
+
Provides-Extra: lightning
|
|
248
|
+
Requires-Dist: pytorch_lightning>=2.0.0; extra == "lightning"
|
|
231
249
|
Provides-Extra: vectors
|
|
232
250
|
Requires-Dist: numpy<=2.0,>=1.24; extra == "vectors"
|
|
233
251
|
Requires-Dist: spacy>=3.7.2; extra == "vectors"
|
|
234
252
|
Requires-Dist: gensim>=4.3.2; extra == "vectors"
|
|
253
|
+
Provides-Extra: st
|
|
254
|
+
Requires-Dist: sentence-transformers>=3.0.0; extra == "st"
|
|
235
255
|
Provides-Extra: prime
|
|
236
256
|
Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
|
|
237
257
|
Requires-Dist: jellyfish>=1.2.0; extra == "prime"
|
|
258
|
+
Provides-Extra: torch
|
|
259
|
+
Requires-Dist: torch>=2.0.0; extra == "torch"
|
|
238
260
|
Provides-Extra: dev
|
|
239
261
|
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
240
262
|
Requires-Dist: hypothesis>=6.140.0; extra == "dev"
|
|
241
263
|
Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
|
|
264
|
+
Requires-Dist: mkdocs>=1.6.0; extra == "dev"
|
|
265
|
+
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "dev"
|
|
266
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == "dev"
|
|
267
|
+
Requires-Dist: mkdocstrings-python>=1.10.0; extra == "dev"
|
|
268
|
+
Requires-Dist: interrogate>=1.5.0; extra == "dev"
|
|
269
|
+
Requires-Dist: black>=24.4.0; extra == "dev"
|
|
270
|
+
Requires-Dist: isort>=5.13.0; extra == "dev"
|
|
271
|
+
Requires-Dist: ruff>=0.6.0; extra == "dev"
|
|
272
|
+
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
273
|
+
Requires-Dist: pre-commit>=3.8.0; extra == "dev"
|
|
242
274
|
Dynamic: license-file
|
|
243
275
|
|
|
244
276
|
#
|
|
@@ -297,7 +329,7 @@ print(gaggle(SAMPLE_TEXT))
|
|
|
297
329
|
> Onҽ mھrning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin٠ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
|
|
298
330
|
|
|
299
331
|
Consult the [Glitchlings Usage Guide](docs/index.md)
|
|
300
|
-
for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
|
|
332
|
+
for end-to-end instructions spanning the Python API, CLI, HuggingFace, PyTorch, and Prime Intellect
|
|
301
333
|
integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
|
|
302
334
|
|
|
303
335
|
## Motivation
|
|
@@ -338,10 +370,67 @@ They're horrible little gremlins, but they're not _unreasonable_.
|
|
|
338
370
|
|
|
339
371
|
Keyboard warriors can challenge them directly via the `glitchlings` command:
|
|
340
372
|
|
|
373
|
+
<!-- BEGIN: CLI_USAGE -->
|
|
341
374
|
```bash
|
|
342
375
|
# Discover which glitchlings are currently on the loose.
|
|
343
376
|
glitchlings --list
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
```text
|
|
380
|
+
Typogre — scope: Character, order: early
|
|
381
|
+
Apostrofae — scope: Character, order: normal
|
|
382
|
+
Mim1c — scope: Character, order: last
|
|
383
|
+
Jargoyle — scope: Word, order: normal
|
|
384
|
+
Adjax — scope: Word, order: normal
|
|
385
|
+
Reduple — scope: Word, order: normal
|
|
386
|
+
Rushmore — scope: Word, order: normal
|
|
387
|
+
Redactyl — scope: Word, order: normal
|
|
388
|
+
Scannequin — scope: Character, order: late
|
|
389
|
+
Zeedub — scope: Character, order: last
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
```bash
|
|
393
|
+
# Review the full CLI contract.
|
|
394
|
+
glitchlings --help
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
```text
|
|
398
|
+
usage: glitchlings [-h] [-g SPEC] [-s SEED] [-f FILE] [--sample] [--diff]
|
|
399
|
+
[--list] [-c CONFIG]
|
|
400
|
+
[text]
|
|
401
|
+
|
|
402
|
+
Summon glitchlings to corrupt text. Provide input text as an argument, via
|
|
403
|
+
--file, or pipe it on stdin.
|
|
404
|
+
|
|
405
|
+
positional arguments:
|
|
406
|
+
text Text to corrupt. If omitted, stdin is used or --sample
|
|
407
|
+
provides fallback text.
|
|
408
|
+
|
|
409
|
+
options:
|
|
410
|
+
-h, --help show this help message and exit
|
|
411
|
+
-g SPEC, --glitchling SPEC
|
|
412
|
+
Glitchling to apply, optionally with parameters like
|
|
413
|
+
Typogre(rate=0.05). Repeat for multiples; defaults to
|
|
414
|
+
all built-ins.
|
|
415
|
+
-s SEED, --seed SEED Seed controlling deterministic corruption order
|
|
416
|
+
(default: 151).
|
|
417
|
+
-f FILE, --file FILE Read input text from a file instead of the command
|
|
418
|
+
line argument.
|
|
419
|
+
--sample Use the included SAMPLE_TEXT when no other input is
|
|
420
|
+
provided.
|
|
421
|
+
--diff Show a unified diff between the original and corrupted
|
|
422
|
+
text.
|
|
423
|
+
--list List available glitchlings and exit.
|
|
424
|
+
-c CONFIG, --config CONFIG
|
|
425
|
+
Load glitchlings from a YAML configuration file.
|
|
426
|
+
```
|
|
427
|
+
<!-- END: CLI_USAGE -->
|
|
428
|
+
|
|
429
|
+
Run `python docs/build_cli_reference.py` whenever you tweak the CLI so the README stays in sync with the actual output. The script executes the commands above and replaces the block between the markers automatically.
|
|
344
430
|
|
|
431
|
+
Prefer inline tweaks? You can still configure glitchlings directly in the shell:
|
|
432
|
+
|
|
433
|
+
```bash
|
|
345
434
|
# Run Typogre against the contents of a file and inspect the diff.
|
|
346
435
|
glitchlings -g typogre --file documents/report.txt --diff
|
|
347
436
|
|
|
@@ -355,8 +444,6 @@ echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
|
|
355
444
|
glitchlings --config experiments/chaos.yaml "Let slips the glitchlings of war"
|
|
356
445
|
```
|
|
357
446
|
|
|
358
|
-
Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
|
|
359
|
-
|
|
360
447
|
Attack configurations live in plain YAML files so you can version-control experiments without touching code:
|
|
361
448
|
|
|
362
449
|
```yaml
|
|
@@ -394,6 +481,16 @@ _What a nice word, would be a shame if something happened to it._
|
|
|
394
481
|
> - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
|
|
395
482
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
396
483
|
|
|
484
|
+
### Apostrofae
|
|
485
|
+
|
|
486
|
+
_It looks like you're trying to paste some text. Can I help?_
|
|
487
|
+
|
|
488
|
+
> _**Paperclip Manager.**_ Apostrofae scans for balanced runs of straight quotes, apostrophes, and backticks before replacing them with randomly sampled smart-quote pairs from a curated lookup table. The swap happens in-place so contractions and unpaired glyphs remain untouched.
|
|
489
|
+
>
|
|
490
|
+
> Args
|
|
491
|
+
>
|
|
492
|
+
> - `seed (int)`: Optional seed controlling the deterministic smart-quote sampling (default: 151).
|
|
493
|
+
|
|
397
494
|
### Mim1c
|
|
398
495
|
|
|
399
496
|
_Wait, was that...?_
|
|
@@ -54,7 +54,7 @@ print(gaggle(SAMPLE_TEXT))
|
|
|
54
54
|
> Onҽ mھrning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin٠ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
|
|
55
55
|
|
|
56
56
|
Consult the [Glitchlings Usage Guide](docs/index.md)
|
|
57
|
-
for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
|
|
57
|
+
for end-to-end instructions spanning the Python API, CLI, HuggingFace, PyTorch, and Prime Intellect
|
|
58
58
|
integrations, and the autodetected Rust pipeline (enabled whenever the extension is present).
|
|
59
59
|
|
|
60
60
|
## Motivation
|
|
@@ -95,10 +95,67 @@ They're horrible little gremlins, but they're not _unreasonable_.
|
|
|
95
95
|
|
|
96
96
|
Keyboard warriors can challenge them directly via the `glitchlings` command:
|
|
97
97
|
|
|
98
|
+
<!-- BEGIN: CLI_USAGE -->
|
|
98
99
|
```bash
|
|
99
100
|
# Discover which glitchlings are currently on the loose.
|
|
100
101
|
glitchlings --list
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
```text
|
|
105
|
+
Typogre — scope: Character, order: early
|
|
106
|
+
Apostrofae — scope: Character, order: normal
|
|
107
|
+
Mim1c — scope: Character, order: last
|
|
108
|
+
Jargoyle — scope: Word, order: normal
|
|
109
|
+
Adjax — scope: Word, order: normal
|
|
110
|
+
Reduple — scope: Word, order: normal
|
|
111
|
+
Rushmore — scope: Word, order: normal
|
|
112
|
+
Redactyl — scope: Word, order: normal
|
|
113
|
+
Scannequin — scope: Character, order: late
|
|
114
|
+
Zeedub — scope: Character, order: last
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# Review the full CLI contract.
|
|
119
|
+
glitchlings --help
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
```text
|
|
123
|
+
usage: glitchlings [-h] [-g SPEC] [-s SEED] [-f FILE] [--sample] [--diff]
|
|
124
|
+
[--list] [-c CONFIG]
|
|
125
|
+
[text]
|
|
126
|
+
|
|
127
|
+
Summon glitchlings to corrupt text. Provide input text as an argument, via
|
|
128
|
+
--file, or pipe it on stdin.
|
|
129
|
+
|
|
130
|
+
positional arguments:
|
|
131
|
+
text Text to corrupt. If omitted, stdin is used or --sample
|
|
132
|
+
provides fallback text.
|
|
133
|
+
|
|
134
|
+
options:
|
|
135
|
+
-h, --help show this help message and exit
|
|
136
|
+
-g SPEC, --glitchling SPEC
|
|
137
|
+
Glitchling to apply, optionally with parameters like
|
|
138
|
+
Typogre(rate=0.05). Repeat for multiples; defaults to
|
|
139
|
+
all built-ins.
|
|
140
|
+
-s SEED, --seed SEED Seed controlling deterministic corruption order
|
|
141
|
+
(default: 151).
|
|
142
|
+
-f FILE, --file FILE Read input text from a file instead of the command
|
|
143
|
+
line argument.
|
|
144
|
+
--sample Use the included SAMPLE_TEXT when no other input is
|
|
145
|
+
provided.
|
|
146
|
+
--diff Show a unified diff between the original and corrupted
|
|
147
|
+
text.
|
|
148
|
+
--list List available glitchlings and exit.
|
|
149
|
+
-c CONFIG, --config CONFIG
|
|
150
|
+
Load glitchlings from a YAML configuration file.
|
|
151
|
+
```
|
|
152
|
+
<!-- END: CLI_USAGE -->
|
|
153
|
+
|
|
154
|
+
Run `python docs/build_cli_reference.py` whenever you tweak the CLI so the README stays in sync with the actual output. The script executes the commands above and replaces the block between the markers automatically.
|
|
101
155
|
|
|
156
|
+
Prefer inline tweaks? You can still configure glitchlings directly in the shell:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
102
159
|
# Run Typogre against the contents of a file and inspect the diff.
|
|
103
160
|
glitchlings -g typogre --file documents/report.txt --diff
|
|
104
161
|
|
|
@@ -112,8 +169,6 @@ echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
|
|
112
169
|
glitchlings --config experiments/chaos.yaml "Let slips the glitchlings of war"
|
|
113
170
|
```
|
|
114
171
|
|
|
115
|
-
Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
|
|
116
|
-
|
|
117
172
|
Attack configurations live in plain YAML files so you can version-control experiments without touching code:
|
|
118
173
|
|
|
119
174
|
```yaml
|
|
@@ -151,6 +206,16 @@ _What a nice word, would be a shame if something happened to it._
|
|
|
151
206
|
> - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
|
|
152
207
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
153
208
|
|
|
209
|
+
### Apostrofae
|
|
210
|
+
|
|
211
|
+
_It looks like you're trying to paste some text. Can I help?_
|
|
212
|
+
|
|
213
|
+
> _**Paperclip Manager.**_ Apostrofae scans for balanced runs of straight quotes, apostrophes, and backticks before replacing them with randomly sampled smart-quote pairs from a curated lookup table. The swap happens in-place so contractions and unpaired glyphs remain untouched.
|
|
214
|
+
>
|
|
215
|
+
> Args
|
|
216
|
+
>
|
|
217
|
+
> - `seed (int)`: Optional seed controlling the deterministic smart-quote sampling (default: 151).
|
|
218
|
+
|
|
154
219
|
### Mim1c
|
|
155
220
|
|
|
156
221
|
_Wait, was that...?_
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "glitchlings"
|
|
3
|
+
version = "0.4.3"
|
|
4
|
+
description = "Monsters for your language games."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
|
|
8
|
+
dependencies = [
|
|
9
|
+
"confusable-homoglyphs>=3.3.1",
|
|
10
|
+
"tomli>=2.0.1; python_version < '3.11'",
|
|
11
|
+
"pyyaml>=6.0.0",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
authors = [
|
|
15
|
+
{ name = "osoleve" }
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
keywords = [
|
|
19
|
+
"nlp",
|
|
20
|
+
"adversarial augmentation",
|
|
21
|
+
"text augmentation",
|
|
22
|
+
"data augmentation",
|
|
23
|
+
"domain randomization"
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
classifiers = [
|
|
27
|
+
"Development Status :: 3 - Alpha",
|
|
28
|
+
"Intended Audience :: Developers",
|
|
29
|
+
"Programming Language :: Python",
|
|
30
|
+
"Programming Language :: Python :: 3",
|
|
31
|
+
"Programming Language :: Python :: 3.10",
|
|
32
|
+
"Programming Language :: Python :: 3.11",
|
|
33
|
+
"Programming Language :: Python :: 3.12",
|
|
34
|
+
"Programming Language :: Rust",
|
|
35
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
36
|
+
"Topic :: Software Development :: Testing",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.license]
|
|
40
|
+
file = "LICENSE"
|
|
41
|
+
|
|
42
|
+
[project.urls]
|
|
43
|
+
Homepage = "https://github.com/osoleve/glitchlings"
|
|
44
|
+
Repository = "https://github.com/osoleve/glitchlings.git"
|
|
45
|
+
Issues = "https://github.com/osoleve/glitchlings/issues"
|
|
46
|
+
Changelog = "https://github.com/osoleve/glitchlings/releases"
|
|
47
|
+
|
|
48
|
+
[project.scripts]
|
|
49
|
+
glitchlings = "glitchlings.main:main"
|
|
50
|
+
|
|
51
|
+
[project.optional-dependencies]
|
|
52
|
+
all = [
|
|
53
|
+
"black>=24.4.0",
|
|
54
|
+
"hypothesis>=6.140.0",
|
|
55
|
+
"interrogate>=1.5.0",
|
|
56
|
+
"jellyfish>=1.2.0",
|
|
57
|
+
"isort>=5.13.0",
|
|
58
|
+
"mkdocs>=1.6.0",
|
|
59
|
+
"mkdocs-material>=9.5.0",
|
|
60
|
+
"mkdocstrings[python]>=0.24.0",
|
|
61
|
+
"mkdocstrings-python>=1.10.0",
|
|
62
|
+
"mypy>=1.8.0",
|
|
63
|
+
"numpy>=1.24,<=2.0",
|
|
64
|
+
"pre-commit>=3.8.0",
|
|
65
|
+
"pytest>=8.0.0",
|
|
66
|
+
"ruff>=0.6.0",
|
|
67
|
+
"verifiers>=0.1.3.post0",
|
|
68
|
+
]
|
|
69
|
+
hf = ["datasets>=4.0.0"]
|
|
70
|
+
lightning = ["pytorch_lightning>=2.0.0"]
|
|
71
|
+
vectors = ["numpy>=1.24,<=2.0", "spacy>=3.7.2", "gensim>=4.3.2"]
|
|
72
|
+
st = ["sentence-transformers>=3.0.0"]
|
|
73
|
+
prime = ["verifiers>=0.1.3.post0", "jellyfish>=1.2.0"]
|
|
74
|
+
torch = ["torch>=2.0.0"]
|
|
75
|
+
dev = [
|
|
76
|
+
"pytest>=8.0.0",
|
|
77
|
+
"hypothesis>=6.140.0",
|
|
78
|
+
"numpy>=1.24,<=2.0",
|
|
79
|
+
"mkdocs>=1.6.0",
|
|
80
|
+
"mkdocstrings[python]>=0.24.0",
|
|
81
|
+
"mkdocs-material>=9.5.0",
|
|
82
|
+
"mkdocstrings-python>=1.10.0",
|
|
83
|
+
"interrogate>=1.5.0",
|
|
84
|
+
"black>=24.4.0",
|
|
85
|
+
"isort>=5.13.0",
|
|
86
|
+
"ruff>=0.6.0",
|
|
87
|
+
"mypy>=1.8.0",
|
|
88
|
+
"pre-commit>=3.8.0",
|
|
89
|
+
]
|
|
90
|
+
|
|
91
|
+
[build-system]
|
|
92
|
+
requires = ["setuptools>=69", "wheel", "setuptools-rust>=1.8.0"]
|
|
93
|
+
build-backend = "setuptools.build_meta"
|
|
94
|
+
|
|
95
|
+
[tool.setuptools]
|
|
96
|
+
package-dir = {"" = "src"}
|
|
97
|
+
include-package-data = true
|
|
98
|
+
|
|
99
|
+
[tool.setuptools.package-data]
|
|
100
|
+
"glitchlings" = ["config.toml"]
|
|
101
|
+
"glitchlings.lexicon" = ["data/*.json"]
|
|
102
|
+
"glitchlings.zoo" = ["ocr_confusions.tsv"]
|
|
103
|
+
"glitchlings.zoo.assets" = ["apostrofae_pairs.json"]
|
|
104
|
+
|
|
105
|
+
[tool.setuptools.packages.find]
|
|
106
|
+
where = ["src"]
|
|
107
|
+
|
|
108
|
+
[[tool.setuptools-rust.ext-modules]]
|
|
109
|
+
target = "glitchlings._zoo_rust"
|
|
110
|
+
path = "rust/zoo/Cargo.toml"
|
|
111
|
+
binding = "PyO3"
|
|
112
|
+
debug = false
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
[tool.pytest.ini_options]
|
|
116
|
+
pythonpath = [
|
|
117
|
+
"src",
|
|
118
|
+
]
|
|
119
|
+
|
|
120
|
+
[tool.interrogate]
|
|
121
|
+
config = true
|
|
122
|
+
fail-under = 80
|
|
123
|
+
ignore-init-module = true
|
|
124
|
+
ignore-module = true
|
|
125
|
+
ignore-nested-functions = true
|
|
126
|
+
ignore-private = true
|
|
127
|
+
ignore-semiprivate = true
|
|
128
|
+
ignore-magic = true
|
|
129
|
+
ignore-property-decorators = false
|
|
130
|
+
color = true
|
|
131
|
+
quiet = false
|
|
132
|
+
exclude = [
|
|
133
|
+
"tests",
|
|
134
|
+
"docs",
|
|
135
|
+
"rust",
|
|
136
|
+
"benchmarks",
|
|
137
|
+
]
|
|
138
|
+
|
|
139
|
+
[tool.black]
|
|
140
|
+
line-length = 100
|
|
141
|
+
target-version = ["py310"]
|
|
142
|
+
|
|
143
|
+
[tool.isort]
|
|
144
|
+
profile = "black"
|
|
145
|
+
line_length = 100
|
|
146
|
+
|
|
147
|
+
[tool.ruff]
|
|
148
|
+
target-version = "py310"
|
|
149
|
+
line-length = 100
|
|
150
|
+
|
|
151
|
+
[tool.ruff.lint]
|
|
152
|
+
select = ["E", "F", "I"]
|
|
153
|
+
|
|
154
|
+
[tool.mypy]
|
|
155
|
+
python_version = "3.10"
|
|
156
|
+
follow_imports = "skip"
|
|
157
|
+
ignore_missing_imports = true
|
|
158
|
+
enable_error_code = ["ignore-without-code"]
|
|
159
|
+
|
|
160
|
+
[[tool.mypy.overrides]]
|
|
161
|
+
module = [
|
|
162
|
+
"glitchlings.util.adapters",
|
|
163
|
+
"glitchlings.dlc._shared",
|
|
164
|
+
"glitchlings.dlc.huggingface",
|
|
165
|
+
"glitchlings.dlc.prime",
|
|
166
|
+
]
|
|
167
|
+
strict = true
|
|
168
|
+
|
|
169
|
+
[[tool.mypy.overrides]]
|
|
170
|
+
module = [
|
|
171
|
+
"glitchlings.compat",
|
|
172
|
+
"glitchlings.config",
|
|
173
|
+
"glitchlings.lexicon",
|
|
174
|
+
"glitchlings.lexicon.*",
|
|
175
|
+
]
|
|
176
|
+
strict = true
|
|
177
|
+
|
|
178
|
+
[[tool.mypy.overrides]]
|
|
179
|
+
module = [
|
|
180
|
+
"glitchlings.main",
|
|
181
|
+
"glitchlings.__main__",
|
|
182
|
+
"glitchlings.__init__",
|
|
183
|
+
"glitchlings.zoo",
|
|
184
|
+
"glitchlings.zoo.*",
|
|
185
|
+
]
|
|
186
|
+
strict = true
|
|
@@ -90,6 +90,12 @@ version = "2.0.6"
|
|
|
90
90
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
91
91
|
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
|
|
92
92
|
|
|
93
|
+
[[package]]
|
|
94
|
+
name = "itoa"
|
|
95
|
+
version = "1.0.15"
|
|
96
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
97
|
+
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
|
|
98
|
+
|
|
93
99
|
[[package]]
|
|
94
100
|
name = "libc"
|
|
95
101
|
version = "0.2.176"
|
|
@@ -275,12 +281,60 @@ version = "0.8.6"
|
|
|
275
281
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
276
282
|
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
|
|
277
283
|
|
|
284
|
+
[[package]]
|
|
285
|
+
name = "ryu"
|
|
286
|
+
version = "1.0.20"
|
|
287
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
288
|
+
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
|
|
289
|
+
|
|
278
290
|
[[package]]
|
|
279
291
|
name = "scopeguard"
|
|
280
292
|
version = "1.2.0"
|
|
281
293
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
282
294
|
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
|
283
295
|
|
|
296
|
+
[[package]]
|
|
297
|
+
name = "serde"
|
|
298
|
+
version = "1.0.228"
|
|
299
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
300
|
+
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
|
301
|
+
dependencies = [
|
|
302
|
+
"serde_core",
|
|
303
|
+
]
|
|
304
|
+
|
|
305
|
+
[[package]]
|
|
306
|
+
name = "serde_core"
|
|
307
|
+
version = "1.0.228"
|
|
308
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
309
|
+
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
|
310
|
+
dependencies = [
|
|
311
|
+
"serde_derive",
|
|
312
|
+
]
|
|
313
|
+
|
|
314
|
+
[[package]]
|
|
315
|
+
name = "serde_derive"
|
|
316
|
+
version = "1.0.228"
|
|
317
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
318
|
+
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
|
319
|
+
dependencies = [
|
|
320
|
+
"proc-macro2",
|
|
321
|
+
"quote",
|
|
322
|
+
"syn",
|
|
323
|
+
]
|
|
324
|
+
|
|
325
|
+
[[package]]
|
|
326
|
+
name = "serde_json"
|
|
327
|
+
version = "1.0.145"
|
|
328
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
329
|
+
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
|
|
330
|
+
dependencies = [
|
|
331
|
+
"itoa",
|
|
332
|
+
"memchr",
|
|
333
|
+
"ryu",
|
|
334
|
+
"serde",
|
|
335
|
+
"serde_core",
|
|
336
|
+
]
|
|
337
|
+
|
|
284
338
|
[[package]]
|
|
285
339
|
name = "smallvec"
|
|
286
340
|
version = "1.15.1"
|
|
@@ -407,5 +461,6 @@ dependencies = [
|
|
|
407
461
|
"pyo3",
|
|
408
462
|
"pyo3-build-config",
|
|
409
463
|
"regex",
|
|
464
|
+
"serde_json",
|
|
410
465
|
"smallvec",
|
|
411
466
|
]
|
|
@@ -6,7 +6,9 @@ use std::path::PathBuf;
|
|
|
6
6
|
use std::process::Command;
|
|
7
7
|
|
|
8
8
|
fn main() {
|
|
9
|
-
|
|
9
|
+
stage_asset("ocr_confusions.tsv").expect("failed to stage OCR confusion table for compilation");
|
|
10
|
+
stage_asset("apostrofae_pairs.json")
|
|
11
|
+
.expect("failed to stage Apostrofae replacement table for compilation");
|
|
10
12
|
pyo3_build_config::add_extension_module_link_args();
|
|
11
13
|
|
|
12
14
|
// Only perform custom Python linking on non-Linux platforms.
|
|
@@ -97,46 +99,60 @@ fn query_python(python: &OsStr, command: &str) -> Option<String> {
|
|
|
97
99
|
Some(value)
|
|
98
100
|
}
|
|
99
101
|
|
|
100
|
-
fn
|
|
102
|
+
fn stage_asset(asset_name: &str) -> io::Result<()> {
|
|
101
103
|
let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("missing manifest dir"));
|
|
102
104
|
let out_dir = PathBuf::from(env::var("OUT_DIR").expect("missing OUT_DIR"));
|
|
103
105
|
|
|
104
|
-
let
|
|
105
|
-
|
|
106
|
+
let repo_candidates = [
|
|
107
|
+
manifest_dir
|
|
108
|
+
.join("../../src/glitchlings/zoo/assets")
|
|
109
|
+
.join(asset_name),
|
|
110
|
+
manifest_dir
|
|
111
|
+
.join("../../src/glitchlings/zoo")
|
|
112
|
+
.join(asset_name),
|
|
113
|
+
];
|
|
114
|
+
let packaged_path = manifest_dir.join("assets").join(asset_name);
|
|
106
115
|
println!("cargo:rerun-if-changed={}", packaged_path.display());
|
|
107
116
|
|
|
108
|
-
let source_path =
|
|
109
|
-
|
|
110
|
-
if
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
117
|
+
let mut source_path: Option<PathBuf> = None;
|
|
118
|
+
for candidate in &repo_candidates {
|
|
119
|
+
if candidate.exists() {
|
|
120
|
+
println!("cargo:rerun-if-changed={}", candidate.display());
|
|
121
|
+
if packaged_path.exists() {
|
|
122
|
+
let repo_bytes = fs::read(candidate)?;
|
|
123
|
+
let packaged_bytes = fs::read(&packaged_path)?;
|
|
124
|
+
if repo_bytes != packaged_bytes {
|
|
125
|
+
return Err(io::Error::new(
|
|
126
|
+
ErrorKind::Other,
|
|
127
|
+
format!(
|
|
128
|
+
"asset {} is out of sync with {}",
|
|
129
|
+
packaged_path.display(),
|
|
130
|
+
candidate.display()
|
|
131
|
+
),
|
|
132
|
+
));
|
|
133
|
+
}
|
|
122
134
|
}
|
|
135
|
+
source_path = Some(candidate.clone());
|
|
136
|
+
break;
|
|
123
137
|
}
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
format!(
|
|
130
|
-
"missing OCR confusion table; looked for {} and {}",
|
|
131
|
-
repo_path.display(),
|
|
132
|
-
packaged_path.display()
|
|
133
|
-
),
|
|
134
|
-
));
|
|
135
|
-
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
let source_path = if let Some(path) = source_path {
|
|
141
|
+
path
|
|
142
|
+
} else if packaged_path.exists() {
|
|
136
143
|
packaged_path
|
|
144
|
+
} else {
|
|
145
|
+
return Err(io::Error::new(
|
|
146
|
+
ErrorKind::NotFound,
|
|
147
|
+
format!(
|
|
148
|
+
"missing asset {asset_name}; looked for {} and {}",
|
|
149
|
+
repo_candidates[0].display(),
|
|
150
|
+
packaged_path.display()
|
|
151
|
+
),
|
|
152
|
+
));
|
|
137
153
|
};
|
|
138
154
|
|
|
139
155
|
fs::create_dir_all(&out_dir)?;
|
|
140
|
-
fs::copy(&source_path, out_dir.join(
|
|
156
|
+
fs::copy(&source_path, out_dir.join(asset_name))?;
|
|
141
157
|
Ok(())
|
|
142
158
|
}
|