glitchlings 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. glitchlings-0.2.3/MANIFEST.in +4 -0
  2. {glitchlings-0.2.1/src/glitchlings.egg-info → glitchlings-0.2.3}/PKG-INFO +28 -61
  3. {glitchlings-0.2.1 → glitchlings-0.2.3}/README.md +21 -59
  4. {glitchlings-0.2.1 → glitchlings-0.2.3}/pyproject.toml +11 -8
  5. {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/Cargo.lock +0 -7
  6. {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/Cargo.toml +0 -1
  7. glitchlings-0.2.3/rust/zoo/assets/ocr_confusions.tsv +30 -0
  8. glitchlings-0.2.3/rust/zoo/build.rs +140 -0
  9. {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/glitch_ops.rs +1 -1
  10. {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/lib.rs +2 -1
  11. {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/resources.rs +24 -34
  12. glitchlings-0.2.1/rust/typogre/src/lib.rs → glitchlings-0.2.3/rust/zoo/src/typogre.rs +3 -9
  13. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/dlc/prime.py +44 -22
  14. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/main.py +17 -39
  15. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/util/__init__.py +30 -0
  16. glitchlings-0.2.3/src/glitchlings/zoo/__init__.py +134 -0
  17. glitchlings-0.2.3/src/glitchlings/zoo/_ocr_confusions.py +34 -0
  18. glitchlings-0.2.3/src/glitchlings/zoo/_rate.py +21 -0
  19. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/core.py +56 -52
  20. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/jargoyle.py +77 -16
  21. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/mim1c.py +24 -5
  22. glitchlings-0.2.3/src/glitchlings/zoo/ocr_confusions.tsv +30 -0
  23. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/redactyl.py +46 -9
  24. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/reduple.py +36 -8
  25. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/rushmore.py +40 -8
  26. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/scannequin.py +42 -37
  27. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/zoo/typogre.py +36 -8
  28. {glitchlings-0.2.1 → glitchlings-0.2.3/src/glitchlings.egg-info}/PKG-INFO +28 -61
  29. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/SOURCES.txt +5 -2
  30. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/requires.txt +3 -0
  31. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_cli.py +29 -1
  32. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_gaggle.py +20 -6
  33. glitchlings-0.2.3/tests/test_glitchling_core.py +68 -0
  34. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_glitchlings_determinism.py +7 -18
  35. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_jargoyle.py +3 -13
  36. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_keyboard_layouts.py +18 -1
  37. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_parameter_effects.py +16 -11
  38. glitchlings-0.2.3/tests/test_prime_echo_chamber.py +294 -0
  39. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_property_based.py +3 -3
  40. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_rust_backed_glitchlings.py +71 -31
  41. glitchlings-0.2.1/MANIFEST.in +0 -4
  42. glitchlings-0.2.1/rust/typogre/Cargo.toml +0 -14
  43. glitchlings-0.2.1/rust/zoo/build.rs +0 -60
  44. glitchlings-0.2.1/src/glitchlings/zoo/__init__.py +0 -57
  45. glitchlings-0.2.1/tests/test_glitchling_core.py +0 -24
  46. glitchlings-0.2.1/tests/test_prime_echo_chamber.py +0 -99
  47. {glitchlings-0.2.1 → glitchlings-0.2.3}/LICENSE +0 -0
  48. {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/Cargo.toml +0 -0
  49. {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/pipeline.rs +0 -0
  50. {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/rng.rs +0 -0
  51. {glitchlings-0.2.1 → glitchlings-0.2.3}/rust/zoo/src/text_buffer.rs +0 -0
  52. {glitchlings-0.2.1 → glitchlings-0.2.3}/setup.cfg +0 -0
  53. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/__init__.py +0 -0
  54. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/__main__.py +0 -0
  55. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/dlc/__init__.py +0 -0
  56. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings/dlc/huggingface.py +0 -0
  57. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/dependency_links.txt +0 -0
  58. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/entry_points.txt +0 -0
  59. {glitchlings-0.2.1 → glitchlings-0.2.3}/src/glitchlings.egg-info/top_level.txt +0 -0
  60. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_dataset_corruption.py +0 -0
  61. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_huggingface_dlc.py +0 -0
  62. {glitchlings-0.2.1 → glitchlings-0.2.3}/tests/test_util.py +0 -0
@@ -0,0 +1,4 @@
1
+ recursive-include rust *.rs *.toml *.lock *.tsv
2
+ recursive-include src/glitchlings/zoo *.tsv
3
+ prune rust/target
4
+ prune rust/zoo/target
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -215,6 +215,8 @@ Classifier: Intended Audience :: Developers
215
215
  Classifier: License :: OSI Approved :: Apache Software License
216
216
  Classifier: Programming Language :: Python
217
217
  Classifier: Programming Language :: Python :: 3
218
+ Classifier: Programming Language :: Python :: 3.10
219
+ Classifier: Programming Language :: Python :: 3.11
218
220
  Classifier: Programming Language :: Python :: 3.12
219
221
  Classifier: Programming Language :: Rust
220
222
  Classifier: Operating System :: MacOS :: MacOS X
@@ -223,7 +225,7 @@ Classifier: Operating System :: POSIX :: Linux
223
225
  Classifier: Operating System :: OS Independent
224
226
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
225
227
  Classifier: Topic :: Software Development :: Testing
226
- Requires-Python: >=3.12
228
+ Requires-Python: >=3.10
227
229
  Description-Content-Type: text/markdown
228
230
  License-File: LICENSE
229
231
  Requires-Dist: confusable-homoglyphs>=3.3.1
@@ -232,11 +234,14 @@ Provides-Extra: hf
232
234
  Requires-Dist: datasets>=4.0.0; extra == "hf"
233
235
  Provides-Extra: wordnet
234
236
  Requires-Dist: nltk>=3.9.1; extra == "wordnet"
237
+ Requires-Dist: numpy<=2.0,>=1.24; extra == "wordnet"
235
238
  Provides-Extra: prime
236
239
  Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
237
240
  Provides-Extra: dev
238
241
  Requires-Dist: pytest>=8.0.0; extra == "dev"
239
242
  Requires-Dist: hypothesis>=6.140.0; extra == "dev"
243
+ Requires-Dist: nltk>=3.9.1; extra == "dev"
244
+ Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
240
245
  Dynamic: license-file
241
246
 
242
247
  #
@@ -277,14 +282,16 @@ After all, what good is general intelligence if it can't handle a little chaos?
277
282
  pip install -U glitchlings
278
283
  ```
279
284
 
285
+ > Glitchlings requires Python 3.10 or newer.
286
+
280
287
  ```python
281
288
  from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Reduple, Rushmore
282
289
 
283
290
  gaggle = Gaggle([
284
- Typogre(max_change_rate=0.03),
285
- Mim1c(replacement_rate=0.02),
291
+ Typogre(rate=0.03),
292
+ Mim1c(rate=0.02),
286
293
  Reduple(seed=404),
287
- Rushmore(max_deletion_rate=0.02),
294
+ Rushmore(rate=0.02),
288
295
  ])
289
296
 
290
297
  print(gaggle(SAMPLE_TEXT))
@@ -292,52 +299,9 @@ print(gaggle(SAMPLE_TEXT))
292
299
 
293
300
  > Onҽ m‎ھ‎rning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin‎٠‎ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
294
301
 
295
- ## Usage
296
-
297
- Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
298
-
299
- - **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
300
- - **Dataset corruption** – After ``import glitchlings.dlc.huggingface``, call ``Dataset.glitch(...)`` (or a `Gaggle`'s `.corrupt_dataset`) to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
301
-
302
- ### Rust pipeline acceleration (opt-in)
303
-
304
- The refactored Rust pipeline can execute multiple glitchlings without
305
- bouncing back through Python, but it is gated behind a feature flag so
306
- teams can roll it out gradually. After compiling the Rust extension
307
- (`python -m cibuildwheel --output-dir dist`) set
308
- `GLITCHLINGS_RUST_PIPELINE=1` (or `true`, `yes`, `on`) before importing
309
- `glitchlings`. When the flag is set and the extension is available,
310
- `Gaggle` automatically batches compatible glitchlings into the Rust
311
- pipeline; otherwise it transparently falls back to the legacy Python
312
- loop.
313
-
314
- ### Prime Intellect environments
315
-
316
- After `pip install -e .[prime]`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
317
-
318
- ```python
319
- from glitchlings import Mim1c, Typogre
320
- from glitchlings.dlc.prime import echo_chamber, load_environment
321
-
322
- env = load_environment(
323
- "osoleve/syllabify-en",
324
- glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
325
- seed=404,
326
- )
327
-
328
- # Spin up an echo chamber that corrupts a dataset column and
329
- # rewards models for perfectly restoring it
330
- practice_env = echo_chamber(
331
- "osoleve/clean-room",
332
- column="text",
333
- glitchlings=["Typogre", "Mim1c"],
334
- reward_function=lambda prompt, completion, answer: float(completion == answer),
335
- )
336
- ```
337
-
338
- Skip the `glitchlings` argument to receive an untouched verifier dataset, and
339
- override `reward_function` when you want to evaluate completions with a custom
340
- scoring routine.
302
+ Consult the [Glitchlings Usage Guide](docs/index.md)
303
+ for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
304
+ integrations, and the feature-flagged Rust pipeline.
341
305
 
342
306
  ## Motivation
343
307
 
@@ -353,8 +317,8 @@ Glitchlings are standard Python classes, so you can instantiate them with whatev
353
317
  ```python
354
318
  from glitchlings import Gaggle, Typogre, Mim1c
355
319
 
356
- custom_typogre = Typogre(max_change_rate=0.1)
357
- selective_mimic = Mim1c(replacement_rate=0.05, classes=["LATIN", "GREEK"])
320
+ custom_typogre = Typogre(rate=0.1)
321
+ selective_mimic = Mim1c(rate=0.05, classes=["LATIN", "GREEK"])
358
322
 
359
323
  gaggle = Gaggle([custom_typogre, selective_mimic], seed=99)
360
324
  print(gaggle("Summoned heroes do not fear the glitch."))
@@ -384,11 +348,14 @@ glitchlings --list
384
348
  # Run Typogre against the contents of a file and inspect the diff.
385
349
  glitchlings -g typogre --file documents/report.txt --diff
386
350
 
351
+ # Configure glitchlings inline by passing keyword arguments.
352
+ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
353
+
387
354
  # Pipe text straight into the CLI for an on-the-fly corruption.
388
355
  echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
389
356
  ```
390
357
 
391
- Use `--help` for a complete breakdown of available options.
358
+ Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
392
359
 
393
360
  ## Development
394
361
 
@@ -406,7 +373,7 @@ _What a nice word, would be a shame if something happened to it._
406
373
  >
407
374
  > Args
408
375
  >
409
- > - `max_change_rate (float)`: The maximum number of edits to make as a percentage of the length (default: 0.02, 2%).
376
+ > - `rate (float)`: The maximum number of edits to make as a percentage of the length (default: 0.02, 2%).
410
377
  > - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
411
378
  > - `seed (int)`: The random seed for reproducibility (default: 151).
412
379
 
@@ -418,7 +385,7 @@ _Wait, was that...?_
418
385
  >
419
386
  > Args
420
387
  >
421
- > - `replacement_rate (float)`: The maximum proportion of characters to replace (default: 0.02, 2%).
388
+ > - `rate (float)`: The maximum proportion of characters to replace (default: 0.02, 2%).
422
389
  > - `classes (list[str] | "all")`: Restrict replacements to these Unicode script classes (default: ["LATIN", "GREEK", "CYRILLIC"]).
423
390
  > - `banned_characters (Collection[str])`: Characters that must never appear as replacements (default: none).
424
391
  > - `seed (int)`: The random seed for reproducibility (default: 151).
@@ -431,7 +398,7 @@ _How can a computer need reading glasses?_
431
398
  >
432
399
  > Args
433
400
  >
434
- > - `error_rate (float)`: The maximum proportion of eligible confusion spans to replace (default: 0.02, 2%).
401
+ > - `rate (float)`: The maximum proportion of eligible confusion spans to replace (default: 0.02, 2%).
435
402
  > - `seed (int)`: The random seed for reproducibility (default: 151).
436
403
 
437
404
  ### Jargoyle
@@ -442,7 +409,7 @@ _Uh oh. The worst person you know just bought a thesaurus._
442
409
  >
443
410
  > Args
444
411
  >
445
- > - `replacement_rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
412
+ > - `rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
446
413
  > - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
447
414
  > - `seed (int)`: The random seed for reproducibility (default: 151).
448
415
 
@@ -454,7 +421,7 @@ _Did you say that or did I?_
454
421
  >
455
422
  > Args
456
423
  >
457
- > - `reduplication_rate (float)`: The maximum proportion of words to reduplicate (default: 0.05, 5%).
424
+ > - `rate (float)`: The maximum proportion of words to reduplicate (default: 0.05, 5%).
458
425
  > - `seed (int)`: The random seed for reproducibility (default: 151).
459
426
 
460
427
  ### Rushmore
@@ -465,7 +432,7 @@ _I accidentally an entire word._
465
432
  >
466
433
  > Args
467
434
  >
468
- > - `max_deletion_rate (float)`: The maximum proportion of words to delete (default: 0.01, 1%).
435
+ > - `rate (float)`: The maximum proportion of words to delete (default: 0.01, 1%).
469
436
  > - `seed (int)`: The random seed for reproducibility (default: 151).
470
437
 
471
438
  ### Redactyl
@@ -477,7 +444,7 @@ _Oops, that was my black highlighter._
477
444
  > ### Args
478
445
  >
479
446
  > - `replacement_char (str)`: The character to use for redaction (default: █).
480
- > - `redaction_rate (float)`: The maximum proportion of words to redact (default: 0.05, 5%).
447
+ > - `rate (float)`: The maximum proportion of words to redact (default: 0.05, 5%).
481
448
  > - `merge_adjacent (bool)`: Whether to redact the space between adjacent redacted words (default: False).
482
449
  > - `seed (int)`: The random seed for reproducibility (default: 151).
483
450
 
@@ -36,14 +36,16 @@ After all, what good is general intelligence if it can't handle a little chaos?
36
36
  pip install -U glitchlings
37
37
  ```
38
38
 
39
+ > Glitchlings requires Python 3.10 or newer.
40
+
39
41
  ```python
40
42
  from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Reduple, Rushmore
41
43
 
42
44
  gaggle = Gaggle([
43
- Typogre(max_change_rate=0.03),
44
- Mim1c(replacement_rate=0.02),
45
+ Typogre(rate=0.03),
46
+ Mim1c(rate=0.02),
45
47
  Reduple(seed=404),
46
- Rushmore(max_deletion_rate=0.02),
48
+ Rushmore(rate=0.02),
47
49
  ])
48
50
 
49
51
  print(gaggle(SAMPLE_TEXT))
@@ -51,52 +53,9 @@ print(gaggle(SAMPLE_TEXT))
51
53
 
52
54
  > Onҽ m‎ھ‎rning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin‎٠‎ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
53
55
 
54
- ## Usage
55
-
56
- Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
57
-
58
- - **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
59
- - **Dataset corruption** – After ``import glitchlings.dlc.huggingface``, call ``Dataset.glitch(...)`` (or a `Gaggle`'s `.corrupt_dataset`) to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
60
-
61
- ### Rust pipeline acceleration (opt-in)
62
-
63
- The refactored Rust pipeline can execute multiple glitchlings without
64
- bouncing back through Python, but it is gated behind a feature flag so
65
- teams can roll it out gradually. After compiling the Rust extension
66
- (`python -m cibuildwheel --output-dir dist`) set
67
- `GLITCHLINGS_RUST_PIPELINE=1` (or `true`, `yes`, `on`) before importing
68
- `glitchlings`. When the flag is set and the extension is available,
69
- `Gaggle` automatically batches compatible glitchlings into the Rust
70
- pipeline; otherwise it transparently falls back to the legacy Python
71
- loop.
72
-
73
- ### Prime Intellect environments
74
-
75
- After `pip install -e .[prime]`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
76
-
77
- ```python
78
- from glitchlings import Mim1c, Typogre
79
- from glitchlings.dlc.prime import echo_chamber, load_environment
80
-
81
- env = load_environment(
82
- "osoleve/syllabify-en",
83
- glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
84
- seed=404,
85
- )
86
-
87
- # Spin up an echo chamber that corrupts a dataset column and
88
- # rewards models for perfectly restoring it
89
- practice_env = echo_chamber(
90
- "osoleve/clean-room",
91
- column="text",
92
- glitchlings=["Typogre", "Mim1c"],
93
- reward_function=lambda prompt, completion, answer: float(completion == answer),
94
- )
95
- ```
96
-
97
- Skip the `glitchlings` argument to receive an untouched verifier dataset, and
98
- override `reward_function` when you want to evaluate completions with a custom
99
- scoring routine.
56
+ Consult the [Glitchlings Usage Guide](docs/index.md)
57
+ for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
58
+ integrations, and the feature-flagged Rust pipeline.
100
59
 
101
60
  ## Motivation
102
61
 
@@ -112,8 +71,8 @@ Glitchlings are standard Python classes, so you can instantiate them with whatev
112
71
  ```python
113
72
  from glitchlings import Gaggle, Typogre, Mim1c
114
73
 
115
- custom_typogre = Typogre(max_change_rate=0.1)
116
- selective_mimic = Mim1c(replacement_rate=0.05, classes=["LATIN", "GREEK"])
74
+ custom_typogre = Typogre(rate=0.1)
75
+ selective_mimic = Mim1c(rate=0.05, classes=["LATIN", "GREEK"])
117
76
 
118
77
  gaggle = Gaggle([custom_typogre, selective_mimic], seed=99)
119
78
  print(gaggle("Summoned heroes do not fear the glitch."))
@@ -143,11 +102,14 @@ glitchlings --list
143
102
  # Run Typogre against the contents of a file and inspect the diff.
144
103
  glitchlings -g typogre --file documents/report.txt --diff
145
104
 
105
+ # Configure glitchlings inline by passing keyword arguments.
106
+ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
107
+
146
108
  # Pipe text straight into the CLI for an on-the-fly corruption.
147
109
  echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
148
110
  ```
149
111
 
150
- Use `--help` for a complete breakdown of available options.
112
+ Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
151
113
 
152
114
  ## Development
153
115
 
@@ -165,7 +127,7 @@ _What a nice word, would be a shame if something happened to it._
165
127
  >
166
128
  > Args
167
129
  >
168
- > - `max_change_rate (float)`: The maximum number of edits to make as a percentage of the length (default: 0.02, 2%).
130
+ > - `rate (float)`: The maximum number of edits to make as a percentage of the length (default: 0.02, 2%).
169
131
  > - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
170
132
  > - `seed (int)`: The random seed for reproducibility (default: 151).
171
133
 
@@ -177,7 +139,7 @@ _Wait, was that...?_
177
139
  >
178
140
  > Args
179
141
  >
180
- > - `replacement_rate (float)`: The maximum proportion of characters to replace (default: 0.02, 2%).
142
+ > - `rate (float)`: The maximum proportion of characters to replace (default: 0.02, 2%).
181
143
  > - `classes (list[str] | "all")`: Restrict replacements to these Unicode script classes (default: ["LATIN", "GREEK", "CYRILLIC"]).
182
144
  > - `banned_characters (Collection[str])`: Characters that must never appear as replacements (default: none).
183
145
  > - `seed (int)`: The random seed for reproducibility (default: 151).
@@ -190,7 +152,7 @@ _How can a computer need reading glasses?_
190
152
  >
191
153
  > Args
192
154
  >
193
- > - `error_rate (float)`: The maximum proportion of eligible confusion spans to replace (default: 0.02, 2%).
155
+ > - `rate (float)`: The maximum proportion of eligible confusion spans to replace (default: 0.02, 2%).
194
156
  > - `seed (int)`: The random seed for reproducibility (default: 151).
195
157
 
196
158
  ### Jargoyle
@@ -201,7 +163,7 @@ _Uh oh. The worst person you know just bought a thesaurus._
201
163
  >
202
164
  > Args
203
165
  >
204
- > - `replacement_rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
166
+ > - `rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
205
167
  > - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
206
168
  > - `seed (int)`: The random seed for reproducibility (default: 151).
207
169
 
@@ -213,7 +175,7 @@ _Did you say that or did I?_
213
175
  >
214
176
  > Args
215
177
  >
216
- > - `reduplication_rate (float)`: The maximum proportion of words to reduplicate (default: 0.05, 5%).
178
+ > - `rate (float)`: The maximum proportion of words to reduplicate (default: 0.05, 5%).
217
179
  > - `seed (int)`: The random seed for reproducibility (default: 151).
218
180
 
219
181
  ### Rushmore
@@ -224,7 +186,7 @@ _I accidentally an entire word._
224
186
  >
225
187
  > Args
226
188
  >
227
- > - `max_deletion_rate (float)`: The maximum proportion of words to delete (default: 0.01, 1%).
189
+ > - `rate (float)`: The maximum proportion of words to delete (default: 0.01, 1%).
228
190
  > - `seed (int)`: The random seed for reproducibility (default: 151).
229
191
 
230
192
  ### Redactyl
@@ -236,7 +198,7 @@ _Oops, that was my black highlighter._
236
198
  > ### Args
237
199
  >
238
200
  > - `replacement_char (str)`: The character to use for redaction (default: █).
239
- > - `redaction_rate (float)`: The maximum proportion of words to redact (default: 0.05, 5%).
201
+ > - `rate (float)`: The maximum proportion of words to redact (default: 0.05, 5%).
240
202
  > - `merge_adjacent (bool)`: Whether to redact the space between adjacent redacted words (default: False).
241
203
  > - `seed (int)`: The random seed for reproducibility (default: 151).
242
204
 
@@ -1,9 +1,9 @@
1
1
  [project]
2
2
  name = "glitchlings"
3
- version = "0.2.1"
3
+ version = "0.2.3"
4
4
  description = "Monsters for your language games."
5
5
  readme = "README.md"
6
- requires-python = ">=3.12"
6
+ requires-python = ">=3.10"
7
7
 
8
8
  dependencies = [
9
9
  "confusable-homoglyphs>=3.3.1",
@@ -22,6 +22,8 @@ classifiers = [
22
22
  "License :: OSI Approved :: Apache Software License",
23
23
  "Programming Language :: Python",
24
24
  "Programming Language :: Python :: 3",
25
+ "Programming Language :: Python :: 3.10",
26
+ "Programming Language :: Python :: 3.11",
25
27
  "Programming Language :: Python :: 3.12",
26
28
  "Programming Language :: Rust",
27
29
  "Operating System :: MacOS :: MacOS X",
@@ -46,11 +48,13 @@ glitchlings = "glitchlings.main:main"
46
48
 
47
49
  [project.optional-dependencies]
48
50
  hf = ["datasets>=4.0.0"]
49
- wordnet = ["nltk>=3.9.1"]
51
+ wordnet = ["nltk>=3.9.1", "numpy>=1.24,<=2.0"]
50
52
  prime = ["verifiers>=0.1.3.post0"]
51
53
  dev = [
52
54
  "pytest>=8.0.0",
53
55
  "hypothesis>=6.140.0",
56
+ "nltk>=3.9.1",
57
+ "numpy>=1.24,<=2.0",
54
58
  ]
55
59
 
56
60
  [build-system]
@@ -59,6 +63,10 @@ build-backend = "setuptools.build_meta"
59
63
 
60
64
  [tool.setuptools]
61
65
  package-dir = {"" = "src"}
66
+ include-package-data = true
67
+
68
+ [tool.setuptools.package-data]
69
+ "glitchlings.zoo" = ["ocr_confusions.tsv"]
62
70
 
63
71
  [tool.setuptools.packages.find]
64
72
  where = ["src"]
@@ -69,11 +77,6 @@ path = "rust/zoo/Cargo.toml"
69
77
  binding = "PyO3"
70
78
  debug = false
71
79
 
72
- [[tool.setuptools-rust.ext-modules]]
73
- target = "glitchlings._typogre_rust"
74
- path = "rust/typogre/Cargo.toml"
75
- binding = "PyO3"
76
- debug = false
77
80
 
78
81
  [tool.pytest.ini_options]
79
82
  pythonpath = [
@@ -316,13 +316,6 @@ version = "1.19.0"
316
316
  source = "registry+https://github.com/rust-lang/crates.io-index"
317
317
  checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
318
318
 
319
- [[package]]
320
- name = "typogre_rust"
321
- version = "0.1.0"
322
- dependencies = [
323
- "pyo3",
324
- ]
325
-
326
319
  [[package]]
327
320
  name = "unicode-ident"
328
321
  version = "1.0.19"
@@ -1,6 +1,5 @@
1
1
  [workspace]
2
2
  members = [
3
- "typogre",
4
3
  "zoo",
5
4
  ]
6
5
  resolver = "2"
@@ -0,0 +1,30 @@
1
+ # Source Replacements (space-separated)
2
+ li h
3
+ h li
4
+ rn m
5
+ m rn
6
+ cl d
7
+ d cl
8
+ I l
9
+ l I 1
10
+ 1 l I
11
+ 0 O
12
+ O 0
13
+ B 8
14
+ 8 B
15
+ S 5
16
+ 5 S
17
+ Z 2
18
+ 2 Z
19
+ G 6
20
+ 6 G
21
+ “ "
22
+ ” "
23
+ ‘ '
24
+ ’ '
25
+ — -
26
+ – -
27
+ vv w
28
+ w vv
29
+ ri n
30
+ n ri
@@ -0,0 +1,140 @@
1
+ use std::env;
2
+ use std::ffi::{OsStr, OsString};
3
+ use std::fs;
4
+ use std::io::{self, ErrorKind};
5
+ use std::path::PathBuf;
6
+ use std::process::Command;
7
+
8
+ fn main() {
9
+ prepare_confusion_table().expect("failed to stage OCR confusion table for compilation");
10
+ pyo3_build_config::add_extension_module_link_args();
11
+
12
+ if let Some(python) = configured_python() {
13
+ link_python(&python);
14
+ } else if let Some(python) = detect_python() {
15
+ link_python(&python);
16
+ }
17
+ }
18
+
19
+ fn configured_python() -> Option<OsString> {
20
+ std::env::var_os("PYO3_PYTHON")
21
+ .or_else(|| std::env::var_os("PYTHON"))
22
+ .filter(|path| !path.is_empty())
23
+ }
24
+
25
+ fn detect_python() -> Option<OsString> {
26
+ const CANDIDATES: &[&str] = &[
27
+ "python3.12",
28
+ "python3.11",
29
+ "python3.10",
30
+ "python3",
31
+ "python",
32
+ ];
33
+
34
+ for candidate in CANDIDATES {
35
+ let status = Command::new(candidate)
36
+ .arg("-c")
37
+ .arg("import sys")
38
+ .output();
39
+
40
+ if let Ok(output) = status {
41
+ if output.status.success() {
42
+ return Some(OsString::from(candidate));
43
+ }
44
+ }
45
+ }
46
+
47
+ None
48
+ }
49
+
50
+ fn link_python(python: &OsStr) {
51
+ if let Some(path) = query_python(
52
+ python,
53
+ "import sysconfig; print(sysconfig.get_config_var('LIBDIR') or '')",
54
+ ) {
55
+ let trimmed = path.trim();
56
+ if !trimmed.is_empty() {
57
+ println!("cargo:rustc-link-search=native={trimmed}");
58
+ }
59
+ }
60
+
61
+ if let Some(path) = query_python(
62
+ python,
63
+ "import sysconfig; print(sysconfig.get_config_var('LIBPL') or '')",
64
+ ) {
65
+ let trimmed = path.trim();
66
+ if !trimmed.is_empty() {
67
+ println!("cargo:rustc-link-search=native={trimmed}");
68
+ }
69
+ }
70
+
71
+ if let Some(library) = query_python(
72
+ python,
73
+ "import sysconfig; print(sysconfig.get_config_var('LDLIBRARY') or '')",
74
+ ) {
75
+ let name = library.trim();
76
+ if let Some(stripped) = name.strip_prefix("lib") {
77
+ let stem = stripped
78
+ .strip_suffix(".so")
79
+ .or_else(|| stripped.strip_suffix(".a"))
80
+ .unwrap_or(stripped);
81
+ if !stem.is_empty() {
82
+ println!("cargo:rustc-link-lib={stem}");
83
+ }
84
+ }
85
+ }
86
+ }
87
+
88
+ fn query_python(python: &OsStr, command: &str) -> Option<String> {
89
+ let output = Command::new(python).arg("-c").arg(command).output().ok()?;
90
+ if !output.status.success() {
91
+ return None;
92
+ }
93
+ let value = String::from_utf8(output.stdout).ok()?;
94
+ Some(value)
95
+ }
96
+
97
+ fn prepare_confusion_table() -> io::Result<()> {
98
+ let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("missing manifest dir"));
99
+ let out_dir = PathBuf::from(env::var("OUT_DIR").expect("missing OUT_DIR"));
100
+
101
+ let repo_path = manifest_dir.join("../../src/glitchlings/zoo/ocr_confusions.tsv");
102
+ let packaged_path = manifest_dir.join("assets/ocr_confusions.tsv");
103
+ println!("cargo:rerun-if-changed={}", packaged_path.display());
104
+
105
+ let source_path = if repo_path.exists() {
106
+ println!("cargo:rerun-if-changed={}", repo_path.display());
107
+ if packaged_path.exists() {
108
+ let repo_bytes = fs::read(&repo_path)?;
109
+ let packaged_bytes = fs::read(&packaged_path)?;
110
+ if repo_bytes != packaged_bytes {
111
+ return Err(io::Error::new(
112
+ ErrorKind::Other,
113
+ format!(
114
+ "OCR confusion table at {} is out of sync with {}",
115
+ packaged_path.display(),
116
+ repo_path.display()
117
+ ),
118
+ ));
119
+ }
120
+ }
121
+ repo_path
122
+ } else {
123
+ if !packaged_path.exists() {
124
+ return Err(io::Error::new(
125
+ ErrorKind::NotFound,
126
+ format!(
127
+ "missing OCR confusion table; looked for {} and {}",
128
+ repo_path.display(),
129
+ packaged_path.display()
130
+ ),
131
+ ));
132
+ }
133
+ packaged_path
134
+ };
135
+
136
+ fs::create_dir_all(&out_dir)?;
137
+ fs::copy(&source_path, out_dir.join("ocr_confusions.tsv"))?;
138
+ Ok(())
139
+ }
140
+
@@ -500,6 +500,6 @@ mod tests {
500
500
  let mut rng = PyRng::new(1);
501
501
  let op = OcrArtifactsOp { error_rate: 1.0 };
502
502
  op.apply(&mut buffer, &mut rng).expect("ocr succeeds");
503
- assert_eq!(buffer.to_string(), "Tlie rn m");
503
+ assert_eq!(buffer.to_string(), "Tlie rn rri");
504
504
  }
505
505
  }
@@ -3,6 +3,7 @@ mod pipeline;
3
3
  mod resources;
4
4
  mod rng;
5
5
  mod text_buffer;
6
+ mod typogre;
6
7
 
7
8
  use glitch_ops::{GlitchOp, GlitchRng};
8
9
  use pyo3::prelude::*;
@@ -17,7 +18,6 @@ pub use glitch_ops::{
17
18
  pub use pipeline::{derive_seed, GlitchDescriptor, Pipeline, PipelineError};
18
19
  pub use rng::{PyRng, PyRngError};
19
20
  pub use text_buffer::{SegmentKind, TextBuffer, TextBufferError, TextSegment, TextSpan};
20
-
21
21
  struct PythonRngAdapter<'py> {
22
22
  rng: Bound<'py, PyAny>,
23
23
  }
@@ -279,5 +279,6 @@ fn _zoo_rust(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
279
279
  m.add_function(wrap_pyfunction!(ocr_artifacts, m)?)?;
280
280
  m.add_function(wrap_pyfunction!(redact_words, m)?)?;
281
281
  m.add_function(wrap_pyfunction!(compose_glitchlings, m)?)?;
282
+ m.add_function(wrap_pyfunction!(typogre::fatfinger, m)?)?;
282
283
  Ok(())
283
284
  }