glitchlings 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {glitchlings-0.1.2 → glitchlings-0.1.4}/.gitignore +3 -0
  2. glitchlings-0.1.4/AGENTS.md +55 -0
  3. {glitchlings-0.1.2 → glitchlings-0.1.4}/PKG-INFO +26 -1
  4. {glitchlings-0.1.2 → glitchlings-0.1.4}/README.md +24 -0
  5. glitchlings-0.1.4/docs/index.md +253 -0
  6. {glitchlings-0.1.2 → glitchlings-0.1.4}/pyproject.toml +2 -1
  7. glitchlings-0.1.4/rust/typogre/Cargo.lock +295 -0
  8. glitchlings-0.1.4/rust/typogre/Cargo.toml +14 -0
  9. glitchlings-0.1.4/rust/typogre/src/lib.rs +260 -0
  10. glitchlings-0.1.4/rust/zoo/Cargo.lock +340 -0
  11. glitchlings-0.1.4/rust/zoo/Cargo.toml +15 -0
  12. glitchlings-0.1.4/rust/zoo/src/lib.rs +367 -0
  13. glitchlings-0.1.4/src/glitchlings/dlc/prime.py +113 -0
  14. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/zoo/core.py +45 -5
  15. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/zoo/redactyl.py +46 -9
  16. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/zoo/reduple.py +35 -8
  17. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/zoo/rushmore.py +48 -25
  18. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/zoo/scannequin.py +33 -7
  19. glitchlings-0.1.4/src/glitchlings/zoo/typogre.py +184 -0
  20. glitchlings-0.1.4/tests/test_cli.py +150 -0
  21. glitchlings-0.1.4/tests/test_dataset_corruption.py +51 -0
  22. {glitchlings-0.1.2 → glitchlings-0.1.4}/tests/test_gaggle.py +6 -0
  23. glitchlings-0.1.4/tests/test_glitchling_core.py +24 -0
  24. {glitchlings-0.1.2 → glitchlings-0.1.4}/tests/test_glitchlings_determinism.py +18 -0
  25. {glitchlings-0.1.2 → glitchlings-0.1.4}/tests/test_parameter_effects.py +16 -0
  26. glitchlings-0.1.4/tests/test_property_based.py +93 -0
  27. glitchlings-0.1.4/tests/test_rust_backed_glitchlings.py +110 -0
  28. glitchlings-0.1.4/tests/test_util.py +35 -0
  29. glitchlings-0.1.2/AGENTS.md +0 -42
  30. glitchlings-0.1.2/src/glitchlings/dlc/prime.py +0 -52
  31. glitchlings-0.1.2/src/glitchlings/zoo/typogre.py +0 -231
  32. {glitchlings-0.1.2 → glitchlings-0.1.4}/.github/workflows/publish.yml +0 -0
  33. {glitchlings-0.1.2 → glitchlings-0.1.4}/LICENSE +0 -0
  34. {glitchlings-0.1.2 → glitchlings-0.1.4}/MONSTER_MANUAL.md +0 -0
  35. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/__init__.py +0 -0
  36. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/__main__.py +0 -0
  37. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/dlc/__init__.py +0 -0
  38. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/main.py +0 -0
  39. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/util/__init__.py +0 -0
  40. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/zoo/__init__.py +0 -0
  41. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/zoo/jargoyle.py +0 -0
  42. {glitchlings-0.1.2 → glitchlings-0.1.4}/src/glitchlings/zoo/mim1c.py +0 -0
  43. {glitchlings-0.1.2 → glitchlings-0.1.4}/tests/conftest.py +0 -0
  44. {glitchlings-0.1.2 → glitchlings-0.1.4}/tests/test_jargoyle.py +0 -0
  45. {glitchlings-0.1.2 → glitchlings-0.1.4}/tests/test_keyboard_layouts.py +0 -0
@@ -11,4 +11,7 @@ wheels/
11
11
  .python-version
12
12
  uv.lock
13
13
 
14
+ # Rust build artifacts
15
+ /rust/typogre/target/
16
+
14
17
  RELEASE.md
@@ -0,0 +1,55 @@
1
+ # Glitchlings – Agent Handbook
2
+
3
+ Welcome! This repository corrals a roster of deterministic text-corruption "glitchlings" plus a CLI for orchestrating them.
4
+ Treat this handbook as the default guidance for any work in the repo.
5
+
6
+ ## Repository Tour
7
+ - **`src/glitchlings/`** – Installable Python package.
8
+ - `__init__.py` exposes the public API (glitchlings, `Gaggle`, `summon`, `SAMPLE_TEXT`).
9
+ - `__main__.py` wires `python -m glitchlings` to the CLI entry point in `main.py`.
10
+ - `main.py` implements the CLI: parser construction, text sourcing, glitchling summoning, and optional diff output.
11
+ - **`src/glitchlings/zoo/`** – Core glitchling implementations.
12
+ - `core.py` defines the `Glitchling` base class, `AttackWave`/`AttackOrder` enums, and the `Gaggle` orchestrator.
13
+ - `typogre.py`, `mim1c.py`, `reduple.py`, `rushmore.py`, `redactyl.py`, `jargoyle.py`, and `scannequin.py` provide concrete glitchlings.
14
+ Each module offers a pure-Python implementation and, when available, dispatches to an optional Rust acceleration layer.
15
+ - **`src/glitchlings/util/__init__.py`** – Shared helpers including `SAMPLE_TEXT`, keyboard-neighbour layouts, and diff utilities.
16
+ - **`src/glitchlings/dlc/prime.py`** – Optional DLC integration with the `verifiers` environments (install via `pip install -e '.[prime]'`; the quotes keep shells such as zsh from globbing the brackets).
17
+ - **`rust/`** – PyO3 crates backing the optional Rust extensions.
18
+ - `rust/zoo/` builds `glitchlings._zoo_rust` (used by Reduple, Rushmore, Redactyl, and Scannequin).
19
+ - `rust/typogre/` builds `glitchlings._typogre_rust` (Typogre's fast path).
20
+ - Use `maturin develop` (or `maturin build`) from each crate directory to compile the wheels when you need the acceleration paths.
21
+ - **`tests/`** – Pytest suite covering determinism, keyboard layouts, CLI behaviour, and parity between Python and Rust implementations.
22
+ - `test_glitchlings_determinism.py`, `test_parameter_effects.py`, and `test_gaggle.py` validate orchestration and RNG guarantees.
23
+ - `test_rust_backed_glitchlings.py` ensures Rust fast paths match the Python fallbacks.
24
+ - **Top-level docs** – `README.md` introduces the project and CLI, `MONSTER_MANUAL.md` serves as the glitchling bestiary.
25
+
26
+ ## Coding Conventions
27
+ - Target **Python 3.12+** (see `pyproject.toml`).
28
+ - Follow the import order used in the package: standard library, third-party, then local modules.
29
+ - Every new glitchling must:
30
+ - Subclass `Glitchling`, setting `scope` and `order` via `AttackWave` / `AttackOrder` from `core.py`.
31
+ - Accept keyword-only parameters in `__init__`, forwarding them through `super().__init__` so they are tracked by `set_param`.
32
+ - Drive all randomness through the instance's `rng` (do not rely on module-level RNG state) to keep `Gaggle` runs deterministic.
33
+ - Keep helper functions small and well-scoped; include docstrings that describe behaviour and note any determinism considerations.
34
+ - When mutating token sequences, preserve whitespace and punctuation via separator-preserving regex splits (see `reduple.py`, `rushmore.py`, `redactyl.py`).
35
+ - CLI work should continue the existing UX: validate inputs with `ArgumentParser.error`, keep deterministic output ordering, and gate optional behaviours behind explicit flags.
36
+ - Rust fast paths must remain optional: guard imports with `try`/`except ImportError`, surface identical signatures, and fall back to the Python implementation when the extension is absent.
37
+
38
+ ## Testing & Tooling
39
+ - Run the full suite with `pytest` from the repository root.
40
+ - Some tests rely on the NLTK WordNet corpus; if it is missing they skip automatically. Install it via `python -c "import nltk; nltk.download('wordnet')"` to exercise Jargoyle thoroughly.
41
+ - If you modify Rust-backed modules, rerun `pytest tests/test_rust_backed_glitchlings.py` with and without the compiled extensions to keep both code paths healthy.
42
+ - Optional extras (e.g., DLC) depend on `verifiers`. Install the `prime` extra (`pip install -e '.[prime]'`) when working in `src/glitchlings/dlc/`.
43
+
44
+ ## Determinism Checklist
45
+ - Expose configurable parameters via `set_param` so fixtures in `tests/test_glitchlings_determinism.py` can reset seeds predictably.
46
+ - Derive RNGs from the enclosing context (`Gaggle.derive_seed`) instead of using global state.
47
+ - When sampling subsets (e.g., replacements or deletions), stabilise candidate ordering before selecting to keep results reproducible.
48
+ - Preserve signature parity between Python and Rust implementations so switching paths does not alter behaviour.
49
+
50
+ ## Workflow Tips
51
+ - Use `summon([...], seed=...)` for programmatic orchestration when reproducing tests or crafting examples.
52
+ - The CLI lists built-in glitchlings (`glitchlings --list`) and can show diffs; update `BUILTIN_GLITCHLINGS` and help text when introducing new creatures.
53
+ - Keep documentation synchronized: update both `README.md` and `MONSTER_MANUAL.md` when adding or altering glitchlings or behaviours.
54
+ - When editing keyboard layouts or homoglyph mappings, ensure downstream consumers continue to work with lowercase keys (`util.KEYNEIGHBORS`).
55
+ - Rust builds are optional—keep the project functional when extensions are absent (e.g., in CI or user installs without `maturin`).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Monsters for your language games.
5
5
  Project-URL: Homepage, https://github.com/osoleve/glitchlings
6
6
  Project-URL: Repository, https://github.com/osoleve/glitchlings.git
@@ -225,6 +225,7 @@ Requires-Dist: datasets>=4.0.0
225
225
  Requires-Dist: jellyfish>=1.2.0
226
226
  Requires-Dist: nltk>=3.9.1
227
227
  Provides-Extra: dev
228
+ Requires-Dist: hypothesis>=6.100.0; extra == 'dev'
228
229
  Requires-Dist: pytest>=8.0.0; extra == 'dev'
229
230
  Provides-Extra: prime
230
231
  Requires-Dist: verifiers>=0.1.3.post0; extra == 'prime'
@@ -283,6 +284,30 @@ print(gaggle(SAMPLE_TEXT))
283
284
 
284
285
  > Onҽ m‎ھ‎rning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin‎٠‎ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
285
286
 
287
+ ## Usage
288
+
289
+ Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
290
+
291
+ - **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
292
+ - **Dataset corruption** – Use a `Gaggle`'s `.corrupt_dataset` helper to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
293
+
294
+ ### Prime Intellect environments
295
+
296
+ After `pip install -e '.[prime]'`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
297
+
298
+ ```python
299
+ from glitchlings import Mim1c, Typogre
300
+ from glitchlings.dlc.prime import load_environment
301
+
302
+ env = load_environment(
303
+ "osoleve/syllabify-en",
304
+ glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
305
+ seed=404,
306
+ )
307
+ ```
308
+
309
+ Skip the `glitchlings` argument to receive an untouched verifier dataset.
310
+
286
311
  ## Motivation
287
312
 
288
313
  If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
@@ -51,6 +51,30 @@ print(gaggle(SAMPLE_TEXT))
51
51
 
52
52
  > Onҽ m‎ھ‎rning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin‎٠‎ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
53
53
 
54
+ ## Usage
55
+
56
+ Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
57
+
58
+ - **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
59
+ - **Dataset corruption** – Use a `Gaggle`'s `.corrupt_dataset` helper to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
60
+
61
+ ### Prime Intellect environments
62
+
63
+ After `pip install -e '.[prime]'`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
64
+
65
+ ```python
66
+ from glitchlings import Mim1c, Typogre
67
+ from glitchlings.dlc.prime import load_environment
68
+
69
+ env = load_environment(
70
+ "osoleve/syllabify-en",
71
+ glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
72
+ seed=404,
73
+ )
74
+ ```
75
+
76
+ Skip the `glitchlings` argument to receive an untouched verifier dataset.
77
+
54
78
  ## Motivation
55
79
 
56
80
  If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
@@ -0,0 +1,253 @@
1
+ # Glitchlings Usage Guide
2
+
3
+ Welcome to the Glitchlings field manual! This GitHub Pages-ready guide explains how to install the toolkit, orchestrate chaos with the `Gaggle`, and wield every individual glitchling (Typogre, Mim1c, Reduple, Rushmore, Redactyl, Jargoyle, and Scannequin). It closes with deep coverage of the optional Prime Intellect integration so you can perturb verifier datasets with confidence.
4
+
5
+ ## Table of contents
6
+
7
+ 1. [Installation](#installation)
8
+ 2. [Quickstart](#quickstart)
9
+ 3. [The Gaggle orchestrator](#the-gaggle-orchestrator)
10
+ 4. [Glitchling reference](#glitchling-reference)
11
+ - [Typogre](#typogre)
12
+ - [Mim1c](#mim1c)
13
+ - [Reduple](#reduple)
14
+ - [Rushmore](#rushmore)
15
+ - [Redactyl](#redactyl)
16
+ - [Jargoyle](#jargoyle)
17
+ - [Scannequin](#scannequin)
18
+ 5. [Dataset workflows](#dataset-workflows)
19
+ 6. [Prime Intellect integration](#prime-intellect-integration)
20
+ 7. [Ensuring determinism](#ensuring-determinism)
21
+ 8. [Testing checklist](#testing-checklist)
22
+ 9. [Additional resources](#additional-resources)
23
+
24
+ ## Installation
25
+
26
+ Install the latest release directly from PyPI:
27
+
28
+ ```bash
29
+ pip install -U glitchlings
30
+ ```
31
+
32
+ Need the optional Prime Intellect loader or the NLTK-powered Jargoyle ready to go? Pull in the documented extras:
33
+
34
+ ```bash
35
+ # Prime Intellect DLC + verifiers dependency
36
+ pip install -U 'glitchlings[prime]'
37
+
38
+ # NLTK WordNet corpora for Jargoyle synonym swaps
39
+ python -m nltk.downloader wordnet
40
+ ```
41
+
42
+ ### Source install
43
+
44
+ When working from a local clone, install in editable mode so your changes take effect immediately:
45
+
46
+ ```bash
47
+ pip install -e .
48
+ ```
49
+
50
+ If you plan to experiment with the PyO3 acceleration crates, install `maturin` and run `maturin develop` from each crate directory inside the `rust/` folder to compile the optional Rust fast paths.
51
+
52
+ ## Quickstart
53
+
54
+ Glitchlings are callable objects that accept strings (and string-like iterables) and return corrupted copies. Summon a single glitchling or gather multiple into a `Gaggle` to orchestrate compound effects:
55
+
56
+ ```python
57
+ from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Reduple, Rushmore
58
+
59
+ gaggle = Gaggle([
60
+ Typogre(max_change_rate=0.03),
61
+ Mim1c(replacement_rate=0.02),
62
+ Reduple(seed=404),
63
+ Rushmore(max_deletion_rate=0.02),
64
+ ], seed=1234)
65
+
66
+ print(gaggle(SAMPLE_TEXT))
67
+ ```
68
+
69
+ All glitchlings are deterministic: pass a `seed` during construction (or on the enclosing `Gaggle`) to make the chaos reproducible.
70
+
71
+ ### Command line interface
72
+
73
+ Prefer not to touch Python? The `glitchlings` CLI exposes the same functionality:
74
+
75
+ ```bash
76
+ # Discover all built-in glitchlings.
77
+ glitchlings --list
78
+
79
+ # Glitch an entire file with Typogre and inspect the unified diff.
80
+ glitchlings -g typogre --file documents/report.txt --diff
81
+
82
+ # Pipe text through Mim1c for on-the-fly homoglyph swaps.
83
+ echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
84
+ ```
85
+
86
+ Append `--diff` to render a unified diff comparing the original and corrupted outputs. Combine it with `--color=always` in terminals that support ANSI colours to highlight changes more clearly.
87
+
88
+ ## The Gaggle orchestrator
89
+
90
+ The `Gaggle` class coordinates multiple glitchlings with deterministic sequencing and shared seeding:
91
+
92
+ - **Seed derivation** – pass `seed=` to `Gaggle(...)` and it will derive per-glitchling seeds via `derive_seed`, so results are stable across runs while each glitchling still receives its own seed rather than sharing one.
93
+ - **Attack scopes & order** – glitchlings declare a scope (`document`, `sentence`, `word`, `character`) and attack order (`early`, `late`, etc.). By default the gaggle sorts by scope, then by order so character-level edits (Typogre, Mim1c, Scannequin) happen after word-level operations (Reduple, Rushmore, Redactyl, Jargoyle). Override this via `Gaggle([...], attack_order=[...])` when you need bespoke choreography.
94
+ - **Dynamic configuration** – use `gaggle.set_param("Typogre", "max_change_rate", 0.05)` to tweak nested glitchling parameters without rebuilding the ensemble.
95
+ - **Dataset utilities** – call `gaggle.corrupt_dataset(dataset, columns=[...])` to clone and perturb Hugging Face datasets while leaving the original untouched. When `columns` is omitted, column inference targets `prompt` or `question` columns and otherwise falls back to all string-valued columns.
96
+ - **Summoning from shorthand** – `glitchlings.summon` lets you build a gaggle from names or partially-configured objects (`summon(["typogre", Mim1c(replacement_rate=0.01)], seed=404)`).
97
+
98
+ ## Glitchling reference
99
+
100
+ Each glitchling subclasses the shared `Glitchling` base class and exposes the same interface: call the instance with text, adjust parameters via `set_param`, and rely on deterministic seeds. This section summarises every built-in creature, its defaults, and practical usage notes.
101
+
102
+ ### Typogre
103
+
104
+ - **Scope**: character level (early attack order, so it acts before late character-level glitchlings such as Mim1c and Scannequin).
105
+ - **Signature**: `Typogre(max_change_rate=0.02, keyboard="CURATOR_QWERTY", seed=None)`.
106
+ - **Behaviour**: simulates fat-finger typing by swapping neighbouring keys, dropping spaces, inserting doubles, or choosing layout-adjacent characters. Keyboard layouts map through `glitchlings.util.KEYNEIGHBORS` and include curated QWERTY, DVORAK, and custom research boards.
107
+ - **Usage tips**:
108
+ - Lower `max_change_rate` (0.005–0.01) for gentle noise; raise it for more chaotic misspellings.
109
+ - Swap to `keyboard="DVORAK"` or supply a custom adjacency dict to model alternative hardware.
110
+ - Combine with Rushmore deletions to simulate hurried note-taking.
111
+
112
+ ### Mim1c
113
+
114
+ - **Scope**: character level (late attack order so it acts after insertions/deletions).
115
+ - **Signature**: `Mim1c(replacement_rate=0.02, classes=None, seed=None)`.
116
+ - **Behaviour**: replaces alphanumeric characters with visually confusable Unicode homoglyphs via `confusable_homoglyphs` (e.g., `A → Α`, `e → е`). When `classes` is omitted it targets Latin, Greek, and Cyrillic scripts; pass `classes="all"` to consider every alias.
117
+ - **Usage tips**:
118
+ - Restrict `classes` (e.g., `classes=["LATIN"]`) when evaluation pipelines reject non-Latin scripts.
119
+ - Keep `replacement_rate` below 0.03 for legible perturbations; higher values can break tokenisers that expect ASCII.
120
+ - Pairs well with Typogre for keyboard + homoglyph chaos.
121
+
122
+ ### Reduple
123
+
124
+ - **Scope**: word level.
125
+ - **Signature**: `Reduple(reduplication_rate=0.05, seed=None)`.
126
+ - **Behaviour**: randomly repeats words (“reduplication”) to mimic stuttering transcripts or speech disfluencies while preserving whitespace and punctuation.
127
+ - **Usage tips**:
128
+ - Use `reduplication_rate=0.01` to emulate occasional hesitations; bump to ≥0.08 for heavy repetition stress tests.
129
+ - Because edits preserve separators, downstream whitespace-sensitive parsers remain stable.
130
+ - Combine with Jargoyle to mix synonym swaps and repeated words for lexical drift.
131
+
132
+ ### Rushmore
133
+
134
+ - **Scope**: word level.
135
+ - **Signature**: `Rushmore(max_deletion_rate=0.01, seed=None)`.
136
+ - **Behaviour**: deletes randomly selected words (skipping the first to preserve context) and tidies double spaces/punctuation afterwards.
137
+ - **Usage tips**:
138
+ - Keep `max_deletion_rate` conservative (<0.03) to avoid stripping sentences bare.
139
+ - Because the first word is preserved, prepend short context sentences when you need deletions deeper in the passage.
140
+ - Sandwich between Reduple and Redactyl to test summarisation robustness under missing context.
141
+
142
+ ### Redactyl
143
+
144
+ - **Scope**: word level.
145
+ - **Signature**: `Redactyl(replacement_char="█", redaction_rate=0.05, merge_adjacent=False, seed=151)`.
146
+ - **Behaviour**: replaces the core characters of selected words with a replacement glyph (default FULL BLOCK) to simulate document redaction. Optionally merges adjacent redaction blocks across punctuation.
147
+ - **Usage tips**:
148
+ - Switch `replacement_char` to `_` or `*` when terminals struggle with block glyphs.
149
+ - Enable `merge_adjacent=True` to form continuous bars when redacting phrases.
150
+ - When no redactable words exist, the underlying implementation raises a `ValueError`—wrap calls with try/except in automated pipelines.
151
+
152
+ ### Jargoyle
153
+
154
+ - **Scope**: word level.
155
+ - **Signature**: `Jargoyle(replacement_rate=0.1, part_of_speech="n", seed=None)`.
156
+ - **Behaviour**: swaps words of the selected part(s) of speech with WordNet synonyms (nouns by default; verbs, adjectives, and adverbs can be selected via `part_of_speech`). Downloads the WordNet corpus on demand when missing and maintains deterministic sampling by sorting candidate lemmas.
157
+ - **Usage tips**:
158
+ - Target specific POS tags (e.g., `part_of_speech=("n", "v")`) to limit changes to content words.
159
+ - Lower `replacement_rate` (0.02–0.05) for subtle lexical variety; higher rates explore paraphrasing extremes.
160
+ - Ensure your environment has the WordNet data pre-cached to avoid first-run download delays.
161
+
162
+ ### Scannequin
163
+
164
+ - **Scope**: character level (late attack order).
165
+ - **Signature**: `Scannequin(error_rate=0.02, seed=None)`.
166
+ - **Behaviour**: introduces OCR-style confusion pairs (rn↔m, cl↔d, O↔0, curly quotes to ASCII, etc.) using deterministic span selection. Supports a Rust acceleration path when compiled.
167
+ - **Usage tips**:
168
+ - Bump `error_rate` for scanned-document stress tests or reduce it for light OCR noise.
169
+ - Because replacements can change token length, run Scannequin after word-level glitchlings to avoid offset drift.
170
+ - Combine with Redactyl to mimic heavily redacted, poorly scanned archives.
171
+
172
+ ## Dataset workflows
173
+
174
+ Leverage the Hugging Face integration to perturb large corpora reproducibly:
175
+
176
+ ```python
177
+ from datasets import load_dataset
178
+ from glitchlings import Gaggle, Typogre, Mim1c
179
+
180
+ dataset = load_dataset("ag_news")
181
+ gaggle = Gaggle([Typogre(max_change_rate=0.02), Mim1c(replacement_rate=0.01)], seed=404)
182
+
183
+ corrupted = gaggle.corrupt_dataset(
184
+ dataset,
185
+ columns=["text"],
186
+ description="ag_news with typographic noise",
187
+ )
188
+ ```
189
+
190
+ Key points:
191
+
192
+ - When `columns` is omitted, Glitchlings infers targets (`prompt`, `question`, or all string columns) using `_resolve_columns` semantics from the Prime loader.
193
+ - The returned dataset is a shallow copy containing both clean and corrupted columns—persist it with `corrupted.push_to_hub(...)` or `corrupted.save_to_disk(...)`.
194
+ - Use dataset-level seeds (`seed=` on the gaggle) so repeated corruptions are stable across machines.
195
+
196
+ ## Prime Intellect integration
197
+
198
+ Installing the `prime` extra exposes `glitchlings.dlc.prime.load_environment`, a convenience wrapper around `verifiers.load_environment` that lets you pre-inject glitchlings into benchmark datasets.
199
+
200
+ ```python
201
+ from glitchlings import Mim1c, Typogre
202
+ from glitchlings.dlc.prime import load_environment, tutorial_level, Difficulty
203
+
204
+ # Load an existing environment and apply custom corruption
205
+ custom_env = load_environment(
206
+ "osoleve/syllabify-en",
207
+ glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
208
+ seed=404,
209
+ columns=["prompt"], # optional; inferred when omitted
210
+ )
211
+
212
+ # Or bootstrap a difficulty-scaled tutorial environment
213
+ practice_env = tutorial_level(
214
+ "osoleve/syllabify-en",
215
+ difficulty=Difficulty.Hard,
216
+ )
217
+ ```
218
+
219
+ Capabilities at a glance:
220
+
221
+ - **Flexible inputs** – the environment may be given as a string environment slug or an instantiated `verifiers.Environment`; the `glitchlings` argument accepts a single glitchling, a list of glitchlings or names, or a pre-built `Gaggle`.
222
+ - **Column inference** – when `columns` is `None`, the loader searches for `prompt`/`question` columns, otherwise falls back to all string-valued columns. Explicitly list columns to target subsets (e.g., prompts but not references).
223
+ - **Deterministic summoning** – non-`Gaggle` inputs are normalised via `summon(...)` with the provided `seed`, so repeated calls produce matching corruption ensembles.
224
+ - **Tutorial difficulty scaling** – `tutorial_level` wires in tuned Mim1c/Typogre parameters multiplied by the selected `Difficulty` enum. Use `Difficulty.Easy` for gentle practice or `Difficulty.Extreme` to hammer robustness.
225
+ - **Dataset mutation** – environments are returned with their dataset replaced by the corrupted clone. Skip the `glitchlings` argument to leave the dataset untouched.
226
+
227
+ ## Ensuring determinism
228
+
229
+ - Derive seeds from the surrounding context (`Gaggle.derive_seed`) when spawning new RNGs.
230
+ - Stabilise candidate order before sampling subsets to keep runs reproducible.
231
+ - Use `set_param` to expose tunable values so they can be reset between tests.
232
+ - When writing new glitchlings, route randomness through the instance RNG rather than module-level state.
233
+
234
+ ## Testing checklist
235
+
236
+ Before publishing changes or documenting new glitchlings, run the Pytest suite from the repository root:
237
+
238
+ ```bash
239
+ pytest
240
+ ```
241
+
242
+ Some tests require the NLTK WordNet corpus. If you see skips mentioning WordNet, install it with:
243
+
244
+ ```bash
245
+ python -c "import nltk; nltk.download('wordnet')"
246
+ ```
247
+
248
+ ## Additional resources
249
+
250
+ - [Monster Manual](../MONSTER_MANUAL.md) – complete bestiary with flavour text.
251
+ - [Repository README](../README.md) – project overview and ASCII ambience.
252
+
253
+ Once the `/docs` folder is published through GitHub Pages, this guide becomes the landing site for your glitchling adventures.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "glitchlings"
3
- version = "0.1.2"
3
+ version = "0.1.4"
4
4
  description = "Monsters for your language games."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -48,6 +48,7 @@ prime = [
48
48
  ]
49
49
  dev = [
50
50
  "pytest>=8.0.0",
51
+ "hypothesis>=6.100.0",
51
52
  ]
52
53
 
53
54
  [build-system]