glitchlings 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {glitchlings-0.1.3 → glitchlings-0.1.4}/PKG-INFO +26 -1
  2. {glitchlings-0.1.3 → glitchlings-0.1.4}/README.md +24 -0
  3. glitchlings-0.1.4/docs/index.md +253 -0
  4. {glitchlings-0.1.3 → glitchlings-0.1.4}/pyproject.toml +2 -1
  5. glitchlings-0.1.4/src/glitchlings/dlc/prime.py +113 -0
  6. {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_cli.py +18 -0
  7. glitchlings-0.1.4/tests/test_glitchling_core.py +24 -0
  8. glitchlings-0.1.4/tests/test_property_based.py +93 -0
  9. glitchlings-0.1.4/tests/test_util.py +35 -0
  10. glitchlings-0.1.3/src/glitchlings/dlc/prime.py +0 -52
  11. {glitchlings-0.1.3 → glitchlings-0.1.4}/.github/workflows/publish.yml +0 -0
  12. {glitchlings-0.1.3 → glitchlings-0.1.4}/.gitignore +0 -0
  13. {glitchlings-0.1.3 → glitchlings-0.1.4}/AGENTS.md +0 -0
  14. {glitchlings-0.1.3 → glitchlings-0.1.4}/LICENSE +0 -0
  15. {glitchlings-0.1.3 → glitchlings-0.1.4}/MONSTER_MANUAL.md +0 -0
  16. {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/typogre/Cargo.lock +0 -0
  17. {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/typogre/Cargo.toml +0 -0
  18. {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/typogre/src/lib.rs +0 -0
  19. {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/zoo/Cargo.lock +0 -0
  20. {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/zoo/Cargo.toml +0 -0
  21. {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/zoo/src/lib.rs +0 -0
  22. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/__init__.py +0 -0
  23. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/__main__.py +0 -0
  24. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/dlc/__init__.py +0 -0
  25. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/main.py +0 -0
  26. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/util/__init__.py +0 -0
  27. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/__init__.py +0 -0
  28. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/core.py +0 -0
  29. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/jargoyle.py +0 -0
  30. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/mim1c.py +0 -0
  31. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/redactyl.py +0 -0
  32. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/reduple.py +0 -0
  33. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/rushmore.py +0 -0
  34. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/scannequin.py +0 -0
  35. {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/typogre.py +0 -0
  36. {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/conftest.py +0 -0
  37. {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_dataset_corruption.py +0 -0
  38. {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_gaggle.py +0 -0
  39. {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_glitchlings_determinism.py +0 -0
  40. {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_jargoyle.py +0 -0
  41. {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_keyboard_layouts.py +0 -0
  42. {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_parameter_effects.py +0 -0
  43. {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_rust_backed_glitchlings.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Monsters for your language games.
5
5
  Project-URL: Homepage, https://github.com/osoleve/glitchlings
6
6
  Project-URL: Repository, https://github.com/osoleve/glitchlings.git
@@ -225,6 +225,7 @@ Requires-Dist: datasets>=4.0.0
225
225
  Requires-Dist: jellyfish>=1.2.0
226
226
  Requires-Dist: nltk>=3.9.1
227
227
  Provides-Extra: dev
228
+ Requires-Dist: hypothesis>=6.100.0; extra == 'dev'
228
229
  Requires-Dist: pytest>=8.0.0; extra == 'dev'
229
230
  Provides-Extra: prime
230
231
  Requires-Dist: verifiers>=0.1.3.post0; extra == 'prime'
@@ -283,6 +284,30 @@ print(gaggle(SAMPLE_TEXT))
283
284
 
284
285
  > Onҽ m‎ھ‎rning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin‎٠‎ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
285
286
 
287
+ ## Usage
288
+
289
+ Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
290
+
291
+ - **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
292
+ - **Dataset corruption** – Use a `Gaggle`'s `.corrupt_dataset` helper to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
293
+
294
+ ### Prime Intellect environments
295
+
296
+ After `pip install -e '.[prime]'`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
297
+
298
+ ```python
299
+ from glitchlings import Mim1c, Typogre
300
+ from glitchlings.dlc.prime import load_environment
301
+
302
+ env = load_environment(
303
+ "osoleve/syllabify-en",
304
+ glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
305
+ seed=404,
306
+ )
307
+ ```
308
+
309
+ Omit the `glitchlings` argument to receive the environment with its verifier dataset untouched.
310
+
286
311
  ## Motivation
287
312
 
288
313
  If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
@@ -51,6 +51,30 @@ print(gaggle(SAMPLE_TEXT))
51
51
 
52
52
  > Onҽ m‎ھ‎rning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin‎٠‎ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
53
53
 
54
+ ## Usage
55
+
56
+ Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
57
+
58
+ - **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
59
+ - **Dataset corruption** – Use a `Gaggle`'s `.corrupt_dataset` helper to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
60
+
61
+ ### Prime Intellect environments
62
+
63
+ After `pip install -e '.[prime]'`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
64
+
65
+ ```python
66
+ from glitchlings import Mim1c, Typogre
67
+ from glitchlings.dlc.prime import load_environment
68
+
69
+ env = load_environment(
70
+ "osoleve/syllabify-en",
71
+ glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
72
+ seed=404,
73
+ )
74
+ ```
75
+
76
+ Omit the `glitchlings` argument to receive the environment with its verifier dataset untouched.
77
+
54
78
  ## Motivation
55
79
 
56
80
  If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
@@ -0,0 +1,253 @@
1
+ # Glitchlings Usage Guide
2
+
3
+ Welcome to the Glitchlings field manual! This GitHub Pages-ready guide explains how to install the toolkit, orchestrate chaos with the `Gaggle`, and wield every individual glitchling (Typogre, Mim1c, Reduple, Rushmore, Redactyl, Jargoyle, and Scannequin). It closes with deep coverage of the optional Prime Intellect integration so you can perturb verifier datasets with confidence.
4
+
5
+ ## Table of contents
6
+
7
+ 1. [Installation](#installation)
8
+ 2. [Quickstart](#quickstart)
9
+ 3. [The Gaggle orchestrator](#the-gaggle-orchestrator)
10
+ 4. [Glitchling reference](#glitchling-reference)
11
+ - [Typogre](#typogre)
12
+ - [Mim1c](#mim1c)
13
+ - [Reduple](#reduple)
14
+ - [Rushmore](#rushmore)
15
+ - [Redactyl](#redactyl)
16
+ - [Jargoyle](#jargoyle)
17
+ - [Scannequin](#scannequin)
18
+ 5. [Dataset workflows](#dataset-workflows)
19
+ 6. [Prime Intellect integration](#prime-intellect-integration)
20
+ 7. [Ensuring determinism](#ensuring-determinism)
21
+ 8. [Testing checklist](#testing-checklist)
22
+ 9. [Additional resources](#additional-resources)
23
+
24
+ ## Installation
25
+
26
+ Install the latest release directly from PyPI:
27
+
28
+ ```bash
29
+ pip install -U glitchlings
30
+ ```
31
+
32
+ Need the optional Prime Intellect loader or the NLTK-powered Jargoyle ready to go? Install the `prime` extra and pre-download the WordNet corpus:
33
+
34
+ ```bash
35
+ # Prime Intellect DLC + verifiers dependency
36
+ pip install -U 'glitchlings[prime]'
37
+
38
+ # NLTK WordNet corpora for Jargoyle synonym swaps
39
+ python -m nltk.downloader wordnet
40
+ ```
41
+
42
+ ### Source install
43
+
44
+ When working from a local clone, install in editable mode so your changes take effect immediately:
45
+
46
+ ```bash
47
+ pip install -e .
48
+ ```
49
+
50
+ If you plan to experiment with the PyO3 acceleration crates, install `maturin` and run `maturin develop` from each crate directory inside the `rust/` folder to compile the optional Rust fast paths.
51
+
52
+ ## Quickstart
53
+
54
+ Glitchlings are callable objects that accept strings (and string-like iterables) and return corrupted copies. Summon a single glitchling or gather multiple into a `Gaggle` to orchestrate compound effects:
55
+
56
+ ```python
57
+ from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Reduple, Rushmore
58
+
59
+ gaggle = Gaggle([
60
+ Typogre(max_change_rate=0.03),
61
+ Mim1c(replacement_rate=0.02),
62
+ Reduple(seed=404),
63
+ Rushmore(max_deletion_rate=0.02),
64
+ ], seed=1234)
65
+
66
+ print(gaggle(SAMPLE_TEXT))
67
+ ```
68
+
69
+ All glitchlings are deterministic: pass a `seed` during construction (or on the enclosing `Gaggle`) to make the chaos reproducible.
70
+
71
+ ### Command line interface
72
+
73
+ Prefer not to touch Python? The `glitchlings` CLI exposes the same functionality:
74
+
75
+ ```bash
76
+ # Discover all built-in glitchlings.
77
+ glitchlings --list
78
+
79
+ # Glitch an entire file with Typogre and inspect the unified diff.
80
+ glitchlings -g typogre --file documents/report.txt --diff
81
+
82
+ # Pipe text through Mim1c for on-the-fly homoglyph swaps.
83
+ echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
84
+ ```
85
+
86
+ Append `--diff` to render a unified diff comparing the original and corrupted outputs. Combine it with `--color=always` in terminals that support ANSI colours to highlight changes more clearly.
87
+
88
+ ## The Gaggle orchestrator
89
+
90
+ The `Gaggle` class coordinates multiple glitchlings with deterministic sequencing and shared seeding:
91
+
92
+ - **Seed derivation** – pass `seed=` to `Gaggle(...)` and it will derive per-glitchling seeds via `derive_seed`, ensuring cross-run stability without repeated outputs.
93
+ - **Attack scopes & order** – glitchlings declare a scope (`document`, `sentence`, `word`, `character`) and attack order (`early`, `late`, etc.). By default the gaggle sorts by scope, then by order so character-level edits (Typogre, Mim1c, Scannequin) happen after word-level operations (Reduple, Rushmore, Redactyl, Jargoyle). Override this via `Gaggle([...], attack_order=[...])` when you need bespoke choreography.
94
+ - **Dynamic configuration** – use `gaggle.set_param("Typogre", "max_change_rate", 0.05)` to tweak nested glitchling parameters without rebuilding the ensemble.
95
+ - **Dataset utilities** – call `gaggle.corrupt_dataset(dataset, columns=[...])` to clone and perturb Hugging Face datasets while leaving the original untouched. Column inference automatically targets `text`, `prompt`, or similar string columns when none are provided.
96
+ - **Summoning from shorthand** – `glitchlings.summon` lets you build a gaggle from names or partially-configured objects (`summon(["typogre", Mim1c(replacement_rate=0.01)], seed=404)`).
97
+
98
+ ## Glitchling reference
99
+
100
+ Each glitchling subclasses the shared `Glitchling` base class and exposes the same interface: call the instance with text, adjust parameters via `set_param`, and rely on deterministic seeds. This section summarises every built-in creature, its defaults, and practical usage notes.
101
+
102
+ ### Typogre
103
+
104
+ - **Scope**: character level (early attack order, so it acts before the other character-level glitchlings).
105
+ - **Signature**: `Typogre(max_change_rate=0.02, keyboard="CURATOR_QWERTY", seed=None)`.
106
+ - **Behaviour**: simulates fat-finger typing by swapping neighbouring keys, dropping spaces, inserting doubles, or choosing layout-adjacent characters. Keyboard layouts map through `glitchlings.util.KEYNEIGHBORS` and include curated QWERTY, DVORAK, and custom research boards.
107
+ - **Usage tips**:
108
+ - Lower `max_change_rate` (0.005–0.01) for gentle noise; raise it for more chaotic misspellings.
109
+ - Swap to `keyboard="DVORAK"` or supply a custom adjacency dict to model alternative hardware.
110
+ - Combine with Rushmore deletions to simulate hurried note-taking.
111
+
112
+ ### Mim1c
113
+
114
+ - **Scope**: character level (late attack order so it acts after insertions/deletions).
115
+ - **Signature**: `Mim1c(replacement_rate=0.02, classes=None, seed=None)`.
116
+ - **Behaviour**: replaces alphanumeric characters with visually confusable Unicode homoglyphs via `confusable_homoglyphs` (e.g., `A → Α`, `e → е`). When `classes` is omitted it targets Latin, Greek, and Cyrillic scripts; pass `classes="all"` to consider every alias.
117
+ - **Usage tips**:
118
+ - Restrict `classes` (e.g., `classes=["LATIN"]`) when evaluation pipelines reject non-Latin scripts.
119
+ - Keep `replacement_rate` below 0.03 for legible perturbations; higher values can break tokenisers that expect ASCII.
120
+ - Pairs well with Typogre for keyboard + homoglyph chaos.
121
+
122
+ ### Reduple
123
+
124
+ - **Scope**: word level.
125
+ - **Signature**: `Reduple(reduplication_rate=0.05, seed=None)`.
126
+ - **Behaviour**: randomly repeats words (“reduplication”) to mimic stuttering transcripts or speech disfluencies while preserving whitespace and punctuation.
127
+ - **Usage tips**:
128
+ - Use `reduplication_rate=0.01` to emulate occasional hesitations; bump to ≥0.08 for heavy repetition stress tests.
129
+ - Because edits preserve separators, downstream whitespace-sensitive parsers remain stable.
130
+ - Combine with Jargoyle to mix synonym swaps and repeated words for lexical drift.
131
+
132
+ ### Rushmore
133
+
134
+ - **Scope**: word level.
135
+ - **Signature**: `Rushmore(max_deletion_rate=0.01, seed=None)`.
136
+ - **Behaviour**: deletes randomly selected words (skipping the first to preserve context) and tidies double spaces/punctuation afterwards.
137
+ - **Usage tips**:
138
+ - Keep `max_deletion_rate` conservative (<0.03) to avoid stripping sentences bare.
139
+ - Because the first word is preserved, prepend short context sentences when you need deletions deeper in the passage.
140
+ - Sandwich between Reduple and Redactyl to test summarisation robustness under missing context.
141
+
142
+ ### Redactyl
143
+
144
+ - **Scope**: word level.
145
+ - **Signature**: `Redactyl(replacement_char="█", redaction_rate=0.05, merge_adjacent=False, seed=151)`.
146
+ - **Behaviour**: replaces the core characters of selected words with a replacement glyph (default FULL BLOCK) to simulate document redaction. Optionally merges adjacent redaction blocks across punctuation.
147
+ - **Usage tips**:
148
+ - Switch `replacement_char` to `_` or `*` when terminals struggle with block glyphs.
149
+ - Enable `merge_adjacent=True` to form continuous bars when redacting phrases.
150
+ - When no redactable words exist, the underlying implementation raises a `ValueError`—wrap calls with try/except in automated pipelines.
151
+
152
+ ### Jargoyle
153
+
154
+ - **Scope**: word level.
155
+ - **Signature**: `Jargoyle(replacement_rate=0.1, part_of_speech="n", seed=None)`.
156
+ - **Behaviour**: swaps nouns/verbs/adjectives/adverbs with WordNet synonyms. Downloads the WordNet corpus on demand when missing and maintains deterministic sampling by sorting candidate lemmas.
157
+ - **Usage tips**:
158
+ - Target specific POS tags (e.g., `part_of_speech=("n", "v")`) to limit changes to content words.
159
+ - Lower `replacement_rate` (0.02–0.05) for subtle lexical variety; higher rates explore paraphrasing extremes.
160
+ - Ensure your environment has the WordNet data pre-cached to avoid first-run download delays.
161
+
162
+ ### Scannequin
163
+
164
+ - **Scope**: character level (late order).
165
+ - **Signature**: `Scannequin(error_rate=0.02, seed=None)`.
166
+ - **Behaviour**: introduces OCR-style confusion pairs (rn↔m, cl↔d, O↔0, curly quotes to ASCII, etc.) using deterministic span selection. Supports a Rust acceleration path when compiled.
167
+ - **Usage tips**:
168
+ - Bump `error_rate` for scanned-document stress tests or reduce it for light OCR noise.
169
+ - Because replacements can change token length, run Scannequin after word-level glitchlings to avoid offset drift.
170
+ - Combine with Redactyl to mimic heavily redacted, poorly scanned archives.
171
+
172
+ ## Dataset workflows
173
+
174
+ Leverage the Hugging Face integration to perturb large corpora reproducibly:
175
+
176
+ ```python
177
+ from datasets import load_dataset
178
+ from glitchlings import Gaggle, Typogre, Mim1c
179
+
180
+ dataset = load_dataset("ag_news")
181
+ gaggle = Gaggle([Typogre(max_change_rate=0.02), Mim1c(replacement_rate=0.01)], seed=404)
182
+
183
+ corrupted = gaggle.corrupt_dataset(
184
+ dataset,
185
+ columns=["text"],
186
+ description="ag_news with typographic noise",
187
+ )
188
+ ```
189
+
190
+ Key points:
191
+
192
+ - When `columns` is omitted, Glitchlings infers targets (`prompt`, `question`, or all string columns) using `_resolve_columns` semantics from the Prime loader.
193
+ - The returned dataset is a shallow copy containing both clean and corrupted columns—persist it with `corrupted.push_to_hub(...)` or `corrupted.save_to_disk(...)`.
194
+ - Use dataset-level seeds (`seed=` on the gaggle) so repeated corruptions are stable across machines.
195
+
196
+ ## Prime Intellect integration
197
+
198
+ Installing the `prime` extra exposes `glitchlings.dlc.prime.load_environment`, a convenience wrapper around `verifiers.load_environment` that lets you pre-inject glitchlings into benchmark datasets.
199
+
200
+ ```python
201
+ from glitchlings import Mim1c, Typogre
202
+ from glitchlings.dlc.prime import load_environment, tutorial_level, Difficulty
203
+
204
+ # Load an existing environment and apply custom corruption
205
+ custom_env = load_environment(
206
+ "osoleve/syllabify-en",
207
+ glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
208
+ seed=404,
209
+ columns=["prompt"], # optional; inferred when omitted
210
+ )
211
+
212
+ # Or bootstrap a difficulty-scaled tutorial environment
213
+ practice_env = tutorial_level(
214
+ "osoleve/syllabify-en",
215
+ difficulty=Difficulty.Hard,
216
+ )
217
+ ```
218
+
219
+ Capabilities at a glance:
220
+
221
+ - **Flexible inputs** – pass a string environment slug, an instantiated `verifiers.Environment`, a single glitchling, a list of glitchlings or names, or a pre-built `Gaggle`.
222
+ - **Column inference** – when `columns` is `None`, the loader searches for `prompt`/`question` columns, otherwise falls back to all string-valued columns. Explicitly list columns to target subsets (e.g., prompts but not references).
223
+ - **Deterministic summoning** – non-`Gaggle` inputs are normalised via `summon(...)` with the provided `seed`, so repeated calls produce matching corruption ensembles.
224
+ - **Tutorial difficulty scaling** – `tutorial_level` wires in tuned Mim1c/Typogre parameters multiplied by the selected `Difficulty` enum. Use `Difficulty.Easy` for gentle practice or `Difficulty.Extreme` to hammer robustness.
225
+ - **Dataset mutation** – environments are returned with their dataset replaced by the corrupted clone. Skip the `glitchlings` argument to leave the dataset untouched.
226
+
227
+ ## Ensuring determinism
228
+
229
+ - Derive seeds from the surrounding context (`Gaggle.derive_seed`) when spawning new RNGs.
230
+ - Stabilise candidate order before sampling subsets to keep runs reproducible.
231
+ - Use `set_param` to expose tunable values so they can be reset between tests.
232
+ - When writing new glitchlings, route randomness through the instance RNG rather than module-level state.
233
+
234
+ ## Testing checklist
235
+
236
+ Before publishing changes or documenting new glitchlings, run the Pytest suite from the repository root:
237
+
238
+ ```bash
239
+ pytest
240
+ ```
241
+
242
+ Some tests require the NLTK WordNet corpus. If you see skips mentioning WordNet, install it with:
243
+
244
+ ```bash
245
+ python -c "import nltk; nltk.download('wordnet')"
246
+ ```
247
+
248
+ ## Additional resources
249
+
250
+ - [Monster Manual](../MONSTER_MANUAL.md) – complete bestiary with flavour text.
251
+ - [Repository README](../README.md) – project overview and ASCII ambience.
252
+
253
+ Once the `/docs` folder is published through GitHub Pages, this guide becomes the landing site for your glitchling adventures.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "glitchlings"
3
- version = "0.1.3"
3
+ version = "0.1.4"
4
4
  description = "Monsters for your language games."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -48,6 +48,7 @@ prime = [
48
48
  ]
49
49
  dev = [
50
50
  "pytest>=8.0.0",
51
+ "hypothesis>=6.100.0",
51
52
  ]
52
53
 
53
54
  [build-system]
@@ -0,0 +1,113 @@
1
+ """Integration helpers for the optional verifiers prime DLC."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterable, Sequence
6
+ from enum import Enum
7
+
8
+ import verifiers as vf
9
+
10
+ try:
11
+ from datasets import Dataset
12
+ except ModuleNotFoundError: # pragma: no cover - optional dependency
13
+ Dataset = object # type: ignore[assignment]
14
+
15
+ from ..zoo import Gaggle, Glitchling, Mim1c, Typogre, summon
16
+
17
+
18
+ def _resolve_environment(env: str | vf.Environment) -> vf.Environment:
19
+ """Return a fully-instantiated verifier environment."""
20
+
21
+ if isinstance(env, str):
22
+ env = vf.load_environment(env)
23
+
24
+ if not isinstance(env, vf.Environment):
25
+ raise TypeError("Invalid environment type")
26
+
27
+ return env
28
+
29
+
30
+ def _resolve_columns(dataset: Dataset, columns: Sequence[str] | None) -> list[str]:
31
+ """Identify which dataset columns should be corrupted."""
32
+
33
+ available = set(dataset.column_names)
34
+
35
+ if columns is not None:
36
+ missing = sorted(set(columns) - available)
37
+ if missing:
38
+ missing_str = ", ".join(missing)
39
+ raise ValueError(f"Columns not found in dataset: {missing_str}")
40
+ return list(columns)
41
+
42
+ for candidate in ("prompt", "question"):
43
+ if candidate in available:
44
+ return [candidate]
45
+
46
+ sample = dataset[0] if len(dataset) else {}
47
+ inferred = [
48
+ name
49
+ for name in dataset.column_names
50
+ if isinstance(sample.get(name), str)
51
+ ]
52
+
53
+ if inferred:
54
+ return inferred
55
+
56
+ raise ValueError("Unable to determine which dataset columns to corrupt.")
57
+
58
+
59
+ class Difficulty(Enum):
60
+ """Difficulty levels for tutorial environments."""
61
+
62
+ Easy = 0.25
63
+ Normal = 1.0
64
+ Hard = 1.75
65
+ Extreme = 3
66
+ Impossible = 9
67
+
68
+
69
+ def tutorial_level(
70
+ env: vf.Environment | str,
71
+ seed: int = 151,
72
+ difficulty: Difficulty = Difficulty.Normal,
73
+ ) -> vf.Environment:
74
+ """Create a low-corruption environment using tuned defaults."""
75
+
76
+ tuned_mim1c = Mim1c(replacement_rate=0.01 * difficulty.value)
77
+ tuned_typogre = Typogre(max_change_rate=0.025 * difficulty.value)
78
+
79
+ return load_environment(
80
+ env,
81
+ glitchlings=[tuned_mim1c, tuned_typogre],
82
+ seed=seed,
83
+ )
84
+
85
+
86
+ def load_environment(
87
+ env: str | vf.Environment,
88
+ glitchlings: Iterable[str | Glitchling] | Glitchling | str | Gaggle | None = None,
89
+ *,
90
+ seed: int = 151,
91
+ columns: Sequence[str] | None = None,
92
+ ) -> vf.Environment:
93
+ """Load an environment and optionally corrupt it with glitchlings."""
94
+
95
+ environment = _resolve_environment(env)
96
+
97
+ if glitchlings is None:
98
+ return environment
99
+
100
+ if isinstance(glitchlings, Gaggle):
101
+ gaggle = glitchlings
102
+ else:
103
+ if isinstance(glitchlings, (Glitchling, str)):
104
+ resolved = [glitchlings]
105
+ else:
106
+ resolved = list(glitchlings)
107
+
108
+ gaggle = summon(resolved, seed=seed)
109
+
110
+ dataset = environment.dataset
111
+ corrupt_columns = _resolve_columns(dataset, columns)
112
+ environment.dataset = gaggle.corrupt_dataset(dataset, corrupt_columns)
113
+ return environment
@@ -130,3 +130,21 @@ def test_read_text_requires_input(monkeypatch, capsys):
130
130
  read_text(args, parser)
131
131
  captured = capsys.readouterr()
132
132
  assert "No input text provided" in captured.err
133
+
134
+
135
+ def test_read_text_consumes_stdin(monkeypatch):
136
+ parser = build_parser()
137
+ args = parser.parse_args([])
138
+
139
+ sentinel = "stdin payload"
140
+
141
+ class DummyStdin:
142
+ def isatty(self):
143
+ return False
144
+
145
+ def read(self):
146
+ return sentinel
147
+
148
+ monkeypatch.setattr("sys.stdin", DummyStdin())
149
+
150
+ assert read_text(args, parser) == sentinel
@@ -0,0 +1,24 @@
1
+ from glitchlings.zoo.typogre import Typogre
2
+
3
+
4
+ def test_typogre_clone_preserves_configuration_and_seed_behavior() -> None:
5
+ original = Typogre(max_change_rate=0.05, keyboard="AZERTY", seed=111)
6
+
7
+ clone = original.clone(seed=222)
8
+
9
+ assert isinstance(clone, Typogre)
10
+ assert clone.max_change_rate == original.max_change_rate
11
+ assert clone.keyboard == original.keyboard
12
+
13
+ sample_text = "The quick brown fox jumps over the lazy dog."
14
+
15
+ original.reset_rng()
16
+ original_result = original(sample_text)
17
+
18
+ clone.reset_rng()
19
+ clone_result_first = clone(sample_text)
20
+ clone.reset_rng()
21
+ clone_result_second = clone(sample_text)
22
+
23
+ assert clone_result_first == clone_result_second
24
+ assert clone_result_first != original_result
@@ -0,0 +1,93 @@
1
+ """Property-based tests covering core orchestration primitives."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import string
6
+
7
+ from hypothesis import assume, given, strategies as st
8
+
9
+ from glitchlings.zoo.core import AttackOrder, AttackWave, Gaggle, Glitchling
10
+
11
+
12
+ def _build_corruption(name: str, amplitude: int):
13
+ """Create a deterministic corruption function driven by the provided RNG.
14
+
15
+ The function appends a marker tied to the glitchling name along with a
16
+ pseudo-random suffix that depends on the glitchling's RNG. This allows the
17
+ tests to assert that derived seeds and ordering are both respected.
18
+ """
19
+
20
+ choices = (name + "xyz").replace("|", "_")
21
+
22
+ def _corrupt(text: str, *, rng) -> str:
23
+ if amplitude == 0:
24
+ return f"{text}|{name}"
25
+ suffix = "".join(rng.choice(choices) for _ in range(amplitude))
26
+ return f"{text}|{name}:{suffix}"
27
+
28
+ return _corrupt
29
+
30
+
31
+ @st.composite
32
+ def glitchling_specs(draw):
33
+ name = draw(
34
+ st.text(alphabet=string.ascii_letters + string.digits, min_size=1, max_size=8)
35
+ )
36
+ wave = draw(st.sampled_from(list(AttackWave)))
37
+ order = draw(st.sampled_from(list(AttackOrder)))
38
+ amplitude = draw(st.integers(min_value=0, max_value=4))
39
+ return {"name": name, "wave": wave, "order": order, "amplitude": amplitude}
40
+
41
+
42
+ @given(
43
+ master_seed=st.integers(min_value=-(2**63), max_value=2**63 - 1),
44
+ specs=st.lists(glitchling_specs(), min_size=1, max_size=5, unique_by=lambda s: s["name"]),
45
+ )
46
+ def test_gaggle_ordering_and_determinism(master_seed, specs):
47
+ """Gaggles should honour ordering guarantees and deterministic RNG use."""
48
+
49
+ glitchlings = [
50
+ Glitchling(
51
+ name=spec["name"],
52
+ corruption_function=_build_corruption(spec["name"], spec["amplitude"]),
53
+ scope=spec["wave"],
54
+ order=spec["order"],
55
+ )
56
+ for spec in specs
57
+ ]
58
+
59
+ gaggle = Gaggle(glitchlings, seed=master_seed)
60
+
61
+ expected = [
62
+ spec["name"]
63
+ for spec in sorted(
64
+ specs,
65
+ key=lambda spec: (spec["wave"], spec["order"], spec["name"]),
66
+ )
67
+ ]
68
+ actual = [g.name for g in gaggle.apply_order]
69
+ assert actual == expected
70
+
71
+ text = "payload"
72
+ first_run = gaggle(text)
73
+ second_run = Gaggle(glitchlings, seed=master_seed)(text)
74
+ assert first_run == second_run
75
+
76
+
77
+ @given(
78
+ left=st.tuples(
79
+ st.integers(min_value=-(2**63), max_value=2**63 - 1),
80
+ st.text(alphabet=string.ascii_letters + string.digits, min_size=0, max_size=12),
81
+ st.integers(min_value=0, max_value=1024),
82
+ ),
83
+ right=st.tuples(
84
+ st.integers(min_value=-(2**63), max_value=2**63 - 1),
85
+ st.text(alphabet=string.ascii_letters + string.digits, min_size=0, max_size=12),
86
+ st.integers(min_value=0, max_value=1024),
87
+ ),
88
+ )
89
+ def test_derived_seeds_change_with_inputs(left, right):
90
+ """Changing any component of the derivation tuple should alter the seed."""
91
+
92
+ assume(left != right)
93
+ assert Gaggle.derive_seed(*left) != Gaggle.derive_seed(*right)
@@ -0,0 +1,35 @@
1
+ import pytest
2
+
3
+ from glitchlings.util import string_diffs
4
+
5
+
6
+ def test_string_diffs_groups_consecutive_edits_and_skips_equals():
7
+ result = string_diffs("kitten", "sitting")
8
+
9
+ assert result == [
10
+ [("replace", "k", "s")],
11
+ [("replace", "e", "i")],
12
+ [("insert", "", "g")],
13
+ ]
14
+
15
+ for group in result:
16
+ assert group
17
+ assert all(tag != "equal" for tag, *_ in group)
18
+
19
+
20
+ @pytest.mark.parametrize(
21
+ "a,b,expected",
22
+ [
23
+ ("flaw", "lawn", [[("delete", "f", "")], [("insert", "", "n")]]),
24
+ (
25
+ "distance",
26
+ "instance",
27
+ [
28
+ [("delete", "d", "")],
29
+ [("insert", "", "n")],
30
+ ],
31
+ ),
32
+ ],
33
+ )
34
+ def test_string_diffs_handles_multiple_edit_groups(a: str, b: str, expected: list[list[tuple[str, str, str]]]):
35
+ assert string_diffs(a, b) == expected
@@ -1,52 +0,0 @@
1
- from enum import Enum
2
- import functools as ft
3
-
4
- import verifiers as vf
5
- from datasets import Dataset
6
-
7
- from ..zoo import Glitchling, Gaggle, Mim1c, Typogre, summon
8
-
9
-
10
- class Difficulty(Enum):
11
- """Difficulty levels for tutorial environments."""
12
-
13
- Easy = 0.25
14
- Normal = 1.0
15
- Hard = 1.75
16
- Extreme = 3
17
- Impossible = 9
18
-
19
-
20
- def tutorial_level(
21
- env: vf.Environment | str, seed=151, difficulty: Difficulty = Difficulty.Normal
22
- ) -> vf.Environment:
23
- """Create a low-corruption environment."""
24
-
25
- tuned_mim1c = Mim1c(replacement_rate=0.01 * difficulty.value)
26
- tuned_typogre = Typogre(max_change_rate=0.025 * difficulty.value)
27
-
28
- glitchlings: Gaggle = summon([tuned_mim1c, tuned_typogre], seed=seed)
29
-
30
- if isinstance(env, str):
31
- env = vf.load_environment(env)
32
-
33
- assert isinstance(env, vf.Environment), "Invalid environment type"
34
-
35
- if "prompt" in env.dataset.column_names:
36
- env.dataset = glitchlings.corrupt_dataset(env.dataset, ["prompt"])
37
- elif "question" in env.dataset.column_names:
38
- env.dataset = glitchlings.corrupt_dataset(env.dataset, ["question"])
39
- else:
40
- raise ValueError("Can't find prompt or question column")
41
-
42
- return env
43
-
44
-
45
- def load_environment(
46
- env: str | vf.Environment,
47
- seed=151,
48
- difficulty: Difficulty = Difficulty.Normal,
49
- loader=tutorial_level,
50
- ) -> vf.Environment:
51
- """Load an environment by name."""
52
- return loader(env, seed=seed, difficulty=difficulty)
File without changes
File without changes
File without changes