glitchlings 0.1.3__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {glitchlings-0.1.3 → glitchlings-0.1.4}/PKG-INFO +26 -1
- {glitchlings-0.1.3 → glitchlings-0.1.4}/README.md +24 -0
- glitchlings-0.1.4/docs/index.md +253 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/pyproject.toml +2 -1
- glitchlings-0.1.4/src/glitchlings/dlc/prime.py +113 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_cli.py +18 -0
- glitchlings-0.1.4/tests/test_glitchling_core.py +24 -0
- glitchlings-0.1.4/tests/test_property_based.py +93 -0
- glitchlings-0.1.4/tests/test_util.py +35 -0
- glitchlings-0.1.3/src/glitchlings/dlc/prime.py +0 -52
- {glitchlings-0.1.3 → glitchlings-0.1.4}/.github/workflows/publish.yml +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/.gitignore +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/AGENTS.md +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/LICENSE +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/MONSTER_MANUAL.md +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/typogre/Cargo.lock +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/typogre/Cargo.toml +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/typogre/src/lib.rs +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/zoo/Cargo.lock +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/zoo/Cargo.toml +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/rust/zoo/src/lib.rs +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/__init__.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/__main__.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/dlc/__init__.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/main.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/util/__init__.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/__init__.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/core.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/jargoyle.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/mim1c.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/redactyl.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/reduple.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/rushmore.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/scannequin.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/src/glitchlings/zoo/typogre.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/conftest.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_dataset_corruption.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_gaggle.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_glitchlings_determinism.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_jargoyle.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_keyboard_layouts.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_parameter_effects.py +0 -0
- {glitchlings-0.1.3 → glitchlings-0.1.4}/tests/test_rust_backed_glitchlings.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: glitchlings
|
3
|
-
Version: 0.1.3
|
3
|
+
Version: 0.1.4
|
4
4
|
Summary: Monsters for your language games.
|
5
5
|
Project-URL: Homepage, https://github.com/osoleve/glitchlings
|
6
6
|
Project-URL: Repository, https://github.com/osoleve/glitchlings.git
|
@@ -225,6 +225,7 @@ Requires-Dist: datasets>=4.0.0
|
|
225
225
|
Requires-Dist: jellyfish>=1.2.0
|
226
226
|
Requires-Dist: nltk>=3.9.1
|
227
227
|
Provides-Extra: dev
|
228
|
+
Requires-Dist: hypothesis>=6.100.0; extra == 'dev'
|
228
229
|
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
229
230
|
Provides-Extra: prime
|
230
231
|
Requires-Dist: verifiers>=0.1.3.post0; extra == 'prime'
|
@@ -283,6 +284,30 @@ print(gaggle(SAMPLE_TEXT))
|
|
283
284
|
|
284
285
|
> Onҽ mھrning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin٠ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
|
285
286
|
|
287
|
+
## Usage
|
288
|
+
|
289
|
+
Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
|
290
|
+
|
291
|
+
- **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
|
292
|
+
- **Dataset corruption** – Use a `Gaggle`'s `.corrupt_dataset` helper to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
|
293
|
+
|
294
|
+
### Prime Intellect environments
|
295
|
+
|
296
|
+
After `pip install -e .[prime]`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
|
297
|
+
|
298
|
+
```python
|
299
|
+
from glitchlings import Mim1c, Typogre
|
300
|
+
from glitchlings.dlc.prime import load_environment
|
301
|
+
|
302
|
+
env = load_environment(
|
303
|
+
"osoleve/syllabify-en",
|
304
|
+
glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
|
305
|
+
seed=404,
|
306
|
+
)
|
307
|
+
```
|
308
|
+
|
309
|
+
Skip the `glitchlings` argument to receive an untouched verifier dataset.
|
310
|
+
|
286
311
|
## Motivation
|
287
312
|
|
288
313
|
If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
|
@@ -51,6 +51,30 @@ print(gaggle(SAMPLE_TEXT))
|
|
51
51
|
|
52
52
|
> Onҽ mھrning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin٠ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
|
53
53
|
|
54
|
+
## Usage
|
55
|
+
|
56
|
+
Glitchlings slot into evaluation pipelines just as easily as they corrupt stray strings.
|
57
|
+
|
58
|
+
- **Direct invocation** – Instantiate a glitchling (or `Gaggle`) and call it on strings, iterables, or datasets. Keep the seed stable to make every run deterministic.
|
59
|
+
- **Dataset corruption** – Use a `Gaggle`'s `.corrupt_dataset` helper to perturb a Hugging Face `datasets.Dataset` and return a corrupted copy for training or evaluation.
|
60
|
+
|
61
|
+
### Prime Intellect environments
|
62
|
+
|
63
|
+
After `pip install -e .[prime]`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
|
64
|
+
|
65
|
+
```python
|
66
|
+
from glitchlings import Mim1c, Typogre
|
67
|
+
from glitchlings.dlc.prime import load_environment
|
68
|
+
|
69
|
+
env = load_environment(
|
70
|
+
"osoleve/syllabify-en",
|
71
|
+
glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
|
72
|
+
seed=404,
|
73
|
+
)
|
74
|
+
```
|
75
|
+
|
76
|
+
Skip the `glitchlings` argument to receive an untouched verifier dataset.
|
77
|
+
|
54
78
|
## Motivation
|
55
79
|
|
56
80
|
If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
|
@@ -0,0 +1,253 @@
|
|
1
|
+
# Glitchlings Usage Guide
|
2
|
+
|
3
|
+
Welcome to the Glitchlings field manual! This GitHub Pages-ready guide explains how to install the toolkit, orchestrate chaos with the `Gaggle`, and wield every individual glitchling (Typogre, Mim1c, Reduple, Rushmore, Redactyl, Jargoyle, and Scannequin). It closes with deep coverage of the optional Prime Intellect integration so you can perturb verifier datasets with confidence.
|
4
|
+
|
5
|
+
## Table of contents
|
6
|
+
|
7
|
+
1. [Installation](#installation)
|
8
|
+
2. [Quickstart](#quickstart)
|
9
|
+
3. [The Gaggle orchestrator](#the-gaggle-orchestrator)
|
10
|
+
4. [Glitchling reference](#glitchling-reference)
|
11
|
+
- [Typogre](#typogre)
|
12
|
+
- [Mim1c](#mim1c)
|
13
|
+
- [Reduple](#reduple)
|
14
|
+
- [Rushmore](#rushmore)
|
15
|
+
- [Redactyl](#redactyl)
|
16
|
+
- [Jargoyle](#jargoyle)
|
17
|
+
- [Scannequin](#scannequin)
|
18
|
+
5. [Dataset workflows](#dataset-workflows)
|
19
|
+
6. [Prime Intellect integration](#prime-intellect-integration)
|
20
|
+
7. [Ensuring determinism](#ensuring-determinism)
|
21
|
+
8. [Testing checklist](#testing-checklist)
|
22
|
+
9. [Additional resources](#additional-resources)
|
23
|
+
|
24
|
+
## Installation
|
25
|
+
|
26
|
+
Install the latest release directly from PyPI:
|
27
|
+
|
28
|
+
```bash
|
29
|
+
pip install -U glitchlings
|
30
|
+
```
|
31
|
+
|
32
|
+
Need the optional Prime Intellect loader or the NLTK-powered Jargoyle ready to go? Pull in the documented extras:
|
33
|
+
|
34
|
+
```bash
|
35
|
+
# Prime Intellect DLC + verifiers dependency
|
36
|
+
pip install -U 'glitchlings[prime]'
|
37
|
+
|
38
|
+
# NLTK WordNet corpora for Jargoyle synonym swaps
|
39
|
+
python -m nltk.downloader wordnet
|
40
|
+
```
|
41
|
+
|
42
|
+
### Source install
|
43
|
+
|
44
|
+
When working from a local clone, install in editable mode so your changes take effect immediately:
|
45
|
+
|
46
|
+
```bash
|
47
|
+
pip install -e .
|
48
|
+
```
|
49
|
+
|
50
|
+
If you plan to experiment with the PyO3 acceleration crates, install `maturin` and run `maturin develop` from each crate directory inside the `rust/` folder to compile the optional Rust fast paths.
|
51
|
+
|
52
|
+
## Quickstart
|
53
|
+
|
54
|
+
Glitchlings are callable objects that accept strings (and string-like iterables) and return corrupted copies. Summon a single glitchling or gather multiple into a `Gaggle` to orchestrate compound effects:
|
55
|
+
|
56
|
+
```python
|
57
|
+
from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Reduple, Rushmore
|
58
|
+
|
59
|
+
gaggle = Gaggle([
|
60
|
+
Typogre(max_change_rate=0.03),
|
61
|
+
Mim1c(replacement_rate=0.02),
|
62
|
+
Reduple(seed=404),
|
63
|
+
Rushmore(max_deletion_rate=0.02),
|
64
|
+
], seed=1234)
|
65
|
+
|
66
|
+
print(gaggle(SAMPLE_TEXT))
|
67
|
+
```
|
68
|
+
|
69
|
+
All glitchlings are deterministic: pass a `seed` during construction (or on the enclosing `Gaggle`) to make the chaos reproducible.
|
70
|
+
|
71
|
+
### Command line interface
|
72
|
+
|
73
|
+
Prefer not to touch Python? The `glitchlings` CLI exposes the same functionality:
|
74
|
+
|
75
|
+
```bash
|
76
|
+
# Discover all built-in glitchlings.
|
77
|
+
glitchlings --list
|
78
|
+
|
79
|
+
# Glitch an entire file with Typogre and inspect the unified diff.
|
80
|
+
glitchlings -g typogre --file documents/report.txt --diff
|
81
|
+
|
82
|
+
# Pipe text through Mim1c for on-the-fly homoglyph swaps.
|
83
|
+
echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
84
|
+
```
|
85
|
+
|
86
|
+
Append `--diff` to render a unified diff comparing the original and corrupted outputs. Combine it with `--color=always` in terminals that support ANSI colours to highlight changes more clearly.
|
87
|
+
|
88
|
+
## The Gaggle orchestrator
|
89
|
+
|
90
|
+
The `Gaggle` class coordinates multiple glitchlings with deterministic sequencing and shared seeding:
|
91
|
+
|
92
|
+
- **Seed derivation** – pass `seed=` to `Gaggle(...)` and it will derive per-glitchling seeds via `derive_seed`, ensuring cross-run stability without repeated outputs.
|
93
|
+
- **Attack scopes & order** – glitchlings declare a scope (`document`, `sentence`, `word`, `character`) and attack order (`early`, `late`, etc.). By default the gaggle sorts by scope, then by order so character-level edits (Typogre, Mim1c, Scannequin) happen after word-level operations (Reduple, Rushmore, Redactyl, Jargoyle). Override this via `Gaggle([...], attack_order=[...])` when you need bespoke choreography.
|
94
|
+
- **Dynamic configuration** – use `gaggle.set_param("Typogre", "max_change_rate", 0.05)` to tweak nested glitchling parameters without rebuilding the ensemble.
|
95
|
+
- **Dataset utilities** – call `gaggle.corrupt_dataset(dataset, columns=[...])` to clone and perturb Hugging Face datasets while leaving the original untouched. Column inference automatically targets `text`, `prompt`, or similar string columns when none are provided.
|
96
|
+
- **Summoning from shorthand** – `glitchlings.summon` lets you build a gaggle from names or partially-configured objects (`summon(["typogre", Mim1c(replacement_rate=0.01)], seed=404)`).
|
97
|
+
|
98
|
+
## Glitchling reference
|
99
|
+
|
100
|
+
Each glitchling subclasses the shared `Glitchling` base class and exposes the same interface: call the instance with text, adjust parameters via `set_param`, and rely on deterministic seeds. This section summarises every built-in creature, its defaults, and practical usage notes.
|
101
|
+
|
102
|
+
### Typogre
|
103
|
+
|
104
|
+
- **Scope**: character level (early in the pipeline).
|
105
|
+
- **Signature**: `Typogre(max_change_rate=0.02, keyboard="CURATOR_QWERTY", seed=None)`.
|
106
|
+
- **Behaviour**: simulates fat-finger typing by swapping neighbouring keys, dropping spaces, inserting doubles, or choosing layout-adjacent characters. Keyboard layouts map through `glitchlings.util.KEYNEIGHBORS` and include curated QWERTY, DVORAK, and custom research boards.
|
107
|
+
- **Usage tips**:
|
108
|
+
- Lower `max_change_rate` (0.005–0.01) for gentle noise; raise it for more chaotic misspellings.
|
109
|
+
- Swap to `keyboard="DVORAK"` or supply a custom adjacency dict to model alternative hardware.
|
110
|
+
- Combine with Rushmore deletions to simulate hurried note-taking.
|
111
|
+
|
112
|
+
### Mim1c
|
113
|
+
|
114
|
+
- **Scope**: character level (late attack order so it acts after insertions/deletions).
|
115
|
+
- **Signature**: `Mim1c(replacement_rate=0.02, classes=None, seed=None)`.
|
116
|
+
- **Behaviour**: replaces alphanumeric characters with visually confusable Unicode homoglyphs via `confusable_homoglyphs` (e.g., `A → Α`, `e → е`). When `classes` is omitted it targets Latin, Greek, and Cyrillic scripts; pass `classes="all"` to consider every alias.
|
117
|
+
- **Usage tips**:
|
118
|
+
- Restrict `classes` (e.g., `classes=["LATIN"]`) when evaluation pipelines reject non-Latin scripts.
|
119
|
+
- Keep `replacement_rate` below 0.03 for legible perturbations; higher values can break tokenisers that expect ASCII.
|
120
|
+
- Pairs well with Typogre for keyboard + homoglyph chaos.
|
121
|
+
|
122
|
+
### Reduple
|
123
|
+
|
124
|
+
- **Scope**: word level.
|
125
|
+
- **Signature**: `Reduple(reduplication_rate=0.05, seed=None)`.
|
126
|
+
- **Behaviour**: randomly repeats words (“reduplication”) to mimic stuttering transcripts or speech disfluencies while preserving whitespace and punctuation.
|
127
|
+
- **Usage tips**:
|
128
|
+
- Use `reduplication_rate=0.01` to emulate occasional hesitations; bump to ≥0.08 for heavy repetition stress tests.
|
129
|
+
- Because edits preserve separators, downstream whitespace-sensitive parsers remain stable.
|
130
|
+
- Combine with Jargoyle to mix synonym swaps and repeated words for lexical drift.
|
131
|
+
|
132
|
+
### Rushmore
|
133
|
+
|
134
|
+
- **Scope**: word level.
|
135
|
+
- **Signature**: `Rushmore(max_deletion_rate=0.01, seed=None)`.
|
136
|
+
- **Behaviour**: deletes randomly selected words (skipping the first to preserve context) and tidies double spaces/punctuation afterwards.
|
137
|
+
- **Usage tips**:
|
138
|
+
- Keep `max_deletion_rate` conservative (<0.03) to avoid stripping sentences bare.
|
139
|
+
- Because the first word is preserved, prepend short context sentences when you need deletions deeper in the passage.
|
140
|
+
- Sandwich between Reduple and Redactyl to test summarisation robustness under missing context.
|
141
|
+
|
142
|
+
### Redactyl
|
143
|
+
|
144
|
+
- **Scope**: word level.
|
145
|
+
- **Signature**: `Redactyl(replacement_char="█", redaction_rate=0.05, merge_adjacent=False, seed=151)`.
|
146
|
+
- **Behaviour**: replaces the core characters of selected words with a replacement glyph (default FULL BLOCK) to simulate document redaction. Optionally merges adjacent redaction blocks across punctuation.
|
147
|
+
- **Usage tips**:
|
148
|
+
- Switch `replacement_char` to `_` or `*` when terminals struggle with block glyphs.
|
149
|
+
- Enable `merge_adjacent=True` to form continuous bars when redacting phrases.
|
150
|
+
- When no redactable words exist, the underlying implementation raises a `ValueError`—wrap calls with try/except in automated pipelines.
|
151
|
+
|
152
|
+
### Jargoyle
|
153
|
+
|
154
|
+
- **Scope**: word level.
|
155
|
+
- **Signature**: `Jargoyle(replacement_rate=0.1, part_of_speech="n", seed=None)`.
|
156
|
+
- **Behaviour**: swaps nouns/verbs/adjectives/adverbs with WordNet synonyms. Downloads the WordNet corpus on demand when missing and maintains deterministic sampling by sorting candidate lemmas.
|
157
|
+
- **Usage tips**:
|
158
|
+
- Target specific POS tags (e.g., `part_of_speech=("n", "v")`) to limit changes to content words.
|
159
|
+
- Lower `replacement_rate` (0.02–0.05) for subtle lexical variety; higher rates explore paraphrasing extremes.
|
160
|
+
- Ensure your environment has the WordNet data pre-cached to avoid first-run download delays.
|
161
|
+
|
162
|
+
### Scannequin
|
163
|
+
|
164
|
+
- **Scope**: character level (late order).
|
165
|
+
- **Signature**: `Scannequin(error_rate=0.02, seed=None)`.
|
166
|
+
- **Behaviour**: introduces OCR-style confusion pairs (rn↔m, cl↔d, O↔0, curly quotes to ASCII, etc.) using deterministic span selection. Supports a Rust acceleration path when compiled.
|
167
|
+
- **Usage tips**:
|
168
|
+
- Bump `error_rate` for scanned-document stress tests or reduce it for light OCR noise.
|
169
|
+
- Because replacements can change token length, run Scannequin after word-level glitchlings to avoid offset drift.
|
170
|
+
- Combine with Redactyl to mimic heavily redacted, poorly scanned archives.
|
171
|
+
|
172
|
+
## Dataset workflows
|
173
|
+
|
174
|
+
Leverage the Hugging Face integration to perturb large corpora reproducibly:
|
175
|
+
|
176
|
+
```python
|
177
|
+
from datasets import load_dataset
|
178
|
+
from glitchlings import Gaggle, Typogre, Mim1c
|
179
|
+
|
180
|
+
dataset = load_dataset("ag_news")
|
181
|
+
gaggle = Gaggle([Typogre(max_change_rate=0.02), Mim1c(replacement_rate=0.01)], seed=404)
|
182
|
+
|
183
|
+
corrupted = gaggle.corrupt_dataset(
|
184
|
+
dataset,
|
185
|
+
columns=["text"],
|
186
|
+
description="ag_news with typographic noise",
|
187
|
+
)
|
188
|
+
```
|
189
|
+
|
190
|
+
Key points:
|
191
|
+
|
192
|
+
- When `columns` is omitted, Glitchlings infers targets (`prompt`, `question`, or all string columns) using `_resolve_columns` semantics from the Prime loader.
|
193
|
+
- The returned dataset is a shallow copy containing both clean and corrupted columns—persist it with `corrupted.push_to_hub(...)` or `corrupted.save_to_disk(...)`.
|
194
|
+
- Use dataset-level seeds (`seed=` on the gaggle) so repeated corruptions are stable across machines.
|
195
|
+
|
196
|
+
## Prime Intellect integration
|
197
|
+
|
198
|
+
Installing the `prime` extra exposes `glitchlings.dlc.prime.load_environment`, a convenience wrapper around `verifiers.load_environment` that lets you pre-inject glitchlings into benchmark datasets.
|
199
|
+
|
200
|
+
```python
|
201
|
+
from glitchlings import Mim1c, Typogre
|
202
|
+
from glitchlings.dlc.prime import load_environment, tutorial_level, Difficulty
|
203
|
+
|
204
|
+
# Load an existing environment and apply custom corruption
|
205
|
+
custom_env = load_environment(
|
206
|
+
"osoleve/syllabify-en",
|
207
|
+
glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
|
208
|
+
seed=404,
|
209
|
+
columns=["prompt"], # optional; inferred when omitted
|
210
|
+
)
|
211
|
+
|
212
|
+
# Or bootstrap a difficulty-scaled tutorial environment
|
213
|
+
practice_env = tutorial_level(
|
214
|
+
"osoleve/syllabify-en",
|
215
|
+
difficulty=Difficulty.Hard,
|
216
|
+
)
|
217
|
+
```
|
218
|
+
|
219
|
+
Capabilities at a glance:
|
220
|
+
|
221
|
+
- **Flexible inputs** – pass a string environment slug, an instantiated `verifiers.Environment`, a single glitchling, a list of glitchlings or names, or a pre-built `Gaggle`.
|
222
|
+
- **Column inference** – when `columns` is `None`, the loader searches for `prompt`/`question` columns, otherwise falls back to all string-valued columns. Explicitly list columns to target subsets (e.g., prompts but not references).
|
223
|
+
- **Deterministic summoning** – non-`Gaggle` inputs are normalised via `summon(...)` with the provided `seed`, so repeated calls produce matching corruption ensembles.
|
224
|
+
- **Tutorial difficulty scaling** – `tutorial_level` wires in tuned Mim1c/Typogre parameters multiplied by the selected `Difficulty` enum. Use `Difficulty.Easy` for gentle practice or `Difficulty.Extreme` to hammer robustness.
|
225
|
+
- **Dataset mutation** – environments are returned with their dataset replaced by the corrupted clone. Skip the `glitchlings` argument to leave the dataset untouched.
|
226
|
+
|
227
|
+
## Ensuring determinism
|
228
|
+
|
229
|
+
- Derive seeds from the surrounding context (`Gaggle.derive_seed`) when spawning new RNGs.
|
230
|
+
- Stabilise candidate order before sampling subsets to keep runs reproducible.
|
231
|
+
- Use `set_param` to expose tunable values so they can be reset between tests.
|
232
|
+
- When writing new glitchlings, route randomness through the instance RNG rather than module-level state.
|
233
|
+
|
234
|
+
## Testing checklist
|
235
|
+
|
236
|
+
Before publishing changes or documenting new glitchlings, run the Pytest suite from the repository root:
|
237
|
+
|
238
|
+
```bash
|
239
|
+
pytest
|
240
|
+
```
|
241
|
+
|
242
|
+
Some tests require the NLTK WordNet corpus. If you see skips mentioning WordNet, install it with:
|
243
|
+
|
244
|
+
```bash
|
245
|
+
python -c "import nltk; nltk.download('wordnet')"
|
246
|
+
```
|
247
|
+
|
248
|
+
## Additional resources
|
249
|
+
|
250
|
+
- [Monster Manual](../MONSTER_MANUAL.md) – complete bestiary with flavour text.
|
251
|
+
- [Repository README](../README.md) – project overview and ASCII ambience.
|
252
|
+
|
253
|
+
Once the `/docs` folder is published through GitHub Pages, this guide becomes the landing site for your glitchling adventures.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "glitchlings"
|
3
|
-
version = "0.1.3"
|
3
|
+
version = "0.1.4"
|
4
4
|
description = "Monsters for your language games."
|
5
5
|
readme = "README.md"
|
6
6
|
requires-python = ">=3.12"
|
@@ -48,6 +48,7 @@ prime = [
|
|
48
48
|
]
|
49
49
|
dev = [
|
50
50
|
"pytest>=8.0.0",
|
51
|
+
"hypothesis>=6.100.0",
|
51
52
|
]
|
52
53
|
|
53
54
|
[build-system]
|
@@ -0,0 +1,113 @@
|
|
1
|
+
"""Integration helpers for the optional verifiers prime DLC."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from collections.abc import Iterable, Sequence
|
6
|
+
from enum import Enum
|
7
|
+
|
8
|
+
import verifiers as vf
|
9
|
+
|
10
|
+
try:
|
11
|
+
from datasets import Dataset
|
12
|
+
except ModuleNotFoundError: # pragma: no cover - optional dependency
|
13
|
+
Dataset = object # type: ignore[assignment]
|
14
|
+
|
15
|
+
from ..zoo import Gaggle, Glitchling, Mim1c, Typogre, summon
|
16
|
+
|
17
|
+
|
18
|
+
def _resolve_environment(env: str | vf.Environment) -> vf.Environment:
|
19
|
+
"""Return a fully-instantiated verifier environment."""
|
20
|
+
|
21
|
+
if isinstance(env, str):
|
22
|
+
env = vf.load_environment(env)
|
23
|
+
|
24
|
+
if not isinstance(env, vf.Environment):
|
25
|
+
raise TypeError("Invalid environment type")
|
26
|
+
|
27
|
+
return env
|
28
|
+
|
29
|
+
|
30
|
+
def _resolve_columns(dataset: Dataset, columns: Sequence[str] | None) -> list[str]:
|
31
|
+
"""Identify which dataset columns should be corrupted."""
|
32
|
+
|
33
|
+
available = set(dataset.column_names)
|
34
|
+
|
35
|
+
if columns is not None:
|
36
|
+
missing = sorted(set(columns) - available)
|
37
|
+
if missing:
|
38
|
+
missing_str = ", ".join(missing)
|
39
|
+
raise ValueError(f"Columns not found in dataset: {missing_str}")
|
40
|
+
return list(columns)
|
41
|
+
|
42
|
+
for candidate in ("prompt", "question"):
|
43
|
+
if candidate in available:
|
44
|
+
return [candidate]
|
45
|
+
|
46
|
+
sample = dataset[0] if len(dataset) else {}
|
47
|
+
inferred = [
|
48
|
+
name
|
49
|
+
for name in dataset.column_names
|
50
|
+
if isinstance(sample.get(name), str)
|
51
|
+
]
|
52
|
+
|
53
|
+
if inferred:
|
54
|
+
return inferred
|
55
|
+
|
56
|
+
raise ValueError("Unable to determine which dataset columns to corrupt.")
|
57
|
+
|
58
|
+
|
59
|
+
class Difficulty(Enum):
|
60
|
+
"""Difficulty levels for tutorial environments."""
|
61
|
+
|
62
|
+
Easy = 0.25
|
63
|
+
Normal = 1.0
|
64
|
+
Hard = 1.75
|
65
|
+
Extreme = 3
|
66
|
+
Impossible = 9
|
67
|
+
|
68
|
+
|
69
|
+
def tutorial_level(
|
70
|
+
env: vf.Environment | str,
|
71
|
+
seed: int = 151,
|
72
|
+
difficulty: Difficulty = Difficulty.Normal,
|
73
|
+
) -> vf.Environment:
|
74
|
+
"""Create a low-corruption environment using tuned defaults."""
|
75
|
+
|
76
|
+
tuned_mim1c = Mim1c(replacement_rate=0.01 * difficulty.value)
|
77
|
+
tuned_typogre = Typogre(max_change_rate=0.025 * difficulty.value)
|
78
|
+
|
79
|
+
return load_environment(
|
80
|
+
env,
|
81
|
+
glitchlings=[tuned_mim1c, tuned_typogre],
|
82
|
+
seed=seed,
|
83
|
+
)
|
84
|
+
|
85
|
+
|
86
|
+
def load_environment(
|
87
|
+
env: str | vf.Environment,
|
88
|
+
glitchlings: Iterable[str | Glitchling] | Glitchling | str | Gaggle | None = None,
|
89
|
+
*,
|
90
|
+
seed: int = 151,
|
91
|
+
columns: Sequence[str] | None = None,
|
92
|
+
) -> vf.Environment:
|
93
|
+
"""Load an environment and optionally corrupt it with glitchlings."""
|
94
|
+
|
95
|
+
environment = _resolve_environment(env)
|
96
|
+
|
97
|
+
if glitchlings is None:
|
98
|
+
return environment
|
99
|
+
|
100
|
+
if isinstance(glitchlings, Gaggle):
|
101
|
+
gaggle = glitchlings
|
102
|
+
else:
|
103
|
+
if isinstance(glitchlings, (Glitchling, str)):
|
104
|
+
resolved = [glitchlings]
|
105
|
+
else:
|
106
|
+
resolved = list(glitchlings)
|
107
|
+
|
108
|
+
gaggle = summon(resolved, seed=seed)
|
109
|
+
|
110
|
+
dataset = environment.dataset
|
111
|
+
corrupt_columns = _resolve_columns(dataset, columns)
|
112
|
+
environment.dataset = gaggle.corrupt_dataset(dataset, corrupt_columns)
|
113
|
+
return environment
|
@@ -130,3 +130,21 @@ def test_read_text_requires_input(monkeypatch, capsys):
|
|
130
130
|
read_text(args, parser)
|
131
131
|
captured = capsys.readouterr()
|
132
132
|
assert "No input text provided" in captured.err
|
133
|
+
|
134
|
+
|
135
|
+
def test_read_text_consumes_stdin(monkeypatch):
|
136
|
+
parser = build_parser()
|
137
|
+
args = parser.parse_args([])
|
138
|
+
|
139
|
+
sentinel = "stdin payload"
|
140
|
+
|
141
|
+
class DummyStdin:
|
142
|
+
def isatty(self):
|
143
|
+
return False
|
144
|
+
|
145
|
+
def read(self):
|
146
|
+
return sentinel
|
147
|
+
|
148
|
+
monkeypatch.setattr("sys.stdin", DummyStdin())
|
149
|
+
|
150
|
+
assert read_text(args, parser) == sentinel
|
@@ -0,0 +1,24 @@
|
|
1
|
+
from glitchlings.zoo.typogre import Typogre
|
2
|
+
|
3
|
+
|
4
|
+
def test_typogre_clone_preserves_configuration_and_seed_behavior() -> None:
|
5
|
+
original = Typogre(max_change_rate=0.05, keyboard="AZERTY", seed=111)
|
6
|
+
|
7
|
+
clone = original.clone(seed=222)
|
8
|
+
|
9
|
+
assert isinstance(clone, Typogre)
|
10
|
+
assert clone.max_change_rate == original.max_change_rate
|
11
|
+
assert clone.keyboard == original.keyboard
|
12
|
+
|
13
|
+
sample_text = "The quick brown fox jumps over the lazy dog."
|
14
|
+
|
15
|
+
original.reset_rng()
|
16
|
+
original_result = original(sample_text)
|
17
|
+
|
18
|
+
clone.reset_rng()
|
19
|
+
clone_result_first = clone(sample_text)
|
20
|
+
clone.reset_rng()
|
21
|
+
clone_result_second = clone(sample_text)
|
22
|
+
|
23
|
+
assert clone_result_first == clone_result_second
|
24
|
+
assert clone_result_first != original_result
|
@@ -0,0 +1,93 @@
|
|
1
|
+
"""Property-based tests covering core orchestration primitives."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import string
|
6
|
+
|
7
|
+
from hypothesis import assume, given, strategies as st
|
8
|
+
|
9
|
+
from glitchlings.zoo.core import AttackOrder, AttackWave, Gaggle, Glitchling
|
10
|
+
|
11
|
+
|
12
|
+
def _build_corruption(name: str, amplitude: int):
|
13
|
+
"""Create a deterministic corruption function driven by the provided RNG.
|
14
|
+
|
15
|
+
The function appends a marker tied to the glitchling name along with a
|
16
|
+
pseudo-random suffix that depends on the glitchling's RNG. This allows the
|
17
|
+
tests to assert that derived seeds and ordering are both respected.
|
18
|
+
"""
|
19
|
+
|
20
|
+
choices = (name + "xyz").replace("|", "_")
|
21
|
+
|
22
|
+
def _corrupt(text: str, *, rng) -> str:
|
23
|
+
if amplitude == 0:
|
24
|
+
return f"{text}|{name}"
|
25
|
+
suffix = "".join(rng.choice(choices) for _ in range(amplitude))
|
26
|
+
return f"{text}|{name}:{suffix}"
|
27
|
+
|
28
|
+
return _corrupt
|
29
|
+
|
30
|
+
|
31
|
+
@st.composite
|
32
|
+
def glitchling_specs(draw):
|
33
|
+
name = draw(
|
34
|
+
st.text(alphabet=string.ascii_letters + string.digits, min_size=1, max_size=8)
|
35
|
+
)
|
36
|
+
wave = draw(st.sampled_from(list(AttackWave)))
|
37
|
+
order = draw(st.sampled_from(list(AttackOrder)))
|
38
|
+
amplitude = draw(st.integers(min_value=0, max_value=4))
|
39
|
+
return {"name": name, "wave": wave, "order": order, "amplitude": amplitude}
|
40
|
+
|
41
|
+
|
42
|
+
@given(
|
43
|
+
master_seed=st.integers(min_value=-(2**63), max_value=2**63 - 1),
|
44
|
+
specs=st.lists(glitchling_specs(), min_size=1, max_size=5, unique_by=lambda s: s["name"]),
|
45
|
+
)
|
46
|
+
def test_gaggle_ordering_and_determinism(master_seed, specs):
|
47
|
+
"""Gaggles should honour ordering guarantees and deterministic RNG use."""
|
48
|
+
|
49
|
+
glitchlings = [
|
50
|
+
Glitchling(
|
51
|
+
name=spec["name"],
|
52
|
+
corruption_function=_build_corruption(spec["name"], spec["amplitude"]),
|
53
|
+
scope=spec["wave"],
|
54
|
+
order=spec["order"],
|
55
|
+
)
|
56
|
+
for spec in specs
|
57
|
+
]
|
58
|
+
|
59
|
+
gaggle = Gaggle(glitchlings, seed=master_seed)
|
60
|
+
|
61
|
+
expected = [
|
62
|
+
spec["name"]
|
63
|
+
for spec in sorted(
|
64
|
+
specs,
|
65
|
+
key=lambda spec: (spec["wave"], spec["order"], spec["name"]),
|
66
|
+
)
|
67
|
+
]
|
68
|
+
actual = [g.name for g in gaggle.apply_order]
|
69
|
+
assert actual == expected
|
70
|
+
|
71
|
+
text = "payload"
|
72
|
+
first_run = gaggle(text)
|
73
|
+
second_run = Gaggle(glitchlings, seed=master_seed)(text)
|
74
|
+
assert first_run == second_run
|
75
|
+
|
76
|
+
|
77
|
+
# A single derivation input: (signed 64-bit int, alphanumeric text of up to
# 12 characters, int in 0..1024). Both sides of the comparison draw from it.
_DERIVATION_INPUTS = st.tuples(
    st.integers(min_value=-(2**63), max_value=2**63 - 1),
    st.text(alphabet=string.ascii_letters + string.digits, min_size=0, max_size=12),
    st.integers(min_value=0, max_value=1024),
)


@given(left=_DERIVATION_INPUTS, right=_DERIVATION_INPUTS)
def test_derived_seeds_change_with_inputs(left, right):
    """Two different derivation tuples must not yield the same derived seed."""
    # Identical tuples trivially derive identical seeds, so discard those draws.
    assume(left != right)

    seed_from_left = Gaggle.derive_seed(*left)
    seed_from_right = Gaggle.derive_seed(*right)
    assert seed_from_left != seed_from_right
|
@@ -0,0 +1,35 @@
|
|
1
|
+
import pytest
|
2
|
+
|
3
|
+
from glitchlings.util import string_diffs
|
4
|
+
|
5
|
+
|
6
|
+
def test_string_diffs_groups_consecutive_edits_and_skips_equals():
    """``string_diffs`` returns one non-empty group per edit run, with no "equal" ops."""
    expected_groups = [
        [("replace", "k", "s")],
        [("replace", "e", "i")],
        [("insert", "", "g")],
    ]

    groups = string_diffs("kitten", "sitting")
    assert groups == expected_groups

    for group in groups:
        # Every group must carry at least one operation ...
        assert group
        # ... and none of those operations may be an unchanged ("equal") span.
        assert not any(tag == "equal" for tag, *_ in group)
|
18
|
+
|
19
|
+
|
20
|
+
@pytest.mark.parametrize(
    "a,b,expected",
    [
        # Leading deletion and trailing insertion separated by an unchanged middle.
        ("flaw", "lawn", [[("delete", "f", "")], [("insert", "", "n")]]),
        # Deletion and insertion separated by a single unchanged character.
        ("distance", "instance", [[("delete", "d", "")], [("insert", "", "n")]]),
    ],
)
def test_string_diffs_handles_multiple_edit_groups(
    a: str,
    b: str,
    expected: list[list[tuple[str, str, str]]],
):
    """Edits separated by unchanged text are reported as separate groups."""
    groups = string_diffs(a, b)
    assert groups == expected
|
@@ -1,52 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
import functools as ft
|
3
|
-
|
4
|
-
import verifiers as vf
|
5
|
-
from datasets import Dataset
|
6
|
-
|
7
|
-
from ..zoo import Glitchling, Gaggle, Mim1c, Typogre, summon
|
8
|
-
|
9
|
-
|
10
|
-
class Difficulty(Enum):
    """Named difficulty tiers for tutorial environments.

    Each member's value is a numeric factor; ``tutorial_level`` multiplies
    its base corruption rates by it.
    """

    # Below-baseline corruption.
    Easy = 0.25
    # Baseline: base rates are used unchanged.
    Normal = 1.0
    # Progressively heavier corruption.
    Hard = 1.75
    Extreme = 3
    Impossible = 9
|
18
|
-
|
19
|
-
|
20
|
-
def tutorial_level(
    env: vf.Environment | str, seed=151, difficulty: Difficulty = Difficulty.Normal
) -> vf.Environment:
    """Return ``env`` with its prompt (or question) column lightly corrupted.

    Args:
        env: An environment instance, or a name passed to ``vf.load_environment``.
        seed: Seed handed to ``summon`` when assembling the glitchlings.
        difficulty: Tier whose value scales the base corruption rates.

    Returns:
        The same environment object, with ``dataset`` replaced by the corrupted one.

    Raises:
        AssertionError: If ``env`` is not a ``vf.Environment`` after loading.
        ValueError: If the dataset has neither a "prompt" nor a "question" column.
    """
    scale = difficulty.value
    mim1c = Mim1c(replacement_rate=0.01 * scale)
    typogre = Typogre(max_change_rate=0.025 * scale)
    gaggle: Gaggle = summon([mim1c, typogre], seed=seed)

    if isinstance(env, str):
        env = vf.load_environment(env)

    assert isinstance(env, vf.Environment), "Invalid environment type"

    # "prompt" takes precedence; "question" is only used when "prompt" is absent.
    for column in ("prompt", "question"):
        if column in env.dataset.column_names:
            env.dataset = gaggle.corrupt_dataset(env.dataset, [column])
            return env

    raise ValueError("Can't find prompt or question column")
|
43
|
-
|
44
|
-
|
45
|
-
def load_environment(
    env: str | vf.Environment,
    seed=151,
    difficulty: Difficulty = Difficulty.Normal,
    loader=tutorial_level,
) -> vf.Environment:
    """Build an environment by delegating to ``loader``.

    ``loader`` is called as ``loader(env, seed=seed, difficulty=difficulty)``
    and its result is returned unchanged; it defaults to ``tutorial_level``.
    """
    prepared_env = loader(env, seed=seed, difficulty=difficulty)
    return prepared_env
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|