glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. glitchlings/__init__.py +36 -17
  2. glitchlings/__main__.py +0 -1
  3. glitchlings/_zoo_rust/__init__.py +12 -0
  4. glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/pipeline_assets.json +29 -0
  17. glitchlings/attack/__init__.py +53 -0
  18. glitchlings/attack/compose.py +299 -0
  19. glitchlings/attack/core.py +465 -0
  20. glitchlings/attack/encode.py +114 -0
  21. glitchlings/attack/metrics.py +104 -0
  22. glitchlings/attack/metrics_dispatch.py +70 -0
  23. glitchlings/attack/tokenization.py +157 -0
  24. glitchlings/auggie.py +283 -0
  25. glitchlings/compat/__init__.py +9 -0
  26. glitchlings/compat/loaders.py +355 -0
  27. glitchlings/compat/types.py +41 -0
  28. glitchlings/conf/__init__.py +41 -0
  29. glitchlings/conf/loaders.py +331 -0
  30. glitchlings/conf/schema.py +156 -0
  31. glitchlings/conf/types.py +72 -0
  32. glitchlings/config.toml +2 -0
  33. glitchlings/constants.py +59 -0
  34. glitchlings/dev/__init__.py +3 -0
  35. glitchlings/dev/docs.py +45 -0
  36. glitchlings/dlc/__init__.py +17 -3
  37. glitchlings/dlc/_shared.py +296 -0
  38. glitchlings/dlc/gutenberg.py +400 -0
  39. glitchlings/dlc/huggingface.py +37 -65
  40. glitchlings/dlc/prime.py +55 -114
  41. glitchlings/dlc/pytorch.py +98 -0
  42. glitchlings/dlc/pytorch_lightning.py +173 -0
  43. glitchlings/internal/__init__.py +16 -0
  44. glitchlings/internal/rust.py +159 -0
  45. glitchlings/internal/rust_ffi.py +432 -0
  46. glitchlings/main.py +123 -32
  47. glitchlings/runtime_config.py +24 -0
  48. glitchlings/util/__init__.py +29 -176
  49. glitchlings/util/adapters.py +65 -0
  50. glitchlings/util/keyboards.py +311 -0
  51. glitchlings/util/transcripts.py +108 -0
  52. glitchlings/zoo/__init__.py +47 -24
  53. glitchlings/zoo/assets/__init__.py +29 -0
  54. glitchlings/zoo/core.py +301 -167
  55. glitchlings/zoo/core_execution.py +98 -0
  56. glitchlings/zoo/core_planning.py +451 -0
  57. glitchlings/zoo/corrupt_dispatch.py +295 -0
  58. glitchlings/zoo/ekkokin.py +118 -0
  59. glitchlings/zoo/hokey.py +137 -0
  60. glitchlings/zoo/jargoyle.py +179 -274
  61. glitchlings/zoo/mim1c.py +106 -68
  62. glitchlings/zoo/pedant/__init__.py +107 -0
  63. glitchlings/zoo/pedant/core.py +105 -0
  64. glitchlings/zoo/pedant/forms.py +74 -0
  65. glitchlings/zoo/pedant/stones.py +74 -0
  66. glitchlings/zoo/redactyl.py +44 -175
  67. glitchlings/zoo/rng.py +259 -0
  68. glitchlings/zoo/rushmore.py +359 -116
  69. glitchlings/zoo/scannequin.py +18 -125
  70. glitchlings/zoo/transforms.py +386 -0
  71. glitchlings/zoo/typogre.py +76 -162
  72. glitchlings/zoo/validation.py +477 -0
  73. glitchlings/zoo/zeedub.py +33 -86
  74. glitchlings-0.9.3.dist-info/METADATA +334 -0
  75. glitchlings-0.9.3.dist-info/RECORD +80 -0
  76. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
  77. glitchlings/zoo/_ocr_confusions.py +0 -34
  78. glitchlings/zoo/_rate.py +0 -21
  79. glitchlings/zoo/reduple.py +0 -169
  80. glitchlings-0.2.5.dist-info/METADATA +0 -490
  81. glitchlings-0.2.5.dist-info/RECORD +0 -27
  82. /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
  83. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
  84. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
  85. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,334 @@
1
+ Metadata-Version: 2.4
2
+ Name: glitchlings
3
+ Version: 0.9.3
4
+ Summary: Monsters for your language games.
5
+ Author: osoleve
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/osoleve/glitchlings
8
+ Project-URL: Repository, https://github.com/osoleve/glitchlings.git
9
+ Project-URL: Issues, https://github.com/osoleve/glitchlings/issues
10
+ Project-URL: Changelog, https://github.com/osoleve/glitchlings/releases
11
+ Keywords: nlp,adversarial augmentation,text augmentation,data augmentation,domain randomization
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Programming Language :: Rust
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Topic :: Software Development :: Testing
23
+ Requires-Python: >=3.10
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
27
+ Requires-Dist: importlib-resources>=5.0.0; python_version < "3.11"
28
+ Requires-Dist: packaging>=23.0
29
+ Requires-Dist: pyyaml>=6.0.0
30
+ Provides-Extra: all
31
+ Requires-Dist: hypothesis>=6.140.0; extra == "all"
32
+ Requires-Dist: interrogate>=1.5.0; extra == "all"
33
+ Requires-Dist: jellyfish==1.2.0; extra == "all"
34
+ Requires-Dist: isort>=5.13.0; extra == "all"
35
+ Requires-Dist: mkdocs>=1.6.0; extra == "all"
36
+ Requires-Dist: mkdocs-material>=9.5.0; extra == "all"
37
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "all"
38
+ Requires-Dist: mkdocstrings-python>=1.10.0; extra == "all"
39
+ Requires-Dist: mypy>=1.8.0; extra == "all"
40
+ Requires-Dist: numpy<3.0,>=1.24; extra == "all"
41
+ Requires-Dist: pre-commit>=3.8.0; extra == "all"
42
+ Requires-Dist: py-gutenberg==1.0.0; extra == "all"
43
+ Requires-Dist: pytest>=8.0.0; extra == "all"
44
+ Requires-Dist: pytest-cov>=4.1.0; extra == "all"
45
+ Requires-Dist: ruff>=0.6.0; extra == "all"
46
+ Requires-Dist: types-PyYAML>=6.0.0; extra == "all"
47
+ Requires-Dist: verifiers>=0.1.8; extra == "all"
48
+ Requires-Dist: tiktoken>=0.3.0; extra == "all"
49
+ Requires-Dist: tokenizers>=0.13.0; extra == "all"
50
+ Provides-Extra: attack
51
+ Requires-Dist: tiktoken>=0.3.0; extra == "attack"
52
+ Requires-Dist: tokenizers>=0.13.0; extra == "attack"
53
+ Provides-Extra: gutenberg
54
+ Requires-Dist: py-gutenberg==1.0.0; extra == "gutenberg"
55
+ Provides-Extra: hf
56
+ Requires-Dist: datasets>=4.0.0; extra == "hf"
57
+ Provides-Extra: lightning
58
+ Requires-Dist: pytorch_lightning>=2.0.0; extra == "lightning"
59
+ Provides-Extra: prime
60
+ Requires-Dist: verifiers>=0.1.8; extra == "prime"
61
+ Requires-Dist: jellyfish==1.2.0; extra == "prime"
62
+ Provides-Extra: torch
63
+ Requires-Dist: torch>=2.0.0; extra == "torch"
64
+ Provides-Extra: dev
65
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
66
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
67
+ Requires-Dist: hypothesis>=6.140.0; extra == "dev"
68
+ Requires-Dist: numpy<3.0,>=1.24; extra == "dev"
69
+ Requires-Dist: mkdocs>=1.6.0; extra == "dev"
70
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "dev"
71
+ Requires-Dist: mkdocs-material>=9.5.0; extra == "dev"
72
+ Requires-Dist: mkdocstrings-python>=1.10.0; extra == "dev"
73
+ Requires-Dist: interrogate>=1.5.0; extra == "dev"
74
+ Requires-Dist: isort>=5.13.0; extra == "dev"
75
+ Requires-Dist: ruff>=0.6.0; extra == "dev"
76
+ Requires-Dist: mypy>=1.8.0; extra == "dev"
77
+ Requires-Dist: pre-commit>=3.8.0; extra == "dev"
78
+ Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
79
+ Dynamic: license-file
80
+
81
+ #
82
+
83
+ ```plaintext
84
+ .─') _ .─') _
85
+ ( OO) ) ( OO ) )
86
+ ░██████ ░██ ░██ ░██ ░██ ░██ ░██
87
+ ░██ ░██ ░██ ░██ ░██ ░██
88
+ ░██ ░██ ░██░████████ ░███████ ░████████ ░██ ░██░████████ ░████████ ░███████
89
+ ░██ █████ ░██ ░██ ░██ ░██('─.░██ ░██ ░██ ░██ ░██░██ ░██ ░██.─')░██ ░██
90
+ ░██ ██ ░██ ░██ ░██ ░██( OO ) ╱░██ ░██ ░██ ░██░██ ░██ ░██(OO)░██ ░███████
91
+ ░██ ░███ ░██ ░██ ░██ ░██ ░██ ░██ ░██ ░██ ░██░██ ░██ ░██ o ░███ ░██
92
+ ░█████░█ ░██ ░██ ░████ ░███████ ░██ ░██ ░██ ░██░██ ░██ ░█████░██ ░███████
93
+ ░██
94
+ ░███████
95
+
96
+ Every language game breeds monsters.
97
+ ```
98
+
99
+ ![Python Versions](https://img.shields.io/pypi/pyversions/glitchlings.svg)
100
+ [![PyPI version](https://img.shields.io/pypi/v/glitchlings.svg)](https://pypi.org/project/glitchlings/)
101
+ ![Wheel](https://img.shields.io/pypi/wheel/glitchlings.svg)
102
+ ![Linting and Typing](https://github.com/osoleve/glitchlings/actions/workflows/ci.yml/badge.svg)
103
+ ![Entropy Budget](https://img.shields.io/badge/entropy-lifegiving-magenta.svg)
104
+ ![Chaos](https://img.shields.io/badge/chaos-friend--shaped-chartreuse.svg)
105
+ ![Charm](https://img.shields.io/badge/jouissance-indefatigable-cyan.svg)
106
+ ![Lore Compliance](https://img.shields.io/badge/ISO--474--▓▓-Z--Compliant-blue.svg)
107
+
108
+ `Glitchlings` are **utilities for corrupting the text inputs to your language models in deterministic, _linguistically principled_ ways**.
109
+ Each embodies a different way that documents can be compromised in the wild.
110
+
111
+ If reinforcement learning environments are games, then `Glitchling`s are enemies to breathe new life into old challenges.
112
+
113
+ They do this by breaking surface patterns in the input while keeping the target output intact.
114
+
115
+ Some `Glitchling`s are petty nuisances. Some `Glitchling`s are eldritch horrors.
116
+ Together, they create truly nightmarish scenarios for your language models.
117
+
118
+ After all, what good is general intelligence if it can't handle a little chaos?
119
+
120
+ -_The Curator_
121
+
122
+ ## Motivation
123
+
124
+ If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
125
+
126
+ Conversely, training a model to perform well in the presence of the types of perturbations introduced by `Glitchling`s should help it generalize better.
127
+
128
+ ## Quickstart
129
+
130
+ ```bash
131
+ pip install -U glitchlings
132
+ ```
133
+
134
+ The fastest way to get started is to ask my assistant, `Auggie`, to prepare a custom mix of glitchlings for you:
135
+
136
+ ```python
137
+ from glitchlings import Auggie, SAMPLE_TEXT
138
+
139
+ auggie = (
140
+ Auggie(seed=404)
141
+ .typo(rate=0.015)
142
+ .confusable(rate=0.01)
143
+ .homophone(rate=0.02)
144
+ )
145
+
146
+ print(auggie(SAMPLE_TEXT))
147
+ ```
148
+
149
+ > One morning, when Gregor Samsa woke from troubld dreams, he found himself transformed in his bed into a horible vermin. He layed on his armour-like back, and if he lifted his head a little he could see his brown belly, slightly domed and divided by arches into stiff sections. The bedding was hardly able to cover it and seemed ready to slide off any moment. His many legs, pitifully thin compared with the size of the rest of him, waved about helplessly as he looked.
150
+
151
+ **You're more than welcome to summon them directly, if you're feeling brave:**
152
+
153
+ ```python
154
+ from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Ekkokin
155
+
156
+ gaggle = Gaggle(
157
+ [
158
+ Typogre(rate=0.015),
159
+ Mim1c(rate=0.01),
160
+ Ekkokin(rate=0.02),
161
+ ],
162
+ seed=404
163
+ )
164
+ ```
165
+
166
+ Consult the [Glitchlings Usage Guide](docs/index.md)
167
+ for end-to-end instructions spanning the Python API, CLI, and third-party integrations.
168
+
169
+ ## Your First Battle
170
+
171
+ Summon your chosen `Glitchling` (_or a few, if ya nasty_) and call it on your text or slot it into `Dataset.map(...)`, supplying a seed if desired.
172
+ Glitchlings are standard Python classes:
173
+
174
+ ```python
175
+ from glitchlings import Gaggle, Typogre, Mim1c
176
+
177
+ custom_typogre = Typogre(rate=0.1)
178
+ selective_mimic = Mim1c(rate=0.05, classes=["LATIN", "GREEK"])
179
+
180
+ gaggle = Gaggle([custom_typogre, selective_mimic], seed=99)
181
+ corrupted = gaggle("We Await Silent Tristero's Empire.")
182
+ print(corrupted)
183
+ ```
184
+
185
+ Calling a `Glitchling` on a `str` transparently calls `.corrupt(str, ...) -> str`.
186
+ This means that as long as your glitchlings get along logically, they play nicely with one another.
187
+
188
+ When summoned as or gathered into a `Gaggle`, the `Glitchling`s will automatically order themselves into attack waves, based on the scope of the change they make:
189
+
190
+ 1. Document
191
+ 2. Paragraph
192
+ 3. Sentence
193
+ 4. Word
194
+ 5. Character
195
+
196
+ They're horrible little gremlins, but they're not _unreasonable_.
197
+
198
+ ## Command-Line Interface (CLI)
199
+
200
+ Keyboard warriors can challenge them directly via the `glitchlings` command (see the generated CLI reference in `docs/cli.md` for the full contract):
201
+
202
+ ```bash
203
+ # Discover which glitchlings are currently on the loose.
204
+ glitchlings --list
205
+
206
+ # Review the full CLI contract.
207
+ glitchlings --help
208
+
209
+ # Run Typogre against the contents of a file and inspect the diff.
210
+ glitchlings -g typogre --file documents/report.txt --diff
211
+
212
+ # Configure glitchlings inline by passing keyword arguments.
213
+ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
214
+
215
+ # Pipe text straight into the CLI for an on-the-fly corruption.
216
+ echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
217
+
218
+ # Emit a structured Attack report with tokens, token IDs, and metrics.
219
+ glitchlings --report json --sample
220
+ ```
221
+
222
+ ## Configuration Files
223
+
224
+ Configurations live in plain YAML files so you can version-control experiments without touching code:
225
+
226
+ ```bash
227
+ # Load a roster from a YAML attack configuration.
228
+ glitchlings --config experiments/chaos.yaml "Let slips the glitchlings of war"
229
+ ```
230
+
231
+ ```yaml
232
+ # experiments/chaos.yaml
233
+ seed: 31337
234
+ glitchlings:
235
+ - name: Typogre
236
+ rate: 0.04
237
+ - "Rushmore(rate=0.12, unweighted=True)"
238
+ - name: Zeedub
239
+ parameters:
240
+ rate: 0.02
241
+ characters: ["\u200b", "\u2060"]
242
+ ```
243
+
244
+ ## Attack on Token
245
+
246
+ Looking to compare before/after corruption with metrics and stable seeds? Reach for the [`Attack` helper](docs/attack.md), which bundles tokenization, metrics, and transcript batching into a single utility. It accepts plain `list[str]` batches, renders quick `summary()` reports, and can compare multiple tokenizers via `Attack.compare(...)` when you need a metrics matrix.
247
+
248
+ ## Development
249
+
250
+ Follow the [development setup guide](docs/development.md) for editable installs, automated tests, and tips on enabling the Rust pipeline while you hack on new glitchlings.
251
+
252
+ ## Starter 'lings
253
+
254
+ For maintainability reasons, all `Glitchling`s have consented to be given nicknames once they're in your care. See the [Monster Manual](MONSTER_MANUAL.md) for a complete bestiary.
255
+
256
+ ### Typogre
257
+
258
+ _What a nice word, would be a shame if something happened to it._
259
+
260
+ > _**Fatfinger.**_ Typogre introduces character-level errors (duplicating, dropping, adding, or swapping) based on the layout of a keyboard (QWERTY by default, with Dvorak and Colemak variants built-in).
261
+
262
+ ### Mim1c
263
+
264
+ _Wait, was that...?_
265
+
266
+ > _**Confusion.**_ Mim1c replaces non-space characters with Unicode Confusables, characters that are distinct but would not usually confuse a human reader.
267
+
268
+ ### Hokey
269
+
270
+ _She's soooooo coooool!_
271
+
272
+ > _**Passionista.**_ Hokey gets a little excited and streeeeetches words for emphasis.
273
+ >
274
+ > _Apocryphal Glitchling contributed by Chloé Nunes_
275
+
276
+ ### Scannequin
277
+
278
+ _How can a computer need reading glasses?_
279
+
280
+ > _**OCArtifacts.**_ Scannequin mimics optical character recognition errors by swapping visually similar character sequences (like rn↔m, cl↔d, O↔0, l/I/1).
281
+
282
+ ### Zeedub
283
+
284
+ _Watch your step around here._
285
+
286
+ > _**Invisible Ink.**_ Zeedub slips zero-width codepoints between non-space character pairs, forcing models to reason about text whose visible form masks hidden glyphs.
287
+
288
+ ### Ekkokin
289
+
290
+ _Did you hear what I heard?_
291
+
292
+ > _**Echo Chamber.**_ Ekkokin swaps words with curated homophones so the text still sounds right while the spelling drifts. Groups are normalised to prevent duplicates and casing is preserved when substitutions fire.
293
+
294
+ ### Jargoyle
295
+
296
+ _Uh oh. The worst person you know just bought a thesaurus._
297
+
298
+ > _**Sesquipedalianism.**_ Jargoyle insufferably replaces words with synonyms at random, without regard for connotational or denotational differences.
299
+
300
+ ### Rushmore
301
+
302
+ _I accidentally an entire word._
303
+
304
+ > _**Tactical Scrambler.**_ Rushmore randomly drops, duplicates, or swaps words in the text to simulate hasty writing, editing mistakes, or transmission errors.
305
+
306
+ ### Redactyl
307
+
308
+ _Oops, that was my black highlighter._
309
+
310
+ > _**FOIA Reply.**_ Redactyl obscures random words in your document like an NSA analyst with a bad sense of humor.
311
+
312
+ ## Apocrypha
313
+
314
+ Cave paintings and oral tradition contain many depictions of strange, otherworldly `Glitchling`s.
315
+ These _Apocryphal `Glitchling`s_ are said to possess unique abilities or behaviors.
316
+ If you encounter one of these elusive beings, please document your findings and share them with _The Curator_.
317
+
318
+ ### Ensuring Reproducible Corruption
319
+
320
+ Every `Glitchling` should own its own independent `random.Random` instance. That means:
321
+
322
+ - No `random.seed(...)` calls touch Python's global RNG.
323
+ - Supplying a `seed` when you construct a `Glitchling` (or when you `summon(...)`) makes its behavior reproducible.
324
+ - Re-running a `Gaggle` with the same master seed and the same input text (_and same external data!_) yields identical corruption output.
325
+ - Corruption functions are written to accept an `rng` parameter internally so that all randomness is centralized and testable.
326
+
327
+ #### At Wits' End?
328
+
329
+ If you're trying to add a new glitchling and can't seem to make it deterministic, here are some places to look for determinism-breaking code:
330
+
331
+ 1. Search for any direct calls to `random.choice` or `random.shuffle` that bypass the provided `rng`, and for any code that relies on `set(...)` iteration order.
332
+ 2. Ensure you sort collections before shuffling or sampling.
333
+ 3. Make sure indices are chosen from a stable reference (e.g., original text) when applying length‑changing edits.
334
+ 4. Make sure there are enough sort keys to maintain stability.
@@ -0,0 +1,80 @@
1
+ glitchlings/__init__.py,sha256=YuzCOa41eGE9HfQ4SAGup5Kpp9P4B7x6LBhDdfG-t1s,1123
2
+ glitchlings/__main__.py,sha256=nB7btO_T4wBFOcyawfWpjEindVrUfTqqV5hdeeS1HT8,128
3
+ glitchlings/_zoo_rust.cp312-win_amd64.pyd,sha256=_TxMtxPzPDKDDJRXEswD1OPiy5lsZAFS7yVMGVB41lE,3484672
4
+ glitchlings/auggie.py,sha256=ShLFd-Ic6D_yScXa7XLNcpyFV2uJcrYInQe22JctuvU,9051
5
+ glitchlings/config.toml,sha256=051Rri2m5ebSekQ4Z8kGWUaXsuH7b7QcVl8zFMpQUak,105
6
+ glitchlings/constants.py,sha256=OeijOqSpbT03spKqoAYSJ-B9pfQ6-h5s-K8s0OI-Wzo,1674
7
+ glitchlings/main.py,sha256=1Z_2Nja1KJyhxrWIDLS1YDOAmaIRzXKSDRzOu2Tsju0,9301
8
+ glitchlings/runtime_config.py,sha256=Lt6e2p-Ykmt0jkFZlTJl31T_7Mtx9AmkQr1QA7UVBE4,450
9
+ glitchlings/_zoo_rust/__init__.py,sha256=bApb6gCn5zGYGluVn7LIaB96BTfNnIhhw_TRXF6QrxE,285
10
+ glitchlings/assets/__init__.py,sha256=DE2nT3P8guxbeqoi807hwJDaTQGIhAbCADVfAg4iLJE,5982
11
+ glitchlings/assets/apostrofae_pairs.json,sha256=lPLFLndzn_f7_5wZizxsLMnwBY4O63zsCvDjyJ56MLA,553
12
+ glitchlings/assets/ekkokin_homophones.json,sha256=SWp7jFWehrshTwC76W8MA248C87yP1xQrp8iKTxM1vE,19258
13
+ glitchlings/assets/hokey_assets.json,sha256=1GaSEzXwtT1nvf0B9mFyLzHOcqzKbPreibsC6iBWAHA,3083
14
+ glitchlings/assets/mim1c_homoglyphs.json.gz.b64,sha256=SIKCmFw7Tm5NkoNf-CqntytqqQJfYpRmWXdlDt92s6U,82968
15
+ glitchlings/assets/ocr_confusions.tsv,sha256=S-IJEYCIXYKT1Uu7Id8Lnvg5pw528yNigTtWUdnMv9k,213
16
+ glitchlings/assets/pipeline_assets.json,sha256=CHj25Z1l6BTe6Y-c1hGMx_8fXgAaiJE5OKY0Q0ZGna0,532
17
+ glitchlings/assets/lexemes/academic.json,sha256=qgwocuJCuyYZHXSp4fqch4kH4n94Q-EvAefwvVzxVH0,16879
18
+ glitchlings/assets/lexemes/colors.json,sha256=rdanKP6cZwXY56cur3yKN_D15xHrtZwE7JwNckdRUpE,17986
19
+ glitchlings/assets/lexemes/corporate.json,sha256=1t7LEzZoCdsvMCihtR7R1ACLherW1B4i7S_C5LQewvQ,11880
20
+ glitchlings/assets/lexemes/cyberpunk.json,sha256=ZLa-sV3Bpl2k2NW1WJauA21EbEGBqNZOghLN86bFS8A,1360
21
+ glitchlings/assets/lexemes/lovecraftian.json,sha256=XPONzHTD3ZoIowwyGvFp8oqZ9VJ5corQI_ls5bE0CD8,1599
22
+ glitchlings/assets/lexemes/synonyms.json,sha256=JQPySTgfcUNEk9ZU7W4RCNZPbQvQ0aafEHdDST2-pz4,47878
23
+ glitchlings/attack/__init__.py,sha256=tst8ueSJe6NYYj9sxCa0u_8epLj-vqMwD-RmgmHt4a4,1454
24
+ glitchlings/attack/compose.py,sha256=lahyb2f125WOVrggPOUcb6N1xmbjwrvKFOEIdShzTes,9984
25
+ glitchlings/attack/core.py,sha256=CB2tt4225Tk7QctmuW6bCQ52rdcA-5deTquCa8f7OKE,18461
26
+ glitchlings/attack/encode.py,sha256=yUOtV1P9iAlEjDycReOaKAkBBtAa4aSqXzN2xUypAcY,3352
27
+ glitchlings/attack/metrics.py,sha256=XYN8l9zFQZ6yhgfaI5BleR6aXBNKRhiz6ARCDsv5vrg,3088
28
+ glitchlings/attack/metrics_dispatch.py,sha256=U4cvRPD3nzteBeTqX1WCAFQSGpCjkl3_PzcP0ZQRCmQ,2137
29
+ glitchlings/attack/tokenization.py,sha256=i3iCdDsW4Sg-jPcvxFZH4eO27aitQ-U5apBVZAk_aOU,5437
30
+ glitchlings/compat/__init__.py,sha256=vHd79zAxelHzcDyzY9dEBzKoGJtwGi7Lu_RdcL1BL78,317
31
+ glitchlings/compat/loaders.py,sha256=ONJaVdKq6uLjIg2i9EQpP4cZeEbE4rTjoi8Qa7fisGI,12119
32
+ glitchlings/compat/types.py,sha256=DVFS0s0D_K-iVaSR5LJHTVpMm3TzG8CXMZvZrsc3mqI,975
33
+ glitchlings/conf/__init__.py,sha256=_eZwO3y53_UahbgW_LMUTFXrkTiH2daK0cpvIVJiNTY,1052
34
+ glitchlings/conf/loaders.py,sha256=Xh3LXDBpP9NT4mc4paQeQcZFjDN-AiCFVaUSK88CdAE,10804
35
+ glitchlings/conf/schema.py,sha256=kCeGT_XCNLC0yS0Qd2MbygPOayos2gHiMcxrVjG2oNg,5358
36
+ glitchlings/conf/types.py,sha256=w52gt46VTF0Q04TEygMkHvHj-MyjZeJU_8PuqtDTTwM,1905
37
+ glitchlings/dev/__init__.py,sha256=z-7rcrkV0wexkZMVH7jZRML5juQkFv3EFOTmVgW8EX0,94
38
+ glitchlings/dev/docs.py,sha256=2cYLDVmBsigdNb4l897vFz-61J2qWVBw2UafOno8c9o,1128
39
+ glitchlings/dlc/__init__.py,sha256=F7Ba_OXz4tv_hjKMyJEjU8j1weDzl4GvsCGwnBlmySk,799
40
+ glitchlings/dlc/_shared.py,sha256=kQhIFQSAaWRcfqcS2cJCN3mAlL9nn4p1iKXEqfhUnnw,10557
41
+ glitchlings/dlc/gutenberg.py,sha256=WL37NpsOV8TQchHXa4sksSCs1GA0gsRGcFDM4F5IcJQ,13976
42
+ glitchlings/dlc/huggingface.py,sha256=nMRkLVWyY9oLO_DUT_vjHWRUpLp4hLWchQKTi9D7mic,2275
43
+ glitchlings/dlc/prime.py,sha256=BT7ta1P7NWyItNteU4XgEY-xswa0nIgRhqXbzfi13yg,7743
44
+ glitchlings/dlc/pytorch.py,sha256=FMAgR4qVg432bOalvkGtpKh_uZYbdMLgLh5o-eO0vGo,3533
45
+ glitchlings/dlc/pytorch_lightning.py,sha256=2L1cqzIdYbABrLV7LqBmZHAuXj1ncn3RYJMQHKhh1oE,7303
46
+ glitchlings/internal/__init__.py,sha256=76MgFj19zewI0iyxKcrDQph4Kjd1pE8r9inxi7dSW1I,518
47
+ glitchlings/internal/rust.py,sha256=_DuSpPa_C91OYqSNnHWC4AqBFcIrDkIjWry1XVbuVMM,4938
48
+ glitchlings/internal/rust_ffi.py,sha256=FkdWIIFI9OZ-nAYFwxcYIt-vYlFEwsrFRq_H0VMQ6VI,12278
49
+ glitchlings/util/__init__.py,sha256=q20xcxERkKrfMBVqF4Q6mg2UgyFngfbFF5zqCTooO9s,1022
50
+ glitchlings/util/adapters.py,sha256=HnwwQN6g3Mit4h6VrmRfBXN4Lrh5FqnTZSGnIUqOLe4,2489
51
+ glitchlings/util/keyboards.py,sha256=bxbLi_qq2icjz45JedAAeZZslcqwtiIQgjT7YPokWj8,6729
52
+ glitchlings/util/transcripts.py,sha256=kHKny4gdnc9zixLmNJ9zv90TmEhk3ql1koyBm2-65GQ,3495
53
+ glitchlings/zoo/__init__.py,sha256=u29QxRVJGRjg3wMYwAlm9avlX_fm9nIsQn3XqfF2T6o,4936
54
+ glitchlings/zoo/core.py,sha256=pqK9cnj0efpIW7pZwN9BwteIWMqeO6whk2IDEvyH-fQ,21143
55
+ glitchlings/zoo/core_execution.py,sha256=quiTLyTEchPMPx7bQsk4tdYrDZ7aSiiAs5F7WWsOmmQ,3123
56
+ glitchlings/zoo/core_planning.py,sha256=49FtUrfzMje05PKQKkM81y-jDt2_Qbs7dgJaDUV5KBo,14940
57
+ glitchlings/zoo/corrupt_dispatch.py,sha256=-XIxIb9AEcI3KV79o12sfupny99oCXzdvnBr52UBCSc,9262
58
+ glitchlings/zoo/ekkokin.py,sha256=K_Png2lML6THcYa3AoWagHziRNaEHEvQtnSsKzRoU1Q,3425
59
+ glitchlings/zoo/hokey.py,sha256=xdIwcMiOIXl7PmonjPZo4PLKsD_p6priZwoOpsLl6-g,4401
60
+ glitchlings/zoo/jargoyle.py,sha256=mVDCkRSv09wZ4KyRG7P9WjexVyVZo0NKGNXci8Tcxnk,7885
61
+ glitchlings/zoo/mim1c.py,sha256=s0eQpCvxlMLm6WMpD9GzrHpBK2wD5neU1KNkw2SmRcY,4752
62
+ glitchlings/zoo/redactyl.py,sha256=mc8QeRgdGDH35LZnrncl-Q27mBGv7Q6VL-Zf2H--gG8,3060
63
+ glitchlings/zoo/rng.py,sha256=Mb3UnHM7t3SGidAs-N61KiTVj85azB89JsiXGPkapQ8,8130
64
+ glitchlings/zoo/rushmore.py,sha256=lfBdSTqt-GEQPFCLzYGtXHVqX8N1nE8BVAYYHrUsrSc,13061
65
+ glitchlings/zoo/scannequin.py,sha256=-RtpfqYk1fAVyK_wLkCGopq2p0VsbO2tUthMYINuzX0,1852
66
+ glitchlings/zoo/transforms.py,sha256=kLIriPIumQjL5uAhRwrGe6gbgq3bXx6kpGE7BgA_eBE,11651
67
+ glitchlings/zoo/typogre.py,sha256=bUtLmIHupZuJz2cGFJ0qwpb4yOWaC58l0kTjVo6RJws,4568
68
+ glitchlings/zoo/validation.py,sha256=ehOxf2jzJx8SBQHtvw5QtlbcCDwTg7bLfJ0UCHQf_fk,14523
69
+ glitchlings/zoo/zeedub.py,sha256=XueKsAtcgml6DBo4eDKAQAjrXmlO8rHa8KHTrTD_5Yk,2719
70
+ glitchlings/zoo/assets/__init__.py,sha256=2x6eIdZliPnbt9w3eVjhNbXxZ-Aj0Vt369LI_pfmyho,566
71
+ glitchlings/zoo/pedant/__init__.py,sha256=w1wBJtk4wBqd8-zVqPskiYsJ6NQ0kUvPKKIltunLbLI,2918
72
+ glitchlings/zoo/pedant/core.py,sha256=WGDmNRdnemGtCkcirli_8ClFV8gvhTcaHS7_-R4hFOc,3089
73
+ glitchlings/zoo/pedant/forms.py,sha256=aYKdjN9E77v7D2ARvn7ordZ0T4eCdtlu1Qiob1on6kE,1685
74
+ glitchlings/zoo/pedant/stones.py,sha256=atw4l1AdWDuxtUXRN9-jiqWoZHQv70YVCTyMbegETKU,2364
75
+ glitchlings-0.9.3.dist-info/licenses/LICENSE,sha256=EFEP1evBfHaxsMTBjxm0sZVRp2wct8QLvHE1saII5FI,11538
76
+ glitchlings-0.9.3.dist-info/METADATA,sha256=YQKwglA3KrpAZKr94TPfWRVkuSDoMbexHNC35lOZZj4,15016
77
+ glitchlings-0.9.3.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
78
+ glitchlings-0.9.3.dist-info/entry_points.txt,sha256=0pVGF4NIGjlDEt5Q0i0QUSUNigl5h9rhazn6DWEfyXg,107
79
+ glitchlings-0.9.3.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
80
+ glitchlings-0.9.3.dist-info/RECORD,,
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
2
  glitchlings = glitchlings.main:main
3
+ glitchlings-refresh-docs = glitchlings.dev.docs:main
@@ -1,34 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from importlib import resources
4
-
5
- _CONFUSION_TABLE: list[tuple[str, list[str]]] | None = None
6
-
7
-
8
- def load_confusion_table() -> list[tuple[str, list[str]]]:
9
- """Load the OCR confusion table shared by Python and Rust implementations."""
10
- global _CONFUSION_TABLE
11
- if _CONFUSION_TABLE is not None:
12
- return _CONFUSION_TABLE
13
-
14
- data = resources.files(__package__) / "ocr_confusions.tsv"
15
- text = data.read_text(encoding="utf-8")
16
- indexed_entries: list[tuple[int, tuple[str, list[str]]]] = []
17
- for line_number, line in enumerate(text.splitlines()):
18
- stripped = line.strip()
19
- if not stripped or stripped.startswith("#"):
20
- continue
21
- parts = stripped.split()
22
- if len(parts) < 2:
23
- continue
24
- source, *replacements = parts
25
- indexed_entries.append((line_number, (source, replacements)))
26
-
27
- # Sort longer patterns first to avoid overlapping matches, mirroring the
28
- # behaviour of the Rust `confusion_table` helper.
29
- indexed_entries.sort(
30
- key=lambda item: (-len(item[1][0]), item[0])
31
- )
32
- entries = [entry for _, entry in indexed_entries]
33
- _CONFUSION_TABLE = entries
34
- return entries
glitchlings/zoo/_rate.py DELETED
@@ -1,21 +0,0 @@
1
- from __future__ import annotations
2
-
3
-
4
- def resolve_rate(
5
- *,
6
- rate: float | None,
7
- legacy_value: float | None,
8
- default: float,
9
- legacy_name: str,
10
- ) -> float:
11
- """Return the effective rate while enforcing mutual exclusivity."""
12
-
13
- if rate is not None and legacy_value is not None:
14
- raise ValueError(
15
- f"Specify either 'rate' or '{legacy_name}', not both."
16
- )
17
- if rate is not None:
18
- return rate
19
- if legacy_value is not None:
20
- return legacy_value
21
- return default
@@ -1,169 +0,0 @@
1
- import re
2
- import random
3
- from typing import Any
4
-
5
- from .core import Glitchling, AttackWave
6
- from ._rate import resolve_rate
7
-
8
- try:
9
- from glitchlings._zoo_rust import reduplicate_words as _reduplicate_words_rust
10
- except ImportError: # pragma: no cover - compiled extension not present
11
- _reduplicate_words_rust = None
12
-
13
-
14
- def _python_reduplicate_words(
15
- text: str,
16
- *,
17
- rate: float,
18
- rng: random.Random,
19
- unweighted: bool = False,
20
- ) -> str:
21
- """Randomly reduplicate words in the text.
22
-
23
- Parameters
24
- - text: Input text.
25
- - rate: Max proportion of words to reduplicate (default 0.05).
26
- - rng: RNG used for sampling decisions.
27
- - unweighted: When True, sample words uniformly instead of length-weighted.
28
-
29
- Notes
30
- - Preserves spacing and punctuation by tokenizing with separators.
31
- - Deterministic when run with a fixed seed or via Gaggle.
32
- """
33
- # Preserve exact spacing and punctuation by using regex
34
- tokens = re.split(r"(\s+)", text) # Split but keep separators
35
-
36
- candidate_weights: list[tuple[int, float]] = []
37
- for i in range(0, len(tokens), 2): # Every other token is a word
38
- if i >= len(tokens):
39
- break
40
-
41
- word = tokens[i]
42
- if not word or word.isspace(): # Skip empty or whitespace
43
- continue
44
-
45
- match = re.match(r"^(\W*)(.*?)(\W*)$", word)
46
- core = match.group(2) if match else word
47
- core_length = len(core) if core else len(word)
48
- if core_length <= 0:
49
- core_length = len(word.strip()) or len(word)
50
- if core_length <= 0:
51
- core_length = 1
52
- weight = 1.0 if unweighted else 1.0 / core_length
53
- candidate_weights.append((i, weight))
54
-
55
- if not candidate_weights:
56
- return "".join(tokens)
57
-
58
- effective_rate = max(rate, 0.0)
59
- if effective_rate <= 0.0:
60
- return "".join(tokens)
61
-
62
- mean_weight = sum(weight for _, weight in candidate_weights) / len(
63
- candidate_weights
64
- )
65
-
66
- for index, weight in candidate_weights:
67
- if effective_rate >= 1.0:
68
- probability = 1.0
69
- else:
70
- if mean_weight <= 0.0:
71
- probability = effective_rate
72
- else:
73
- probability = min(1.0, effective_rate * (weight / mean_weight))
74
- if rng.random() >= probability:
75
- continue
76
-
77
- word = tokens[index]
78
- match = re.match(r"^(\W*)(.*?)(\W*)$", word)
79
- if match:
80
- prefix, core, suffix = match.groups()
81
- # Reduplicate with a space: "word" -> "word word"
82
- tokens[index] = f"{prefix}{core} {core}{suffix}"
83
- else:
84
- tokens[index] = f"{word} {word}"
85
- return "".join(tokens)
86
-
87
-
88
- def reduplicate_words(
89
- text: str,
90
- rate: float | None = None,
91
- seed: int | None = None,
92
- rng: random.Random | None = None,
93
- *,
94
- reduplication_rate: float | None = None,
95
- unweighted: bool = False,
96
- ) -> str:
97
- """Randomly reduplicate words in the text.
98
-
99
- Falls back to the Python implementation when the optional Rust
100
- extension is unavailable.
101
- """
102
-
103
- effective_rate = resolve_rate(
104
- rate=rate,
105
- legacy_value=reduplication_rate,
106
- default=0.01,
107
- legacy_name="reduplication_rate",
108
- )
109
-
110
- if rng is None:
111
- rng = random.Random(seed)
112
-
113
- clamped_rate = max(0.0, effective_rate)
114
- unweighted_flag = bool(unweighted)
115
-
116
- if _reduplicate_words_rust is not None:
117
- return _reduplicate_words_rust(text, clamped_rate, unweighted_flag, rng)
118
-
119
- return _python_reduplicate_words(
120
- text,
121
- rate=clamped_rate,
122
- rng=rng,
123
- unweighted=unweighted_flag,
124
- )
125
-
126
-
127
- class Reduple(Glitchling):
128
- """Glitchling that repeats words to simulate stuttering speech."""
129
-
130
- def __init__(
131
- self,
132
- *,
133
- rate: float | None = None,
134
- reduplication_rate: float | None = None,
135
- seed: int | None = None,
136
- unweighted: bool = False,
137
- ) -> None:
138
- self._param_aliases = {"reduplication_rate": "rate"}
139
- effective_rate = resolve_rate(
140
- rate=rate,
141
- legacy_value=reduplication_rate,
142
- default=0.01,
143
- legacy_name="reduplication_rate",
144
- )
145
- super().__init__(
146
- name="Reduple",
147
- corruption_function=reduplicate_words,
148
- scope=AttackWave.WORD,
149
- seed=seed,
150
- rate=effective_rate,
151
- unweighted=unweighted,
152
- )
153
-
154
- def pipeline_operation(self) -> dict[str, Any] | None:
155
- rate = self.kwargs.get("rate")
156
- if rate is None:
157
- return None
158
- unweighted = bool(self.kwargs.get("unweighted", False))
159
- return {
160
- "type": "reduplicate",
161
- "reduplication_rate": float(rate),
162
- "unweighted": unweighted,
163
- }
164
-
165
-
166
- reduple = Reduple()
167
-
168
-
169
- __all__ = ["Reduple", "reduple"]