glitchlings 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- {glitchlings-0.3.0 → glitchlings-0.4.0}/MANIFEST.in +2 -0
- {glitchlings-0.3.0/src/glitchlings.egg-info → glitchlings-0.4.0}/PKG-INFO +30 -8
- {glitchlings-0.3.0 → glitchlings-0.4.0}/README.md +22 -2
- {glitchlings-0.3.0 → glitchlings-0.4.0}/pyproject.toml +13 -4
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/Cargo.lock +1 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/Cargo.toml +1 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/glitch_ops.rs +411 -29
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/lib.rs +77 -9
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/resources.rs +11 -6
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/typogre.rs +94 -56
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/zeedub.rs +15 -9
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/__init__.py +4 -0
- glitchlings-0.4.0/src/glitchlings/config.py +258 -0
- glitchlings-0.4.0/src/glitchlings/config.toml +3 -0
- glitchlings-0.4.0/src/glitchlings/lexicon/__init__.py +191 -0
- glitchlings-0.4.0/src/glitchlings/lexicon/data/default_vector_cache.json +16 -0
- glitchlings-0.4.0/src/glitchlings/lexicon/graph.py +303 -0
- glitchlings-0.4.0/src/glitchlings/lexicon/metrics.py +169 -0
- glitchlings-0.4.0/src/glitchlings/lexicon/vector.py +610 -0
- glitchlings-0.4.0/src/glitchlings/lexicon/wordnet.py +182 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/main.py +145 -5
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/__init__.py +15 -0
- glitchlings-0.4.0/src/glitchlings/zoo/_sampling.py +55 -0
- glitchlings-0.4.0/src/glitchlings/zoo/_text_utils.py +104 -0
- glitchlings-0.4.0/src/glitchlings/zoo/jargoyle.py +326 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/redactyl.py +26 -54
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/reduple.py +10 -21
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/rushmore.py +15 -21
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/typogre.py +22 -1
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/zeedub.py +40 -1
- {glitchlings-0.3.0 → glitchlings-0.4.0/src/glitchlings.egg-info}/PKG-INFO +30 -8
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/SOURCES.txt +15 -1
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/requires.txt +7 -3
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_cli.py +132 -4
- glitchlings-0.4.0/tests/test_config.py +59 -0
- glitchlings-0.4.0/tests/test_graph_lexicon.py +70 -0
- glitchlings-0.4.0/tests/test_jargoyle.py +209 -0
- glitchlings-0.4.0/tests/test_lexicon_config.py +56 -0
- glitchlings-0.4.0/tests/test_lexicon_metrics.py +120 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_parameter_effects.py +33 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_prime_echo_chamber.py +0 -10
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_rust_backed_glitchlings.py +69 -14
- glitchlings-0.4.0/tests/test_vector_lexicon.py +193 -0
- glitchlings-0.3.0/src/glitchlings/zoo/_text_utils.py +0 -42
- glitchlings-0.3.0/src/glitchlings/zoo/jargoyle.py +0 -336
- glitchlings-0.3.0/tests/test_jargoyle.py +0 -44
- {glitchlings-0.3.0 → glitchlings-0.4.0}/LICENSE +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/Cargo.toml +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/assets/ocr_confusions.tsv +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/build.rs +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/pipeline.rs +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/rng.rs +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/text_buffer.rs +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/setup.cfg +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/__main__.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/dlc/__init__.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/dlc/huggingface.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/dlc/prime.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/util/__init__.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/_ocr_confusions.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/_rate.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/adjax.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/core.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/mim1c.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/ocr_confusions.tsv +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/scannequin.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/dependency_links.txt +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/entry_points.txt +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/top_level.txt +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_benchmarks.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_dataset_corruption.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_gaggle.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_glitchling_core.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_glitchlings_determinism.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_huggingface_dlc.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_keyboard_layouts.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_property_based.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_text_utils.py +0 -0
- {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: glitchlings
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Monsters for your language games.
|
|
5
5
|
Author: osoleve
|
|
6
6
|
License: Apache License
|
|
@@ -209,7 +209,7 @@ Project-URL: Homepage, https://github.com/osoleve/glitchlings
|
|
|
209
209
|
Project-URL: Repository, https://github.com/osoleve/glitchlings.git
|
|
210
210
|
Project-URL: Issues, https://github.com/osoleve/glitchlings/issues
|
|
211
211
|
Project-URL: Changelog, https://github.com/osoleve/glitchlings/releases
|
|
212
|
-
Keywords: nlp,
|
|
212
|
+
Keywords: nlp,adversarial augmentation,text augmentation,data augmentation,domain randomization
|
|
213
213
|
Classifier: Development Status :: 3 - Alpha
|
|
214
214
|
Classifier: Intended Audience :: Developers
|
|
215
215
|
Classifier: Programming Language :: Python
|
|
@@ -224,18 +224,20 @@ Requires-Python: >=3.10
|
|
|
224
224
|
Description-Content-Type: text/markdown
|
|
225
225
|
License-File: LICENSE
|
|
226
226
|
Requires-Dist: confusable-homoglyphs>=3.3.1
|
|
227
|
+
Requires-Dist: tomli>=2.0.1; python_version < "3.11"
|
|
228
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
227
229
|
Provides-Extra: hf
|
|
228
230
|
Requires-Dist: datasets>=4.0.0; extra == "hf"
|
|
229
|
-
Provides-Extra:
|
|
230
|
-
Requires-Dist:
|
|
231
|
-
Requires-Dist:
|
|
231
|
+
Provides-Extra: vectors
|
|
232
|
+
Requires-Dist: numpy<=2.0,>=1.24; extra == "vectors"
|
|
233
|
+
Requires-Dist: spacy>=3.7.2; extra == "vectors"
|
|
234
|
+
Requires-Dist: gensim>=4.3.2; extra == "vectors"
|
|
232
235
|
Provides-Extra: prime
|
|
233
236
|
Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
|
|
234
237
|
Requires-Dist: jellyfish>=1.2.0; extra == "prime"
|
|
235
238
|
Provides-Extra: dev
|
|
236
239
|
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
237
240
|
Requires-Dist: hypothesis>=6.140.0; extra == "dev"
|
|
238
|
-
Requires-Dist: nltk>=3.9.1; extra == "dev"
|
|
239
241
|
Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
|
|
240
242
|
Dynamic: license-file
|
|
241
243
|
|
|
@@ -348,10 +350,30 @@ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
|
|
|
348
350
|
|
|
349
351
|
# Pipe text straight into the CLI for an on-the-fly corruption.
|
|
350
352
|
echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
|
353
|
+
|
|
354
|
+
# Load a roster from a YAML attack configuration.
|
|
355
|
+
glitchlings --config experiments/chaos.yaml "Let slips the glitchlings of war"
|
|
351
356
|
```
|
|
352
357
|
|
|
353
358
|
Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
|
|
354
359
|
|
|
360
|
+
Attack configurations live in plain YAML files so you can version-control experiments without touching code:
|
|
361
|
+
|
|
362
|
+
```yaml
|
|
363
|
+
# experiments/chaos.yaml
|
|
364
|
+
seed: 31337
|
|
365
|
+
glitchlings:
|
|
366
|
+
- name: Typogre
|
|
367
|
+
rate: 0.04
|
|
368
|
+
- "Rushmore(rate=0.12, unweighted=True)"
|
|
369
|
+
- name: Zeedub
|
|
370
|
+
parameters:
|
|
371
|
+
rate: 0.02
|
|
372
|
+
characters: ["\u200b", "\u2060"]
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
Pass the file to `glitchlings --config` or load it from Python with `glitchlings.load_attack_config` and `glitchlings.build_gaggle`.
|
|
376
|
+
|
|
355
377
|
## Development
|
|
356
378
|
|
|
357
379
|
Follow the [development setup guide](docs/development.md) for editable installs, automated tests, and tips on enabling the Rust pipeline while you hack on new glitchlings.
|
|
@@ -416,8 +438,8 @@ _Uh oh. The worst person you know just bought a thesaurus._
|
|
|
416
438
|
>
|
|
417
439
|
> Args
|
|
418
440
|
>
|
|
419
|
-
> - `rate (float)`: The maximum proportion of words to replace (default: 0.
|
|
420
|
-
|
|
441
|
+
> - `rate (float)`: The maximum proportion of words to replace (default: 0.01, 1%).
|
|
442
|
+
- `part_of_speech`: The WordNet-style part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all. Vector/graph backends ignore this filter while still honouring deterministic sampling.
|
|
421
443
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
422
444
|
|
|
423
445
|
### Reduple
|
|
@@ -107,10 +107,30 @@ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
|
|
|
107
107
|
|
|
108
108
|
# Pipe text straight into the CLI for an on-the-fly corruption.
|
|
109
109
|
echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
|
110
|
+
|
|
111
|
+
# Load a roster from a YAML attack configuration.
|
|
112
|
+
glitchlings --config experiments/chaos.yaml "Let slips the glitchlings of war"
|
|
110
113
|
```
|
|
111
114
|
|
|
112
115
|
Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
|
|
113
116
|
|
|
117
|
+
Attack configurations live in plain YAML files so you can version-control experiments without touching code:
|
|
118
|
+
|
|
119
|
+
```yaml
|
|
120
|
+
# experiments/chaos.yaml
|
|
121
|
+
seed: 31337
|
|
122
|
+
glitchlings:
|
|
123
|
+
- name: Typogre
|
|
124
|
+
rate: 0.04
|
|
125
|
+
- "Rushmore(rate=0.12, unweighted=True)"
|
|
126
|
+
- name: Zeedub
|
|
127
|
+
parameters:
|
|
128
|
+
rate: 0.02
|
|
129
|
+
characters: ["\u200b", "\u2060"]
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Pass the file to `glitchlings --config` or load it from Python with `glitchlings.load_attack_config` and `glitchlings.build_gaggle`.
|
|
133
|
+
|
|
114
134
|
## Development
|
|
115
135
|
|
|
116
136
|
Follow the [development setup guide](docs/development.md) for editable installs, automated tests, and tips on enabling the Rust pipeline while you hack on new glitchlings.
|
|
@@ -175,8 +195,8 @@ _Uh oh. The worst person you know just bought a thesaurus._
|
|
|
175
195
|
>
|
|
176
196
|
> Args
|
|
177
197
|
>
|
|
178
|
-
> - `rate (float)`: The maximum proportion of words to replace (default: 0.
|
|
179
|
-
|
|
198
|
+
> - `rate (float)`: The maximum proportion of words to replace (default: 0.01, 1%).
|
|
199
|
+
- `part_of_speech`: The WordNet-style part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all. Vector/graph backends ignore this filter while still honouring deterministic sampling.
|
|
180
200
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
181
201
|
|
|
182
202
|
### Reduple
|
|
@@ -1,19 +1,27 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "glitchlings"
|
|
3
|
-
version = "0.3.0"
|
|
3
|
+
version = "0.4.0"
|
|
4
4
|
description = "Monsters for your language games."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
7
7
|
|
|
8
8
|
dependencies = [
|
|
9
9
|
"confusable-homoglyphs>=3.3.1",
|
|
10
|
+
"tomli>=2.0.1; python_version < '3.11'",
|
|
11
|
+
"pyyaml>=6.0.0",
|
|
10
12
|
]
|
|
11
13
|
|
|
12
14
|
authors = [
|
|
13
15
|
{ name = "osoleve" }
|
|
14
16
|
]
|
|
15
17
|
|
|
16
|
-
keywords = [
|
|
18
|
+
keywords = [
|
|
19
|
+
"nlp",
|
|
20
|
+
"adversarial augmentation",
|
|
21
|
+
"text augmentation",
|
|
22
|
+
"data augmentation",
|
|
23
|
+
"domain randomization"
|
|
24
|
+
]
|
|
17
25
|
|
|
18
26
|
classifiers = [
|
|
19
27
|
"Development Status :: 3 - Alpha",
|
|
@@ -42,12 +50,11 @@ glitchlings = "glitchlings.main:main"
|
|
|
42
50
|
|
|
43
51
|
[project.optional-dependencies]
|
|
44
52
|
hf = ["datasets>=4.0.0"]
|
|
45
|
-
|
|
53
|
+
vectors = ["numpy>=1.24,<=2.0", "spacy>=3.7.2", "gensim>=4.3.2"]
|
|
46
54
|
prime = ["verifiers>=0.1.3.post0", "jellyfish>=1.2.0"]
|
|
47
55
|
dev = [
|
|
48
56
|
"pytest>=8.0.0",
|
|
49
57
|
"hypothesis>=6.140.0",
|
|
50
|
-
"nltk>=3.9.1",
|
|
51
58
|
"numpy>=1.24,<=2.0",
|
|
52
59
|
]
|
|
53
60
|
|
|
@@ -60,6 +67,8 @@ package-dir = {"" = "src"}
|
|
|
60
67
|
include-package-data = true
|
|
61
68
|
|
|
62
69
|
[tool.setuptools.package-data]
|
|
70
|
+
"glitchlings" = ["config.toml"]
|
|
71
|
+
"glitchlings.lexicon" = ["data/*.json"]
|
|
63
72
|
"glitchlings.zoo" = ["ocr_confusions.tsv"]
|
|
64
73
|
|
|
65
74
|
[tool.setuptools.packages.find]
|