glitchlings 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (79) hide show
  1. {glitchlings-0.3.0 → glitchlings-0.4.0}/MANIFEST.in +2 -0
  2. {glitchlings-0.3.0/src/glitchlings.egg-info → glitchlings-0.4.0}/PKG-INFO +30 -8
  3. {glitchlings-0.3.0 → glitchlings-0.4.0}/README.md +22 -2
  4. {glitchlings-0.3.0 → glitchlings-0.4.0}/pyproject.toml +13 -4
  5. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/Cargo.lock +1 -0
  6. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/Cargo.toml +1 -0
  7. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/glitch_ops.rs +411 -29
  8. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/lib.rs +77 -9
  9. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/resources.rs +11 -6
  10. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/typogre.rs +94 -56
  11. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/zeedub.rs +15 -9
  12. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/__init__.py +4 -0
  13. glitchlings-0.4.0/src/glitchlings/config.py +258 -0
  14. glitchlings-0.4.0/src/glitchlings/config.toml +3 -0
  15. glitchlings-0.4.0/src/glitchlings/lexicon/__init__.py +191 -0
  16. glitchlings-0.4.0/src/glitchlings/lexicon/data/default_vector_cache.json +16 -0
  17. glitchlings-0.4.0/src/glitchlings/lexicon/graph.py +303 -0
  18. glitchlings-0.4.0/src/glitchlings/lexicon/metrics.py +169 -0
  19. glitchlings-0.4.0/src/glitchlings/lexicon/vector.py +610 -0
  20. glitchlings-0.4.0/src/glitchlings/lexicon/wordnet.py +182 -0
  21. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/main.py +145 -5
  22. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/__init__.py +15 -0
  23. glitchlings-0.4.0/src/glitchlings/zoo/_sampling.py +55 -0
  24. glitchlings-0.4.0/src/glitchlings/zoo/_text_utils.py +104 -0
  25. glitchlings-0.4.0/src/glitchlings/zoo/jargoyle.py +326 -0
  26. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/redactyl.py +26 -54
  27. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/reduple.py +10 -21
  28. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/rushmore.py +15 -21
  29. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/typogre.py +22 -1
  30. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/zeedub.py +40 -1
  31. {glitchlings-0.3.0 → glitchlings-0.4.0/src/glitchlings.egg-info}/PKG-INFO +30 -8
  32. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/SOURCES.txt +15 -1
  33. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/requires.txt +7 -3
  34. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_cli.py +132 -4
  35. glitchlings-0.4.0/tests/test_config.py +59 -0
  36. glitchlings-0.4.0/tests/test_graph_lexicon.py +70 -0
  37. glitchlings-0.4.0/tests/test_jargoyle.py +209 -0
  38. glitchlings-0.4.0/tests/test_lexicon_config.py +56 -0
  39. glitchlings-0.4.0/tests/test_lexicon_metrics.py +120 -0
  40. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_parameter_effects.py +33 -0
  41. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_prime_echo_chamber.py +0 -10
  42. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_rust_backed_glitchlings.py +69 -14
  43. glitchlings-0.4.0/tests/test_vector_lexicon.py +193 -0
  44. glitchlings-0.3.0/src/glitchlings/zoo/_text_utils.py +0 -42
  45. glitchlings-0.3.0/src/glitchlings/zoo/jargoyle.py +0 -336
  46. glitchlings-0.3.0/tests/test_jargoyle.py +0 -44
  47. {glitchlings-0.3.0 → glitchlings-0.4.0}/LICENSE +0 -0
  48. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/Cargo.toml +0 -0
  49. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/assets/ocr_confusions.tsv +0 -0
  50. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/build.rs +0 -0
  51. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/pipeline.rs +0 -0
  52. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/rng.rs +0 -0
  53. {glitchlings-0.3.0 → glitchlings-0.4.0}/rust/zoo/src/text_buffer.rs +0 -0
  54. {glitchlings-0.3.0 → glitchlings-0.4.0}/setup.cfg +0 -0
  55. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/__main__.py +0 -0
  56. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/dlc/__init__.py +0 -0
  57. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/dlc/huggingface.py +0 -0
  58. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/dlc/prime.py +0 -0
  59. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/util/__init__.py +0 -0
  60. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/_ocr_confusions.py +0 -0
  61. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/_rate.py +0 -0
  62. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/adjax.py +0 -0
  63. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/core.py +0 -0
  64. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/mim1c.py +0 -0
  65. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/ocr_confusions.tsv +0 -0
  66. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings/zoo/scannequin.py +0 -0
  67. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/dependency_links.txt +0 -0
  68. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/entry_points.txt +0 -0
  69. {glitchlings-0.3.0 → glitchlings-0.4.0}/src/glitchlings.egg-info/top_level.txt +0 -0
  70. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_benchmarks.py +0 -0
  71. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_dataset_corruption.py +0 -0
  72. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_gaggle.py +0 -0
  73. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_glitchling_core.py +0 -0
  74. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_glitchlings_determinism.py +0 -0
  75. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_huggingface_dlc.py +0 -0
  76. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_keyboard_layouts.py +0 -0
  77. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_property_based.py +0 -0
  78. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_text_utils.py +0 -0
  79. {glitchlings-0.3.0 → glitchlings-0.4.0}/tests/test_util.py +0 -0
@@ -1,4 +1,6 @@
1
1
  recursive-include rust *.rs *.toml *.lock *.tsv
2
2
  recursive-include src/glitchlings/zoo *.tsv
3
+ recursive-include src/glitchlings/lexicon/data *.json
4
+ include src/glitchlings/config.toml
3
5
  prune rust/target
4
6
  prune rust/zoo/target
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -209,7 +209,7 @@ Project-URL: Homepage, https://github.com/osoleve/glitchlings
209
209
  Project-URL: Repository, https://github.com/osoleve/glitchlings.git
210
210
  Project-URL: Issues, https://github.com/osoleve/glitchlings/issues
211
211
  Project-URL: Changelog, https://github.com/osoleve/glitchlings/releases
212
- Keywords: nlp,text,adversarial augmentation,text augmentation,large language models,llms,data augmentation,rlvr
212
+ Keywords: nlp,adversarial augmentation,text augmentation,data augmentation,domain randomization
213
213
  Classifier: Development Status :: 3 - Alpha
214
214
  Classifier: Intended Audience :: Developers
215
215
  Classifier: Programming Language :: Python
@@ -224,18 +224,20 @@ Requires-Python: >=3.10
224
224
  Description-Content-Type: text/markdown
225
225
  License-File: LICENSE
226
226
  Requires-Dist: confusable-homoglyphs>=3.3.1
227
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
228
+ Requires-Dist: pyyaml>=6.0.0
227
229
  Provides-Extra: hf
228
230
  Requires-Dist: datasets>=4.0.0; extra == "hf"
229
- Provides-Extra: wordnet
230
- Requires-Dist: nltk>=3.9.1; extra == "wordnet"
231
- Requires-Dist: numpy<=2.0,>=1.24; extra == "wordnet"
231
+ Provides-Extra: vectors
232
+ Requires-Dist: numpy<=2.0,>=1.24; extra == "vectors"
233
+ Requires-Dist: spacy>=3.7.2; extra == "vectors"
234
+ Requires-Dist: gensim>=4.3.2; extra == "vectors"
232
235
  Provides-Extra: prime
233
236
  Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
234
237
  Requires-Dist: jellyfish>=1.2.0; extra == "prime"
235
238
  Provides-Extra: dev
236
239
  Requires-Dist: pytest>=8.0.0; extra == "dev"
237
240
  Requires-Dist: hypothesis>=6.140.0; extra == "dev"
238
- Requires-Dist: nltk>=3.9.1; extra == "dev"
239
241
  Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
240
242
  Dynamic: license-file
241
243
 
@@ -348,10 +350,30 @@ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
348
350
 
349
351
  # Pipe text straight into the CLI for an on-the-fly corruption.
350
352
  echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
353
+
354
+ # Load a roster from a YAML attack configuration.
355
+ glitchlings --config experiments/chaos.yaml "Let slip the glitchlings of war"
351
356
  ```
352
357
 
353
358
  Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
354
359
 
360
+ Attack configurations live in plain YAML files so you can version-control experiments without touching code:
361
+
362
+ ```yaml
363
+ # experiments/chaos.yaml
364
+ seed: 31337
365
+ glitchlings:
366
+   - name: Typogre
367
+     rate: 0.04
368
+   - "Rushmore(rate=0.12, unweighted=True)"
369
+   - name: Zeedub
370
+     parameters:
371
+       rate: 0.02
372
+       characters: ["\u200b", "\u2060"]
373
+ ```
374
+
375
+ Pass the file to `glitchlings --config` or load it from Python with `glitchlings.load_attack_config` and `glitchlings.build_gaggle`.
376
+
355
377
  ## Development
356
378
 
357
379
  Follow the [development setup guide](docs/development.md) for editable installs, automated tests, and tips on enabling the Rust pipeline while you hack on new glitchlings.
@@ -416,8 +438,8 @@ _Uh oh. The worst person you know just bought a thesaurus._
416
438
  >
417
439
  > Args
418
440
  >
419
- > - `rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
420
- > - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
441
+ > - `rate (float)`: The maximum proportion of words to replace (default: 0.01, 1%).
442
+ > - `part_of_speech`: The WordNet-style part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all. Vector/graph backends ignore this filter while still honouring deterministic sampling.
421
443
  > - `seed (int)`: The random seed for reproducibility (default: 151).
422
444
 
423
445
  ### Reduple
@@ -107,10 +107,30 @@ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
107
107
 
108
108
  # Pipe text straight into the CLI for an on-the-fly corruption.
109
109
  echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
110
+
111
+ # Load a roster from a YAML attack configuration.
112
+ glitchlings --config experiments/chaos.yaml "Let slip the glitchlings of war"
110
113
  ```
111
114
 
112
115
  Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
113
116
 
117
+ Attack configurations live in plain YAML files so you can version-control experiments without touching code:
118
+
119
+ ```yaml
120
+ # experiments/chaos.yaml
121
+ seed: 31337
122
+ glitchlings:
123
+   - name: Typogre
124
+     rate: 0.04
125
+   - "Rushmore(rate=0.12, unweighted=True)"
126
+   - name: Zeedub
127
+     parameters:
128
+       rate: 0.02
129
+       characters: ["\u200b", "\u2060"]
130
+ ```
131
+
132
+ Pass the file to `glitchlings --config` or load it from Python with `glitchlings.load_attack_config` and `glitchlings.build_gaggle`.
133
+
114
134
  ## Development
115
135
 
116
136
  Follow the [development setup guide](docs/development.md) for editable installs, automated tests, and tips on enabling the Rust pipeline while you hack on new glitchlings.
@@ -175,8 +195,8 @@ _Uh oh. The worst person you know just bought a thesaurus._
175
195
  >
176
196
  > Args
177
197
  >
178
- > - `rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
179
- > - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
198
+ > - `rate (float)`: The maximum proportion of words to replace (default: 0.01, 1%).
199
+ > - `part_of_speech`: The WordNet-style part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all. Vector/graph backends ignore this filter while still honouring deterministic sampling.
180
200
  > - `seed (int)`: The random seed for reproducibility (default: 151).
181
201
 
182
202
  ### Reduple
@@ -1,19 +1,27 @@
1
1
  [project]
2
2
  name = "glitchlings"
3
- version = "0.3.0"
3
+ version = "0.4.0"
4
4
  description = "Monsters for your language games."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
7
7
 
8
8
  dependencies = [
9
9
  "confusable-homoglyphs>=3.3.1",
10
+ "tomli>=2.0.1; python_version < '3.11'",
11
+ "pyyaml>=6.0.0",
10
12
  ]
11
13
 
12
14
  authors = [
13
15
  { name = "osoleve" }
14
16
  ]
15
17
 
16
- keywords = ["nlp", "text", "adversarial augmentation", "text augmentation", "large language models", "llms", "data augmentation", "rlvr"]
18
+ keywords = [
19
+ "nlp",
20
+ "adversarial augmentation",
21
+ "text augmentation",
22
+ "data augmentation",
23
+ "domain randomization"
24
+ ]
17
25
 
18
26
  classifiers = [
19
27
  "Development Status :: 3 - Alpha",
@@ -42,12 +50,11 @@ glitchlings = "glitchlings.main:main"
42
50
 
43
51
  [project.optional-dependencies]
44
52
  hf = ["datasets>=4.0.0"]
45
- wordnet = ["nltk>=3.9.1", "numpy>=1.24,<=2.0"]
53
+ vectors = ["numpy>=1.24,<=2.0", "spacy>=3.7.2", "gensim>=4.3.2"]
46
54
  prime = ["verifiers>=0.1.3.post0", "jellyfish>=1.2.0"]
47
55
  dev = [
48
56
  "pytest>=8.0.0",
49
57
  "hypothesis>=6.140.0",
50
- "nltk>=3.9.1",
51
58
  "numpy>=1.24,<=2.0",
52
59
  ]
53
60
 
@@ -60,6 +67,8 @@ package-dir = {"" = "src"}
60
67
  include-package-data = true
61
68
 
62
69
  [tool.setuptools.package-data]
70
+ "glitchlings" = ["config.toml"]
71
+ "glitchlings.lexicon" = ["data/*.json"]
63
72
  "glitchlings.zoo" = ["ocr_confusions.tsv"]
64
73
 
65
74
  [tool.setuptools.packages.find]
@@ -407,4 +407,5 @@ dependencies = [
407
407
  "pyo3",
408
408
  "pyo3-build-config",
409
409
  "regex",
410
+ "smallvec",
410
411
  ]
@@ -13,6 +13,7 @@ pyo3 = { workspace = true }
13
13
  regex = { workspace = true }
14
14
  once_cell = { workspace = true }
15
15
  blake2 = { workspace = true }
16
+ smallvec = "1"
16
17
 
17
18
  [package.metadata.maturin]
18
19
  module-name = "glitchlings._zoo_rust"