glitchlings 0.3.0__cp312-cp312-win_amd64.whl → 0.4.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

@@ -4,11 +4,7 @@ import re
4
4
  from typing import Any
5
5
 
6
6
  from ._rate import resolve_rate
7
- from ._text_utils import (
8
- split_preserving_whitespace,
9
- split_token_edges,
10
- token_core_length,
11
- )
7
+ from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
12
8
  from .core import AttackWave, Glitchling
13
9
 
14
10
  try:
@@ -31,30 +27,28 @@ def _python_delete_random_words(
31
27
  return text
32
28
 
33
29
  tokens = split_preserving_whitespace(text)
30
+ word_tokens = collect_word_tokens(tokens, skip_first_word=True)
34
31
 
35
- candidate_data: list[tuple[int, float]] = []
36
- for i in range(2, len(tokens), 2):
37
- word = tokens[i]
38
- if not word or word.isspace():
39
- continue
40
-
41
- length = token_core_length(word)
42
- weight = 1.0 if unweighted else 1.0 / length
43
- candidate_data.append((i, weight))
32
+ weighted_tokens: list[tuple[int, float, WordToken]] = []
33
+ for token in word_tokens:
34
+ weight = 1.0 if unweighted else 1.0 / float(token.core_length)
35
+ weighted_tokens.append((token.index, weight, token))
44
36
 
45
- if not candidate_data:
37
+ if not weighted_tokens:
46
38
  return text
47
39
 
48
40
  allowed_deletions = min(
49
- len(candidate_data), math.floor(len(candidate_data) * effective_rate)
41
+ len(weighted_tokens), math.floor(len(weighted_tokens) * effective_rate)
50
42
  )
51
43
  if allowed_deletions <= 0:
52
44
  return text
53
45
 
54
- mean_weight = sum(weight for _, weight in candidate_data) / len(candidate_data)
46
+ mean_weight = sum(weight for _, weight, _ in weighted_tokens) / len(
47
+ weighted_tokens
48
+ )
55
49
 
56
50
  deletions = 0
57
- for index, weight in candidate_data:
51
+ for index, weight, token in weighted_tokens:
58
52
  if deletions >= allowed_deletions:
59
53
  break
60
54
 
@@ -68,9 +62,9 @@ def _python_delete_random_words(
68
62
  if rng.random() >= probability:
69
63
  continue
70
64
 
71
- word = tokens[index]
72
- prefix, _, suffix = split_token_edges(word)
73
- tokens[index] = f"{prefix.strip()}{suffix.strip()}"
65
+ prefix = token.prefix.strip()
66
+ suffix = token.suffix.strip()
67
+ tokens[index] = f"{prefix}{suffix}"
74
68
 
75
69
  deletions += 1
76
70
 
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import math
4
4
  import random
5
- from typing import Optional
5
+ from typing import Any, Optional
6
6
 
7
7
  from .core import Glitchling, AttackWave, AttackOrder
8
8
  from ._rate import resolve_rate
@@ -204,6 +204,27 @@ class Typogre(Glitchling):
204
204
  keyboard=keyboard,
205
205
  )
206
206
 
207
+ def pipeline_operation(self) -> dict[str, Any] | None:
208
+ rate = self.kwargs.get("rate")
209
+ if rate is None:
210
+ rate = self.kwargs.get("max_change_rate")
211
+ if rate is None:
212
+ return None
213
+
214
+ keyboard = self.kwargs.get("keyboard", "CURATOR_QWERTY")
215
+ layout = getattr(KEYNEIGHBORS, str(keyboard), None)
216
+ if layout is None:
217
+ return None
218
+
219
+ serialized_layout = {key: list(value) for key, value in layout.items()}
220
+
221
+ return {
222
+ "type": "typo",
223
+ "rate": float(rate),
224
+ "keyboard": str(keyboard),
225
+ "layout": serialized_layout,
226
+ }
227
+
207
228
 
208
229
  typogre = Typogre()
209
230
 
glitchlings/zoo/zeedub.py CHANGED
@@ -101,7 +101,26 @@ def insert_zero_widths(
101
101
  return text
102
102
 
103
103
  if _inject_zero_widths_rust is not None:
104
- return _inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng)
104
+ state = None
105
+ python_state = None
106
+ if hasattr(rng, "getstate") and hasattr(rng, "setstate"):
107
+ state = rng.getstate()
108
+ python_result = _python_insert_zero_widths(
109
+ text,
110
+ rate=clamped_rate,
111
+ rng=rng,
112
+ characters=cleaned_palette,
113
+ )
114
+ if state is not None:
115
+ if hasattr(rng, "getstate"):
116
+ python_state = rng.getstate()
117
+ rng.setstate(state)
118
+ rust_result = _inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng)
119
+ if rust_result == python_result:
120
+ return rust_result
121
+ if python_state is not None and hasattr(rng, "setstate"):
122
+ rng.setstate(python_state)
123
+ return python_result
105
124
 
106
125
  return _python_insert_zero_widths(
107
126
  text,
@@ -137,6 +156,26 @@ class Zeedub(Glitchling):
137
156
  characters=tuple(characters) if characters is not None else None,
138
157
  )
139
158
 
159
+ def pipeline_operation(self) -> dict[str, Any] | None:
160
+ rate = self.kwargs.get("rate")
161
+ if rate is None:
162
+ return None
163
+
164
+ raw_characters = self.kwargs.get("characters")
165
+ if raw_characters is None:
166
+ palette = tuple(_DEFAULT_ZERO_WIDTH_CHARACTERS)
167
+ else:
168
+ palette = tuple(str(char) for char in raw_characters if char)
169
+
170
+ if not palette:
171
+ return None
172
+
173
+ return {
174
+ "type": "zwj",
175
+ "rate": float(rate),
176
+ "characters": list(palette),
177
+ }
178
+
140
179
 
141
180
  zeedub = Zeedub()
142
181
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -209,7 +209,7 @@ Project-URL: Homepage, https://github.com/osoleve/glitchlings
209
209
  Project-URL: Repository, https://github.com/osoleve/glitchlings.git
210
210
  Project-URL: Issues, https://github.com/osoleve/glitchlings/issues
211
211
  Project-URL: Changelog, https://github.com/osoleve/glitchlings/releases
212
- Keywords: nlp,text,adversarial augmentation,text augmentation,large language models,llms,data augmentation,rlvr
212
+ Keywords: nlp,adversarial augmentation,text augmentation,data augmentation,domain randomization
213
213
  Classifier: Development Status :: 3 - Alpha
214
214
  Classifier: Intended Audience :: Developers
215
215
  Classifier: Programming Language :: Python
@@ -224,18 +224,20 @@ Requires-Python: >=3.10
224
224
  Description-Content-Type: text/markdown
225
225
  License-File: LICENSE
226
226
  Requires-Dist: confusable-homoglyphs>=3.3.1
227
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
228
+ Requires-Dist: pyyaml>=6.0.0
227
229
  Provides-Extra: hf
228
230
  Requires-Dist: datasets>=4.0.0; extra == "hf"
229
- Provides-Extra: wordnet
230
- Requires-Dist: nltk>=3.9.1; extra == "wordnet"
231
- Requires-Dist: numpy<=2.0,>=1.24; extra == "wordnet"
231
+ Provides-Extra: vectors
232
+ Requires-Dist: numpy<=2.0,>=1.24; extra == "vectors"
233
+ Requires-Dist: spacy>=3.7.2; extra == "vectors"
234
+ Requires-Dist: gensim>=4.3.2; extra == "vectors"
232
235
  Provides-Extra: prime
233
236
  Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
234
237
  Requires-Dist: jellyfish>=1.2.0; extra == "prime"
235
238
  Provides-Extra: dev
236
239
  Requires-Dist: pytest>=8.0.0; extra == "dev"
237
240
  Requires-Dist: hypothesis>=6.140.0; extra == "dev"
238
- Requires-Dist: nltk>=3.9.1; extra == "dev"
239
241
  Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
240
242
  Dynamic: license-file
241
243
 
@@ -348,10 +350,30 @@ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
348
350
 
349
351
  # Pipe text straight into the CLI for an on-the-fly corruption.
350
352
  echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
353
+
354
+ # Load a roster from a YAML attack configuration.
355
+ glitchlings --config experiments/chaos.yaml "Let slips the glitchlings of war"
351
356
  ```
352
357
 
353
358
  Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
354
359
 
360
+ Attack configurations live in plain YAML files so you can version-control experiments without touching code:
361
+
362
+ ```yaml
363
+ # experiments/chaos.yaml
364
+ seed: 31337
365
+ glitchlings:
366
+ - name: Typogre
367
+ rate: 0.04
368
+ - "Rushmore(rate=0.12, unweighted=True)"
369
+ - name: Zeedub
370
+ parameters:
371
+ rate: 0.02
372
+ characters: ["\u200b", "\u2060"]
373
+ ```
374
+
375
+ Pass the file to `glitchlings --config` or load it from Python with `glitchlings.load_attack_config` and `glitchlings.build_gaggle`.
376
+
355
377
  ## Development
356
378
 
357
379
  Follow the [development setup guide](docs/development.md) for editable installs, automated tests, and tips on enabling the Rust pipeline while you hack on new glitchlings.
@@ -416,8 +438,8 @@ _Uh oh. The worst person you know just bought a thesaurus._
416
438
  >
417
439
  > Args
418
440
  >
419
- > - `rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
420
- > - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
441
+ > - `rate (float)`: The maximum proportion of words to replace (default: 0.01, 1%).
442
+ > - `part_of_speech`: The WordNet-style part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all. Vector/graph backends ignore this filter while still honouring deterministic sampling.
421
443
  > - `seed (int)`: The random seed for reproducibility (default: 151).
422
444
 
423
445
  ### Reduple
@@ -0,0 +1,38 @@
1
+ glitchlings/__init__.py,sha256=onTBFM_ih4O1E1ntZaNaFxiMcD5NtCGc4TaX276sgNk,867
2
+ glitchlings/__main__.py,sha256=pqNe1C9hMf8pap4oh6x6yo2h4Nsa2RFSaMWHfGtNXj0,130
3
+ glitchlings/_zoo_rust.cp312-win_amd64.pyd,sha256=yuHypdVdmcEzqbRRrSsGD9w1vgSTYJOyOW83zKCi1WE,2071040
4
+ glitchlings/config.py,sha256=P6qMJdHrvZYAIUj7NXxrpEKcv6lDy8Eo4toWu3ctsHs,8043
5
+ glitchlings/config.toml,sha256=EOEqKUwPygOv7MuEuExil5ZQfwKV1H2hU-Z5aBKQ440,111
6
+ glitchlings/main.py,sha256=sKdUk4trBS-yNbEAOwdTXldg87TSPwnSAr_inecj4bE,10442
7
+ glitchlings/dlc/__init__.py,sha256=IHD-GGhVFb7SVzErvf2YCJkOR4wGo0nFHXkn_daMvS8,146
8
+ glitchlings/dlc/huggingface.py,sha256=PIesnDIEvyJxj1IuLw2P9nVPTr4Nv81XM7w2axfyhkA,3029
9
+ glitchlings/dlc/prime.py,sha256=b5CE1qDl5MxZjTudlKrqMsmSGxXNKZ16krqPyrr2nK8,9569
10
+ glitchlings/lexicon/__init__.py,sha256=kCQ0_Dbh39gQNVeL2PAC-8ByUOXyK0mnHZmsJ9x70z0,5854
11
+ glitchlings/lexicon/graph.py,sha256=mQ-EA-q_EIqGrWezB-hFtU8iohabFYYI3W3KE-pHOP4,10836
12
+ glitchlings/lexicon/metrics.py,sha256=zwd3vm9-pUQbSywORXgHyHCfAjIJs8S5rPNQ70UYgUo,4768
13
+ glitchlings/lexicon/vector.py,sha256=b_mlYjwn3mCyOHrdlPdfwqBaB25jSBMZNNyvmK0Kb0g,20894
14
+ glitchlings/lexicon/wordnet.py,sha256=OrI5L3K3wYddVw66JmvokirqxDvZangKWC7VXYimVi0,5834
15
+ glitchlings/lexicon/data/default_vector_cache.json,sha256=fLT-v1sgF0lv88aPwOP23br9azrjeAkJ1ft6OgPlMeM,741
16
+ glitchlings/util/__init__.py,sha256=GoyQuHTfGRkHzuZwJji6QWSiGd_LHa9QiyjjEpBFW7E,4679
17
+ glitchlings/zoo/__init__.py,sha256=AQu2Z0ECuzQDXwM_DbGaNzkcjLu8WptJS17sh6WTyyA,4975
18
+ glitchlings/zoo/_ocr_confusions.py,sha256=W59Aa5MBDwRF65f8GV-6XwGAmlR5Uk7pa5qvHvhIYdY,1252
19
+ glitchlings/zoo/_rate.py,sha256=EYUWXYyR2IK0zYBWyBOlnUjDxU32JE9mZTZeodVx5CA,548
20
+ glitchlings/zoo/_sampling.py,sha256=UK-XmEERjtY7nLaWDp81yktuZ_K80Un-9tvj4MjsHcg,1642
21
+ glitchlings/zoo/_text_utils.py,sha256=YxV069L8c0YSn5iCp72Dv8XCdfhbcFeBrbMoBeKIDns,2862
22
+ glitchlings/zoo/adjax.py,sha256=G2diAEsQ8T4mjFCcTeiGzLF0261n7LjLyW5HyVCy3R4,3661
23
+ glitchlings/zoo/core.py,sha256=sK3F1OVifbzQFsDrG-pQIImcGP7YfccwTfbqFTJi8Fc,14622
24
+ glitchlings/zoo/jargoyle.py,sha256=yRSVRKHdnaUfZMDJ6miBhRQpPHfYK9iD2DCYMp-7Dec,11807
25
+ glitchlings/zoo/mim1c.py,sha256=3ddNOzWgLABuEOh5T98Xk439ejx-YHGI7ErXET03Crc,3537
26
+ glitchlings/zoo/ocr_confusions.tsv,sha256=S-IJEYCIXYKT1Uu7Id8Lnvg5pw528yNigTtWUdnMv9k,213
27
+ glitchlings/zoo/redactyl.py,sha256=P2YYo1V_u62WIj8zqgDpbzDsreGx2I77BJ0RkdCKhXU,5651
28
+ glitchlings/zoo/reduple.py,sha256=w90xQWQKwkY3tItk8S20emDQy4FLDbee9rrPyh_ffpg,4363
29
+ glitchlings/zoo/rushmore.py,sha256=amAk44TIQBN4rU1c-W1g7I6WForGJMGxNb8uaa8Zfaw,4495
30
+ glitchlings/zoo/scannequin.py,sha256=TJyNYTTIB7rxZH3XKIETy0YVf4EjsMgGWYmYaxH9jxU,5030
31
+ glitchlings/zoo/typogre.py,sha256=j7LuAyYLrP6LjmCm8Jwi_wPxhTAP_TmWbt5pQrOvFZk,6901
32
+ glitchlings/zoo/zeedub.py,sha256=J8F1XZeCMQVVtzWwNiFhOeogjBt1BsOtqrnDjlwUcl8,4984
33
+ glitchlings-0.4.0.dist-info/licenses/LICENSE,sha256=EFEP1evBfHaxsMTBjxm0sZVRp2wct8QLvHE1saII5FI,11538
34
+ glitchlings-0.4.0.dist-info/METADATA,sha256=84RCkkFpmUjS7sLda_e7UAuUuJe8JLGxNDRpTpK5Ofc,28872
35
+ glitchlings-0.4.0.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
36
+ glitchlings-0.4.0.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
37
+ glitchlings-0.4.0.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
38
+ glitchlings-0.4.0.dist-info/RECORD,,
@@ -1,29 +0,0 @@
1
- glitchlings/__init__.py,sha256=lqzYzB1RdQnw-NpWXN2dtcEGDkQ-OkC2OTEL16HDMYc,730
2
- glitchlings/__main__.py,sha256=pqNe1C9hMf8pap4oh6x6yo2h4Nsa2RFSaMWHfGtNXj0,130
3
- glitchlings/_zoo_rust.cp312-win_amd64.pyd,sha256=AW-mnIw4O-B53whfrlhwNRB-OEAssSdgnSJEUfjxZvc,2024960
4
- glitchlings/main.py,sha256=QrSSLWcKh1_NDfJDGh-3UVKdI7AkzfMy6Jz1ouxIgnE,6149
5
- glitchlings/dlc/__init__.py,sha256=IHD-GGhVFb7SVzErvf2YCJkOR4wGo0nFHXkn_daMvS8,146
6
- glitchlings/dlc/huggingface.py,sha256=PIesnDIEvyJxj1IuLw2P9nVPTr4Nv81XM7w2axfyhkA,3029
7
- glitchlings/dlc/prime.py,sha256=b5CE1qDl5MxZjTudlKrqMsmSGxXNKZ16krqPyrr2nK8,9569
8
- glitchlings/util/__init__.py,sha256=GoyQuHTfGRkHzuZwJji6QWSiGd_LHa9QiyjjEpBFW7E,4679
9
- glitchlings/zoo/__init__.py,sha256=LryHn930FuEdKRyvtRu7breBvz9IYYTvJv7yGIxLd5Y,4520
10
- glitchlings/zoo/_ocr_confusions.py,sha256=W59Aa5MBDwRF65f8GV-6XwGAmlR5Uk7pa5qvHvhIYdY,1252
11
- glitchlings/zoo/_rate.py,sha256=EYUWXYyR2IK0zYBWyBOlnUjDxU32JE9mZTZeodVx5CA,548
12
- glitchlings/zoo/_text_utils.py,sha256=pul6iGtVWir4mX-Mq5ni06JFOzf6x3J82iYSICXJCGE,1162
13
- glitchlings/zoo/adjax.py,sha256=G2diAEsQ8T4mjFCcTeiGzLF0261n7LjLyW5HyVCy3R4,3661
14
- glitchlings/zoo/core.py,sha256=sK3F1OVifbzQFsDrG-pQIImcGP7YfccwTfbqFTJi8Fc,14622
15
- glitchlings/zoo/jargoyle.py,sha256=1fnL_8bv1Y-T2h1C6NRzIylYyOuAUI-BiMReFewqh00,11002
16
- glitchlings/zoo/mim1c.py,sha256=3ddNOzWgLABuEOh5T98Xk439ejx-YHGI7ErXET03Crc,3537
17
- glitchlings/zoo/ocr_confusions.tsv,sha256=S-IJEYCIXYKT1Uu7Id8Lnvg5pw528yNigTtWUdnMv9k,213
18
- glitchlings/zoo/redactyl.py,sha256=poBzhXtApDa55G7iVCGEM4v1_YSYh3LfEAp2fkVFIJ4,6579
19
- glitchlings/zoo/reduple.py,sha256=orgS3ajpuGTDN-QqGuYgfkEI7yVCgIXHtL_HHp8jGmE,4471
20
- glitchlings/zoo/rushmore.py,sha256=rUluMdjvSxaVlUfK9_N0F108O5Exoa4klWLumrV2CgA,4535
21
- glitchlings/zoo/scannequin.py,sha256=TJyNYTTIB7rxZH3XKIETy0YVf4EjsMgGWYmYaxH9jxU,5030
22
- glitchlings/zoo/typogre.py,sha256=olTTXDmFkVQ3r-T1vxm2mLomRvIDXHrNHfgin316wzE,6221
23
- glitchlings/zoo/zeedub.py,sha256=n1qTKE_Dl0m8SEKhaP91oHAyJ484NxaGLPu_ZLr0Ldo,3696
24
- glitchlings-0.3.0.dist-info/licenses/LICENSE,sha256=EFEP1evBfHaxsMTBjxm0sZVRp2wct8QLvHE1saII5FI,11538
25
- glitchlings-0.3.0.dist-info/METADATA,sha256=b9uWb19S04moT94a_onQBjurDfBHJulwIW4R2ep84mE,28084
26
- glitchlings-0.3.0.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
27
- glitchlings-0.3.0.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
28
- glitchlings-0.3.0.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
29
- glitchlings-0.3.0.dist-info/RECORD,,