glitchlings 0.3.0__cp311-cp311-macosx_11_0_universal2.whl → 0.4.0__cp311-cp311-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

@@ -4,11 +4,7 @@ import re
4
4
  from typing import Any
5
5
 
6
6
  from ._rate import resolve_rate
7
- from ._text_utils import (
8
- split_preserving_whitespace,
9
- split_token_edges,
10
- token_core_length,
11
- )
7
+ from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
12
8
  from .core import AttackWave, Glitchling
13
9
 
14
10
  try:
@@ -31,30 +27,28 @@ def _python_delete_random_words(
31
27
  return text
32
28
 
33
29
  tokens = split_preserving_whitespace(text)
30
+ word_tokens = collect_word_tokens(tokens, skip_first_word=True)
34
31
 
35
- candidate_data: list[tuple[int, float]] = []
36
- for i in range(2, len(tokens), 2):
37
- word = tokens[i]
38
- if not word or word.isspace():
39
- continue
40
-
41
- length = token_core_length(word)
42
- weight = 1.0 if unweighted else 1.0 / length
43
- candidate_data.append((i, weight))
32
+ weighted_tokens: list[tuple[int, float, WordToken]] = []
33
+ for token in word_tokens:
34
+ weight = 1.0 if unweighted else 1.0 / float(token.core_length)
35
+ weighted_tokens.append((token.index, weight, token))
44
36
 
45
- if not candidate_data:
37
+ if not weighted_tokens:
46
38
  return text
47
39
 
48
40
  allowed_deletions = min(
49
- len(candidate_data), math.floor(len(candidate_data) * effective_rate)
41
+ len(weighted_tokens), math.floor(len(weighted_tokens) * effective_rate)
50
42
  )
51
43
  if allowed_deletions <= 0:
52
44
  return text
53
45
 
54
- mean_weight = sum(weight for _, weight in candidate_data) / len(candidate_data)
46
+ mean_weight = sum(weight for _, weight, _ in weighted_tokens) / len(
47
+ weighted_tokens
48
+ )
55
49
 
56
50
  deletions = 0
57
- for index, weight in candidate_data:
51
+ for index, weight, token in weighted_tokens:
58
52
  if deletions >= allowed_deletions:
59
53
  break
60
54
 
@@ -68,9 +62,9 @@ def _python_delete_random_words(
68
62
  if rng.random() >= probability:
69
63
  continue
70
64
 
71
- word = tokens[index]
72
- prefix, _, suffix = split_token_edges(word)
73
- tokens[index] = f"{prefix.strip()}{suffix.strip()}"
65
+ prefix = token.prefix.strip()
66
+ suffix = token.suffix.strip()
67
+ tokens[index] = f"{prefix}{suffix}"
74
68
 
75
69
  deletions += 1
76
70
 
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import math
4
4
  import random
5
- from typing import Optional
5
+ from typing import Any, Optional
6
6
 
7
7
  from .core import Glitchling, AttackWave, AttackOrder
8
8
  from ._rate import resolve_rate
@@ -204,6 +204,27 @@ class Typogre(Glitchling):
204
204
  keyboard=keyboard,
205
205
  )
206
206
 
207
+ def pipeline_operation(self) -> dict[str, Any] | None:
208
+ rate = self.kwargs.get("rate")
209
+ if rate is None:
210
+ rate = self.kwargs.get("max_change_rate")
211
+ if rate is None:
212
+ return None
213
+
214
+ keyboard = self.kwargs.get("keyboard", "CURATOR_QWERTY")
215
+ layout = getattr(KEYNEIGHBORS, str(keyboard), None)
216
+ if layout is None:
217
+ return None
218
+
219
+ serialized_layout = {key: list(value) for key, value in layout.items()}
220
+
221
+ return {
222
+ "type": "typo",
223
+ "rate": float(rate),
224
+ "keyboard": str(keyboard),
225
+ "layout": serialized_layout,
226
+ }
227
+
207
228
 
208
229
  typogre = Typogre()
209
230
 
glitchlings/zoo/zeedub.py CHANGED
@@ -101,7 +101,26 @@ def insert_zero_widths(
101
101
  return text
102
102
 
103
103
  if _inject_zero_widths_rust is not None:
104
- return _inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng)
104
+ state = None
105
+ python_state = None
106
+ if hasattr(rng, "getstate") and hasattr(rng, "setstate"):
107
+ state = rng.getstate()
108
+ python_result = _python_insert_zero_widths(
109
+ text,
110
+ rate=clamped_rate,
111
+ rng=rng,
112
+ characters=cleaned_palette,
113
+ )
114
+ if state is not None:
115
+ if hasattr(rng, "getstate"):
116
+ python_state = rng.getstate()
117
+ rng.setstate(state)
118
+ rust_result = _inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng)
119
+ if rust_result == python_result:
120
+ return rust_result
121
+ if python_state is not None and hasattr(rng, "setstate"):
122
+ rng.setstate(python_state)
123
+ return python_result
105
124
 
106
125
  return _python_insert_zero_widths(
107
126
  text,
@@ -137,6 +156,26 @@ class Zeedub(Glitchling):
137
156
  characters=tuple(characters) if characters is not None else None,
138
157
  )
139
158
 
159
+ def pipeline_operation(self) -> dict[str, Any] | None:
160
+ rate = self.kwargs.get("rate")
161
+ if rate is None:
162
+ return None
163
+
164
+ raw_characters = self.kwargs.get("characters")
165
+ if raw_characters is None:
166
+ palette = tuple(_DEFAULT_ZERO_WIDTH_CHARACTERS)
167
+ else:
168
+ palette = tuple(str(char) for char in raw_characters if char)
169
+
170
+ if not palette:
171
+ return None
172
+
173
+ return {
174
+ "type": "zwj",
175
+ "rate": float(rate),
176
+ "characters": list(palette),
177
+ }
178
+
140
179
 
141
180
  zeedub = Zeedub()
142
181
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -209,7 +209,7 @@ Project-URL: Homepage, https://github.com/osoleve/glitchlings
209
209
  Project-URL: Repository, https://github.com/osoleve/glitchlings.git
210
210
  Project-URL: Issues, https://github.com/osoleve/glitchlings/issues
211
211
  Project-URL: Changelog, https://github.com/osoleve/glitchlings/releases
212
- Keywords: nlp,text,adversarial augmentation,text augmentation,large language models,llms,data augmentation,rlvr
212
+ Keywords: nlp,adversarial augmentation,text augmentation,data augmentation,domain randomization
213
213
  Classifier: Development Status :: 3 - Alpha
214
214
  Classifier: Intended Audience :: Developers
215
215
  Classifier: Programming Language :: Python
@@ -224,18 +224,20 @@ Requires-Python: >=3.10
224
224
  Description-Content-Type: text/markdown
225
225
  License-File: LICENSE
226
226
  Requires-Dist: confusable-homoglyphs>=3.3.1
227
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
228
+ Requires-Dist: pyyaml>=6.0.0
227
229
  Provides-Extra: hf
228
230
  Requires-Dist: datasets>=4.0.0; extra == "hf"
229
- Provides-Extra: wordnet
230
- Requires-Dist: nltk>=3.9.1; extra == "wordnet"
231
- Requires-Dist: numpy<=2.0,>=1.24; extra == "wordnet"
231
+ Provides-Extra: vectors
232
+ Requires-Dist: numpy<=2.0,>=1.24; extra == "vectors"
233
+ Requires-Dist: spacy>=3.7.2; extra == "vectors"
234
+ Requires-Dist: gensim>=4.3.2; extra == "vectors"
232
235
  Provides-Extra: prime
233
236
  Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
234
237
  Requires-Dist: jellyfish>=1.2.0; extra == "prime"
235
238
  Provides-Extra: dev
236
239
  Requires-Dist: pytest>=8.0.0; extra == "dev"
237
240
  Requires-Dist: hypothesis>=6.140.0; extra == "dev"
238
- Requires-Dist: nltk>=3.9.1; extra == "dev"
239
241
  Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
240
242
  Dynamic: license-file
241
243
 
@@ -348,10 +350,30 @@ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
348
350
 
349
351
  # Pipe text straight into the CLI for an on-the-fly corruption.
350
352
  echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
353
+
354
+ # Load a roster from a YAML attack configuration.
355
+ glitchlings --config experiments/chaos.yaml "Let slip the glitchlings of war"
351
356
  ```
352
357
 
353
358
  Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
354
359
 
360
+ Attack configurations live in plain YAML files so you can version-control experiments without touching code:
361
+
362
+ ```yaml
363
+ # experiments/chaos.yaml
364
+ seed: 31337
365
+ glitchlings:
366
+ - name: Typogre
367
+ rate: 0.04
368
+ - "Rushmore(rate=0.12, unweighted=True)"
369
+ - name: Zeedub
370
+ parameters:
371
+ rate: 0.02
372
+ characters: ["\u200b", "\u2060"]
373
+ ```
374
+
375
+ Pass the file to `glitchlings --config` or load it from Python with `glitchlings.load_attack_config` and `glitchlings.build_gaggle`.
376
+
355
377
  ## Development
356
378
 
357
379
  Follow the [development setup guide](docs/development.md) for editable installs, automated tests, and tips on enabling the Rust pipeline while you hack on new glitchlings.
@@ -416,8 +438,8 @@ _Uh oh. The worst person you know just bought a thesaurus._
416
438
  >
417
439
  > Args
418
440
  >
419
- > - `rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
420
- > - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
441
+ > - `rate (float)`: The maximum proportion of words to replace (default: 0.01, 1%).
442
+ > - `part_of_speech`: The WordNet-style part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all. Vector/graph backends ignore this filter while still honouring deterministic sampling.
421
443
  > - `seed (int)`: The random seed for reproducibility (default: 151).
422
444
 
423
445
  ### Reduple
@@ -0,0 +1,38 @@
1
+ glitchlings/__init__.py,sha256=hEmQ1rl3G5uZBDbfJX_W4aIUNSsPAsy_Ai5DgQHasvk,813
2
+ glitchlings/__main__.py,sha256=EOiBgay0x6B9VlSDzSQvMuoq6bHJdSvFSgcAVGGKkd4,121
3
+ glitchlings/_zoo_rust.cpython-311-darwin.so,sha256=l05pofs9b3aNpJ9yj9RTt7_DGzFP_aQl3BiEkWDjFyA,2450000
4
+ glitchlings/config.py,sha256=hwkcMkhEvUzK8FECgG6kbf_4MpMQcopskiSgXzK5B3o,7785
5
+ glitchlings/config.toml,sha256=MWwgbx1-KIRAY3JZmMrCVbZNxFjHgRJXbtNAVuUNcxY,108
6
+ glitchlings/main.py,sha256=Rw9pCgNrGxwzC1rZbbng7cHUP9xlL0WWWTdjW95XiSM,10084
7
+ glitchlings/dlc/__init__.py,sha256=eTLEEWrVWPqniXHqee4W23H1rjElI1PQ_jcqWFe9D3g,141
8
+ glitchlings/dlc/huggingface.py,sha256=I1QWanWVxO02awgSpHDtgQEVF-9AQRLtsta2RCitWhE,2933
9
+ glitchlings/dlc/prime.py,sha256=wpRMNtgka1vNlEzifeCjGMp1q_-QclZn3NxXczGnNpM,9278
10
+ glitchlings/lexicon/__init__.py,sha256=-w35jPtg7WCP_IfRxAUZBNFXeSnlIaVfbJiPDI3f3K4,5663
11
+ glitchlings/lexicon/graph.py,sha256=_2w5shu-fEieDN-egpqLvMu0rxG78RAQWqENU0r7PlM,10533
12
+ glitchlings/lexicon/metrics.py,sha256=W8TCemZaCjBOUSX8G7JdgQAbMykXXfRTfodkDSkc3aQ,4599
13
+ glitchlings/lexicon/vector.py,sha256=Qqspc8KR4hqJiTTiXnu8DCIp2ROYPgEKK4RM4kLkyGY,20284
14
+ glitchlings/lexicon/wordnet.py,sha256=FwjTtVPOQEmWEXL0Sl4faM-C4PPNkDu_z7-FyINlh3c,5652
15
+ glitchlings/lexicon/data/default_vector_cache.json,sha256=7obKHqmR3odbTfgJPWLSRFYFh4J_6uvv_CntCSe_EjI,725
16
+ glitchlings/util/__init__.py,sha256=7KiZ0gKMjocfd34cajneZhTqYb7Hkwi_PpjltPqvkNI,4498
17
+ glitchlings/zoo/__init__.py,sha256=eFYmaWeFDlSqfaiED51HWM-OqiTo_BOz0ASeyhOwOsw,4818
18
+ glitchlings/zoo/_ocr_confusions.py,sha256=MkCbwk9T24SO2pD3JNPajYCfpMMlm2vQ5_sJty5GoXE,1218
19
+ glitchlings/zoo/_rate.py,sha256=TMyfVFV7pLxSGVswPlOAtBvk25Bjtx5xXTtpb_utgik,527
20
+ glitchlings/zoo/_sampling.py,sha256=VOSWDgYWXIiAuKxn2IckFJhpRgGotQP_KW28db8kTKI,1587
21
+ glitchlings/zoo/_text_utils.py,sha256=nAfFT_VdXMXciCR7eQ5EAmym5wvzL6_Sdn9dvCx2s3Q,2758
22
+ glitchlings/zoo/adjax.py,sha256=N3CzfM7m7mAYgFcQYLQkqK2VYLw_vFvEMBM2aNU--ZA,3530
23
+ glitchlings/zoo/core.py,sha256=fhceCZKa9W1vVlhpR2zVKBXnzgJICB2-nmDywiqx4js,14207
24
+ glitchlings/zoo/jargoyle.py,sha256=6-DJxUFz2AjT-iQDFlK2ZG9pVwq2boDtslEzCNyI_04,11481
25
+ glitchlings/zoo/mim1c.py,sha256=yAt1ngR3j2KXLbzc8LhrQlIWRO_KT5dFK1EE8QivMAQ,3429
26
+ glitchlings/zoo/ocr_confusions.tsv,sha256=KhtR7vJDTITpfTSGa-I7RHr6CK7LkGi2KjdhEWipI6o,183
27
+ glitchlings/zoo/redactyl.py,sha256=H4PwAMBCIsDw1KBOBiTR3VUbRZwynqakwwfx3wHjVp8,5457
28
+ glitchlings/zoo/reduple.py,sha256=Q9NRCdvUgaHvvJu8A0n6zW9v_L3pdmNZbWqaJ7uycw4,4216
29
+ glitchlings/zoo/rushmore.py,sha256=J1wd4IB7WOAR2TdntkxCMZWseWR0Yii8UQZ7ucfpWCc,4335
30
+ glitchlings/zoo/scannequin.py,sha256=Ps8nxysKjkJV408zaL1kjVjy4jliATDBpYcNHLWbNFg,4859
31
+ glitchlings/zoo/typogre.py,sha256=0fYaxOEiTnxiCqmsiSN1r_wl1vC1Ueaiks2e94kks70,6668
32
+ glitchlings/zoo/zeedub.py,sha256=l51swlo556-TXhDk4nayHOm1XgHwWmfUKzQ01YMuCpE,4801
33
+ glitchlings-0.4.0.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
34
+ glitchlings-0.4.0.dist-info/METADATA,sha256=Dldj4SIrrNF6TKAvvJghd_L4lVrzdViqb8DWMSvPWVE,28345
35
+ glitchlings-0.4.0.dist-info/WHEEL,sha256=Tgp8Vc-mmQm0KX-V22BSUoymoX1p0w13bZbX85y8hSs,114
36
+ glitchlings-0.4.0.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
37
+ glitchlings-0.4.0.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
38
+ glitchlings-0.4.0.dist-info/RECORD,,
@@ -1,29 +0,0 @@
1
- glitchlings/__init__.py,sha256=BLwp5ncEEVTurUDEo6DZcYjYz7r12LzblLfOcVc4MEU,680
2
- glitchlings/__main__.py,sha256=EOiBgay0x6B9VlSDzSQvMuoq6bHJdSvFSgcAVGGKkd4,121
3
- glitchlings/_zoo_rust.cpython-311-darwin.so,sha256=K9GCUmlYT8zipQe_Ak5IgrUKJVI0AKbz7CbJEgoOaqo,2406592
4
- glitchlings/main.py,sha256=u6969Vl0n47e3S-ZlYZBj3HWVsjs-hvW6RpF9RYuXnc,5931
5
- glitchlings/dlc/__init__.py,sha256=eTLEEWrVWPqniXHqee4W23H1rjElI1PQ_jcqWFe9D3g,141
6
- glitchlings/dlc/huggingface.py,sha256=I1QWanWVxO02awgSpHDtgQEVF-9AQRLtsta2RCitWhE,2933
7
- glitchlings/dlc/prime.py,sha256=wpRMNtgka1vNlEzifeCjGMp1q_-QclZn3NxXczGnNpM,9278
8
- glitchlings/util/__init__.py,sha256=7KiZ0gKMjocfd34cajneZhTqYb7Hkwi_PpjltPqvkNI,4498
9
- glitchlings/zoo/__init__.py,sha256=sTmh-1u02kgjYlpRPz9lF9c1aXHamcShRXUOGK87J5Q,4378
10
- glitchlings/zoo/_ocr_confusions.py,sha256=MkCbwk9T24SO2pD3JNPajYCfpMMlm2vQ5_sJty5GoXE,1218
11
- glitchlings/zoo/_rate.py,sha256=TMyfVFV7pLxSGVswPlOAtBvk25Bjtx5xXTtpb_utgik,527
12
- glitchlings/zoo/_text_utils.py,sha256=ZXy5khgoMTZp7NHdekkkj4vQjeMWGK2bzXPwIECBIfo,1120
13
- glitchlings/zoo/adjax.py,sha256=N3CzfM7m7mAYgFcQYLQkqK2VYLw_vFvEMBM2aNU--ZA,3530
14
- glitchlings/zoo/core.py,sha256=fhceCZKa9W1vVlhpR2zVKBXnzgJICB2-nmDywiqx4js,14207
15
- glitchlings/zoo/jargoyle.py,sha256=T6vPWBxceIPE6gOQ7BaihaqALOJwzXuhfiZzvKa4S50,10666
16
- glitchlings/zoo/mim1c.py,sha256=yAt1ngR3j2KXLbzc8LhrQlIWRO_KT5dFK1EE8QivMAQ,3429
17
- glitchlings/zoo/ocr_confusions.tsv,sha256=KhtR7vJDTITpfTSGa-I7RHr6CK7LkGi2KjdhEWipI6o,183
18
- glitchlings/zoo/redactyl.py,sha256=8xsamnVt1RFy7ztvfgfJDwCadQIlN-9fDz-TLfBQ89k,6357
19
- glitchlings/zoo/reduple.py,sha256=IQM0WYinWJWjMIaBSuPPcpOXOynly9Tp2UtJEZxibGk,4313
20
- glitchlings/zoo/rushmore.py,sha256=Cw6qpk3jp8DjtxmFALd5zTIOnS6C0tIkoPFA7F-xlVk,4369
21
- glitchlings/zoo/scannequin.py,sha256=Ps8nxysKjkJV408zaL1kjVjy4jliATDBpYcNHLWbNFg,4859
22
- glitchlings/zoo/typogre.py,sha256=xD02ldcMIA07XsdSts2bUniOc-k_DqTf0PBMaXGjLZE,6009
23
- glitchlings/zoo/zeedub.py,sha256=D6rGk3O02OQ9jEIO9o0Ag-maVzNPN5O6qO3klG6Y62c,3552
24
- glitchlings-0.3.0.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
25
- glitchlings-0.3.0.dist-info/METADATA,sha256=iflxCI-vHtZP-omUePVGqx0QHLoFiJoHzcM7aSNvboQ,27579
26
- glitchlings-0.3.0.dist-info/WHEEL,sha256=Tgp8Vc-mmQm0KX-V22BSUoymoX1p0w13bZbX85y8hSs,114
27
- glitchlings-0.3.0.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
28
- glitchlings-0.3.0.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
29
- glitchlings-0.3.0.dist-info/RECORD,,