glitchlings 0.2.6__cp310-cp310-macosx_11_0_universal2.whl → 0.4.0__cp310-cp310-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +8 -0
- glitchlings/_zoo_rust.cpython-310-darwin.so +0 -0
- glitchlings/config.py +258 -0
- glitchlings/config.toml +3 -0
- glitchlings/lexicon/__init__.py +191 -0
- glitchlings/lexicon/data/default_vector_cache.json +16 -0
- glitchlings/lexicon/graph.py +303 -0
- glitchlings/lexicon/metrics.py +169 -0
- glitchlings/lexicon/vector.py +610 -0
- glitchlings/lexicon/wordnet.py +182 -0
- glitchlings/main.py +145 -5
- glitchlings/zoo/__init__.py +20 -1
- glitchlings/zoo/_sampling.py +55 -0
- glitchlings/zoo/_text_utils.py +104 -0
- glitchlings/zoo/adjax.py +131 -0
- glitchlings/zoo/core.py +16 -14
- glitchlings/zoo/jargoyle.py +190 -200
- glitchlings/zoo/redactyl.py +32 -67
- glitchlings/zoo/reduple.py +13 -35
- glitchlings/zoo/rushmore.py +17 -28
- glitchlings/zoo/typogre.py +22 -1
- glitchlings/zoo/zeedub.py +40 -1
- {glitchlings-0.2.6.dist-info → glitchlings-0.4.0.dist-info}/METADATA +48 -11
- glitchlings-0.4.0.dist-info/RECORD +38 -0
- glitchlings-0.2.6.dist-info/RECORD +0 -27
- {glitchlings-0.2.6.dist-info → glitchlings-0.4.0.dist-info}/WHEEL +0 -0
- {glitchlings-0.2.6.dist-info → glitchlings-0.4.0.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.2.6.dist-info → glitchlings-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.2.6.dist-info → glitchlings-0.4.0.dist-info}/top_level.txt +0 -0
glitchlings/zoo/redactyl.py
CHANGED
|
@@ -2,8 +2,14 @@ import re
|
|
|
2
2
|
import random
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
|
-
from .core import Glitchling, AttackWave
|
|
6
5
|
from ._rate import resolve_rate
|
|
6
|
+
from ._sampling import weighted_sample_without_replacement
|
|
7
|
+
from ._text_utils import (
|
|
8
|
+
WordToken,
|
|
9
|
+
collect_word_tokens,
|
|
10
|
+
split_preserving_whitespace,
|
|
11
|
+
)
|
|
12
|
+
from .core import AttackWave, Glitchling
|
|
7
13
|
|
|
8
14
|
FULL_BLOCK = "█"
|
|
9
15
|
|
|
@@ -14,41 +20,6 @@ except ImportError: # pragma: no cover - compiled extension not present
|
|
|
14
20
|
_redact_words_rust = None
|
|
15
21
|
|
|
16
22
|
|
|
17
|
-
def _weighted_sample_without_replacement(
|
|
18
|
-
population: list[int],
|
|
19
|
-
weights: list[float],
|
|
20
|
-
*,
|
|
21
|
-
k: int,
|
|
22
|
-
rng: random.Random,
|
|
23
|
-
) -> list[int]:
|
|
24
|
-
"""Select `k` unique indices according to the given weights."""
|
|
25
|
-
|
|
26
|
-
selections: list[int] = []
|
|
27
|
-
items = list(zip(population, weights))
|
|
28
|
-
if k <= 0 or not items:
|
|
29
|
-
return selections
|
|
30
|
-
if k > len(items):
|
|
31
|
-
raise ValueError("Sample larger than population or is negative")
|
|
32
|
-
|
|
33
|
-
for _ in range(k):
|
|
34
|
-
total_weight = sum(weight for _, weight in items)
|
|
35
|
-
if total_weight <= 0:
|
|
36
|
-
chosen_index = rng.randrange(len(items))
|
|
37
|
-
else:
|
|
38
|
-
threshold = rng.random() * total_weight
|
|
39
|
-
cumulative = 0.0
|
|
40
|
-
chosen_index = len(items) - 1
|
|
41
|
-
for idx, (_, weight) in enumerate(items):
|
|
42
|
-
cumulative += weight
|
|
43
|
-
if cumulative >= threshold:
|
|
44
|
-
chosen_index = idx
|
|
45
|
-
break
|
|
46
|
-
value, _ = items.pop(chosen_index)
|
|
47
|
-
selections.append(value)
|
|
48
|
-
|
|
49
|
-
return selections
|
|
50
|
-
|
|
51
|
-
|
|
52
23
|
def _python_redact_words(
|
|
53
24
|
text: str,
|
|
54
25
|
*,
|
|
@@ -68,53 +39,47 @@ def _python_redact_words(
|
|
|
68
39
|
- rng: RNG used for sampling decisions.
|
|
69
40
|
- unweighted: When True, sample words uniformly instead of by length.
|
|
70
41
|
"""
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
if not word_indices:
|
|
42
|
+
tokens = split_preserving_whitespace(text)
|
|
43
|
+
word_tokens = collect_word_tokens(tokens)
|
|
44
|
+
if not word_tokens:
|
|
75
45
|
raise ValueError(
|
|
76
46
|
"Cannot redact words because the input text contains no redactable words."
|
|
77
47
|
)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if core_length <= 0:
|
|
87
|
-
core_length = 1
|
|
88
|
-
weights.append(1.0 if unweighted else float(core_length))
|
|
89
|
-
raw_quota = len(word_indices) * rate
|
|
48
|
+
|
|
49
|
+
population = [token.index for token in word_tokens]
|
|
50
|
+
weights = [
|
|
51
|
+
1.0 if unweighted else float(token.core_length) for token in word_tokens
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
clamped_rate = max(0.0, min(rate, 1.0))
|
|
55
|
+
raw_quota = len(population) * clamped_rate
|
|
90
56
|
num_to_redact = int(raw_quota)
|
|
91
|
-
if
|
|
57
|
+
if clamped_rate > 0.0:
|
|
92
58
|
num_to_redact = max(1, num_to_redact)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
59
|
+
num_to_redact = min(num_to_redact, len(population))
|
|
60
|
+
if num_to_redact <= 0:
|
|
61
|
+
return "".join(tokens)
|
|
62
|
+
|
|
63
|
+
indices_to_redact = weighted_sample_without_replacement(
|
|
64
|
+
population,
|
|
97
65
|
weights,
|
|
98
66
|
k=num_to_redact,
|
|
99
67
|
rng=rng,
|
|
100
68
|
)
|
|
101
69
|
indices_to_redact.sort()
|
|
102
70
|
|
|
71
|
+
token_by_index: dict[int, WordToken] = {token.index: token for token in word_tokens}
|
|
72
|
+
|
|
103
73
|
for i in indices_to_redact:
|
|
104
74
|
if i >= len(tokens):
|
|
105
75
|
break
|
|
106
76
|
|
|
107
|
-
|
|
108
|
-
if
|
|
77
|
+
token = token_by_index.get(i)
|
|
78
|
+
if token is None:
|
|
109
79
|
continue
|
|
110
80
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
if match:
|
|
114
|
-
prefix, core, suffix = match.groups()
|
|
115
|
-
tokens[i] = f"{prefix}{replacement_char * len(core)}{suffix}"
|
|
116
|
-
else:
|
|
117
|
-
tokens[i] = f"{replacement_char * len(word)}"
|
|
81
|
+
prefix, core, suffix = token.prefix, token.core, token.suffix
|
|
82
|
+
tokens[i] = f"{prefix}{replacement_char * len(core)}{suffix}"
|
|
118
83
|
|
|
119
84
|
text = "".join(tokens)
|
|
120
85
|
|
|
@@ -151,7 +116,7 @@ def redact_words(
|
|
|
151
116
|
if rng is None:
|
|
152
117
|
rng = random.Random(seed)
|
|
153
118
|
|
|
154
|
-
clamped_rate = max(0.0, effective_rate)
|
|
119
|
+
clamped_rate = max(0.0, min(effective_rate, 1.0))
|
|
155
120
|
unweighted_flag = bool(unweighted)
|
|
156
121
|
|
|
157
122
|
use_rust = _redact_words_rust is not None and isinstance(merge_adjacent, bool)
|
glitchlings/zoo/reduple.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import re
|
|
2
1
|
import random
|
|
3
2
|
from typing import Any
|
|
4
3
|
|
|
5
|
-
from .core import Glitchling, AttackWave
|
|
6
4
|
from ._rate import resolve_rate
|
|
5
|
+
from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
|
|
6
|
+
from .core import AttackWave, Glitchling
|
|
7
7
|
|
|
8
8
|
try:
|
|
9
9
|
from glitchlings._zoo_rust import reduplicate_words as _reduplicate_words_rust
|
|
@@ -30,40 +30,24 @@ def _python_reduplicate_words(
|
|
|
30
30
|
- Preserves spacing and punctuation by tokenizing with separators.
|
|
31
31
|
- Deterministic when run with a fixed seed or via Gaggle.
|
|
32
32
|
"""
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
tokens = split_preserving_whitespace(text)
|
|
34
|
+
word_tokens = collect_word_tokens(tokens)
|
|
35
35
|
|
|
36
|
-
|
|
37
|
-
for
|
|
38
|
-
if
|
|
39
|
-
|
|
36
|
+
weighted_tokens: list[tuple[int, float, WordToken]] = []
|
|
37
|
+
for token in word_tokens:
|
|
38
|
+
weight = 1.0 if unweighted else 1.0 / float(token.core_length)
|
|
39
|
+
weighted_tokens.append((token.index, weight, token))
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
if not word or word.isspace(): # Skip empty or whitespace
|
|
43
|
-
continue
|
|
44
|
-
|
|
45
|
-
match = re.match(r"^(\W*)(.*?)(\W*)$", word)
|
|
46
|
-
core = match.group(2) if match else word
|
|
47
|
-
core_length = len(core) if core else len(word)
|
|
48
|
-
if core_length <= 0:
|
|
49
|
-
core_length = len(word.strip()) or len(word)
|
|
50
|
-
if core_length <= 0:
|
|
51
|
-
core_length = 1
|
|
52
|
-
weight = 1.0 if unweighted else 1.0 / core_length
|
|
53
|
-
candidate_weights.append((i, weight))
|
|
54
|
-
|
|
55
|
-
if not candidate_weights:
|
|
41
|
+
if not weighted_tokens:
|
|
56
42
|
return "".join(tokens)
|
|
57
43
|
|
|
58
44
|
effective_rate = max(rate, 0.0)
|
|
59
45
|
if effective_rate <= 0.0:
|
|
60
46
|
return "".join(tokens)
|
|
61
47
|
|
|
62
|
-
mean_weight = sum(weight for _, weight in
|
|
63
|
-
candidate_weights
|
|
64
|
-
)
|
|
48
|
+
mean_weight = sum(weight for _, weight, _ in weighted_tokens) / len(weighted_tokens)
|
|
65
49
|
|
|
66
|
-
for index, weight in
|
|
50
|
+
for index, weight, token in weighted_tokens:
|
|
67
51
|
if effective_rate >= 1.0:
|
|
68
52
|
probability = 1.0
|
|
69
53
|
else:
|
|
@@ -74,14 +58,8 @@ def _python_reduplicate_words(
|
|
|
74
58
|
if rng.random() >= probability:
|
|
75
59
|
continue
|
|
76
60
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
if match:
|
|
80
|
-
prefix, core, suffix = match.groups()
|
|
81
|
-
# Reduplicate with a space: "word" -> "word word"
|
|
82
|
-
tokens[index] = f"{prefix}{core} {core}{suffix}"
|
|
83
|
-
else:
|
|
84
|
-
tokens[index] = f"{word} {word}"
|
|
61
|
+
prefix, core, suffix = token.prefix, token.core, token.suffix
|
|
62
|
+
tokens[index] = f"{prefix}{core} {core}{suffix}"
|
|
85
63
|
return "".join(tokens)
|
|
86
64
|
|
|
87
65
|
|
glitchlings/zoo/rushmore.py
CHANGED
|
@@ -3,8 +3,9 @@ import random
|
|
|
3
3
|
import re
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
from .core import Glitchling, AttackWave
|
|
7
6
|
from ._rate import resolve_rate
|
|
7
|
+
from ._text_utils import WordToken, collect_word_tokens, split_preserving_whitespace
|
|
8
|
+
from .core import AttackWave, Glitchling
|
|
8
9
|
|
|
9
10
|
try:
|
|
10
11
|
from glitchlings._zoo_rust import delete_random_words as _delete_random_words_rust
|
|
@@ -25,37 +26,29 @@ def _python_delete_random_words(
|
|
|
25
26
|
if effective_rate <= 0.0:
|
|
26
27
|
return text
|
|
27
28
|
|
|
28
|
-
tokens =
|
|
29
|
+
tokens = split_preserving_whitespace(text)
|
|
30
|
+
word_tokens = collect_word_tokens(tokens, skip_first_word=True)
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
for
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
continue
|
|
32
|
+
weighted_tokens: list[tuple[int, float, WordToken]] = []
|
|
33
|
+
for token in word_tokens:
|
|
34
|
+
weight = 1.0 if unweighted else 1.0 / float(token.core_length)
|
|
35
|
+
weighted_tokens.append((token.index, weight, token))
|
|
35
36
|
|
|
36
|
-
|
|
37
|
-
core = match.group(2) if match else word
|
|
38
|
-
core_length = len(core) if core else len(word)
|
|
39
|
-
if core_length <= 0:
|
|
40
|
-
core_length = len(word.strip()) or len(word)
|
|
41
|
-
if core_length <= 0:
|
|
42
|
-
core_length = 1
|
|
43
|
-
weight = 1.0 if unweighted else 1.0 / core_length
|
|
44
|
-
candidate_data.append((i, weight))
|
|
45
|
-
|
|
46
|
-
if not candidate_data:
|
|
37
|
+
if not weighted_tokens:
|
|
47
38
|
return text
|
|
48
39
|
|
|
49
40
|
allowed_deletions = min(
|
|
50
|
-
len(
|
|
41
|
+
len(weighted_tokens), math.floor(len(weighted_tokens) * effective_rate)
|
|
51
42
|
)
|
|
52
43
|
if allowed_deletions <= 0:
|
|
53
44
|
return text
|
|
54
45
|
|
|
55
|
-
mean_weight = sum(weight for _, weight in
|
|
46
|
+
mean_weight = sum(weight for _, weight, _ in weighted_tokens) / len(
|
|
47
|
+
weighted_tokens
|
|
48
|
+
)
|
|
56
49
|
|
|
57
50
|
deletions = 0
|
|
58
|
-
for index, weight in
|
|
51
|
+
for index, weight, token in weighted_tokens:
|
|
59
52
|
if deletions >= allowed_deletions:
|
|
60
53
|
break
|
|
61
54
|
|
|
@@ -69,13 +62,9 @@ def _python_delete_random_words(
|
|
|
69
62
|
if rng.random() >= probability:
|
|
70
63
|
continue
|
|
71
64
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
prefix, _, suffix = match.groups()
|
|
76
|
-
tokens[index] = f"{prefix.strip()}{suffix.strip()}"
|
|
77
|
-
else:
|
|
78
|
-
tokens[index] = ""
|
|
65
|
+
prefix = token.prefix.strip()
|
|
66
|
+
suffix = token.suffix.strip()
|
|
67
|
+
tokens[index] = f"{prefix}{suffix}"
|
|
79
68
|
|
|
80
69
|
deletions += 1
|
|
81
70
|
|
glitchlings/zoo/typogre.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import math
|
|
4
4
|
import random
|
|
5
|
-
from typing import Optional
|
|
5
|
+
from typing import Any, Optional
|
|
6
6
|
|
|
7
7
|
from .core import Glitchling, AttackWave, AttackOrder
|
|
8
8
|
from ._rate import resolve_rate
|
|
@@ -204,6 +204,27 @@ class Typogre(Glitchling):
|
|
|
204
204
|
keyboard=keyboard,
|
|
205
205
|
)
|
|
206
206
|
|
|
207
|
+
def pipeline_operation(self) -> dict[str, Any] | None:
|
|
208
|
+
rate = self.kwargs.get("rate")
|
|
209
|
+
if rate is None:
|
|
210
|
+
rate = self.kwargs.get("max_change_rate")
|
|
211
|
+
if rate is None:
|
|
212
|
+
return None
|
|
213
|
+
|
|
214
|
+
keyboard = self.kwargs.get("keyboard", "CURATOR_QWERTY")
|
|
215
|
+
layout = getattr(KEYNEIGHBORS, str(keyboard), None)
|
|
216
|
+
if layout is None:
|
|
217
|
+
return None
|
|
218
|
+
|
|
219
|
+
serialized_layout = {key: list(value) for key, value in layout.items()}
|
|
220
|
+
|
|
221
|
+
return {
|
|
222
|
+
"type": "typo",
|
|
223
|
+
"rate": float(rate),
|
|
224
|
+
"keyboard": str(keyboard),
|
|
225
|
+
"layout": serialized_layout,
|
|
226
|
+
}
|
|
227
|
+
|
|
207
228
|
|
|
208
229
|
typogre = Typogre()
|
|
209
230
|
|
glitchlings/zoo/zeedub.py
CHANGED
|
@@ -101,7 +101,26 @@ def insert_zero_widths(
|
|
|
101
101
|
return text
|
|
102
102
|
|
|
103
103
|
if _inject_zero_widths_rust is not None:
|
|
104
|
-
|
|
104
|
+
state = None
|
|
105
|
+
python_state = None
|
|
106
|
+
if hasattr(rng, "getstate") and hasattr(rng, "setstate"):
|
|
107
|
+
state = rng.getstate()
|
|
108
|
+
python_result = _python_insert_zero_widths(
|
|
109
|
+
text,
|
|
110
|
+
rate=clamped_rate,
|
|
111
|
+
rng=rng,
|
|
112
|
+
characters=cleaned_palette,
|
|
113
|
+
)
|
|
114
|
+
if state is not None:
|
|
115
|
+
if hasattr(rng, "getstate"):
|
|
116
|
+
python_state = rng.getstate()
|
|
117
|
+
rng.setstate(state)
|
|
118
|
+
rust_result = _inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng)
|
|
119
|
+
if rust_result == python_result:
|
|
120
|
+
return rust_result
|
|
121
|
+
if python_state is not None and hasattr(rng, "setstate"):
|
|
122
|
+
rng.setstate(python_state)
|
|
123
|
+
return python_result
|
|
105
124
|
|
|
106
125
|
return _python_insert_zero_widths(
|
|
107
126
|
text,
|
|
@@ -137,6 +156,26 @@ class Zeedub(Glitchling):
|
|
|
137
156
|
characters=tuple(characters) if characters is not None else None,
|
|
138
157
|
)
|
|
139
158
|
|
|
159
|
+
def pipeline_operation(self) -> dict[str, Any] | None:
|
|
160
|
+
rate = self.kwargs.get("rate")
|
|
161
|
+
if rate is None:
|
|
162
|
+
return None
|
|
163
|
+
|
|
164
|
+
raw_characters = self.kwargs.get("characters")
|
|
165
|
+
if raw_characters is None:
|
|
166
|
+
palette = tuple(_DEFAULT_ZERO_WIDTH_CHARACTERS)
|
|
167
|
+
else:
|
|
168
|
+
palette = tuple(str(char) for char in raw_characters if char)
|
|
169
|
+
|
|
170
|
+
if not palette:
|
|
171
|
+
return None
|
|
172
|
+
|
|
173
|
+
return {
|
|
174
|
+
"type": "zwj",
|
|
175
|
+
"rate": float(rate),
|
|
176
|
+
"characters": list(palette),
|
|
177
|
+
}
|
|
178
|
+
|
|
140
179
|
|
|
141
180
|
zeedub = Zeedub()
|
|
142
181
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: glitchlings
|
|
3
|
-
Version: 0.2.6
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Monsters for your language games.
|
|
5
5
|
Author: osoleve
|
|
6
6
|
License: Apache License
|
|
@@ -209,7 +209,7 @@ Project-URL: Homepage, https://github.com/osoleve/glitchlings
|
|
|
209
209
|
Project-URL: Repository, https://github.com/osoleve/glitchlings.git
|
|
210
210
|
Project-URL: Issues, https://github.com/osoleve/glitchlings/issues
|
|
211
211
|
Project-URL: Changelog, https://github.com/osoleve/glitchlings/releases
|
|
212
|
-
Keywords: nlp,
|
|
212
|
+
Keywords: nlp,adversarial augmentation,text augmentation,data augmentation,domain randomization
|
|
213
213
|
Classifier: Development Status :: 3 - Alpha
|
|
214
214
|
Classifier: Intended Audience :: Developers
|
|
215
215
|
Classifier: Programming Language :: Python
|
|
@@ -224,18 +224,20 @@ Requires-Python: >=3.10
|
|
|
224
224
|
Description-Content-Type: text/markdown
|
|
225
225
|
License-File: LICENSE
|
|
226
226
|
Requires-Dist: confusable-homoglyphs>=3.3.1
|
|
227
|
+
Requires-Dist: tomli>=2.0.1; python_version < "3.11"
|
|
228
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
227
229
|
Provides-Extra: hf
|
|
228
230
|
Requires-Dist: datasets>=4.0.0; extra == "hf"
|
|
229
|
-
Provides-Extra:
|
|
230
|
-
Requires-Dist:
|
|
231
|
-
Requires-Dist:
|
|
231
|
+
Provides-Extra: vectors
|
|
232
|
+
Requires-Dist: numpy<=2.0,>=1.24; extra == "vectors"
|
|
233
|
+
Requires-Dist: spacy>=3.7.2; extra == "vectors"
|
|
234
|
+
Requires-Dist: gensim>=4.3.2; extra == "vectors"
|
|
232
235
|
Provides-Extra: prime
|
|
233
236
|
Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
|
|
234
237
|
Requires-Dist: jellyfish>=1.2.0; extra == "prime"
|
|
235
238
|
Provides-Extra: dev
|
|
236
239
|
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
237
240
|
Requires-Dist: hypothesis>=6.140.0; extra == "dev"
|
|
238
|
-
Requires-Dist: nltk>=3.9.1; extra == "dev"
|
|
239
241
|
Requires-Dist: numpy<=2.0,>=1.24; extra == "dev"
|
|
240
242
|
Dynamic: license-file
|
|
241
243
|
|
|
@@ -348,10 +350,30 @@ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
|
|
|
348
350
|
|
|
349
351
|
# Pipe text straight into the CLI for an on-the-fly corruption.
|
|
350
352
|
echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
|
|
353
|
+
|
|
354
|
+
# Load a roster from a YAML attack configuration.
|
|
355
|
+
glitchlings --config experiments/chaos.yaml "Let slips the glitchlings of war"
|
|
351
356
|
```
|
|
352
357
|
|
|
353
358
|
Use `--help` for a complete breakdown of available options, including support for parameterised glitchlings via `-g "Name(arg=value, ...)"` to mirror the Python API.
|
|
354
359
|
|
|
360
|
+
Attack configurations live in plain YAML files so you can version-control experiments without touching code:
|
|
361
|
+
|
|
362
|
+
```yaml
|
|
363
|
+
# experiments/chaos.yaml
|
|
364
|
+
seed: 31337
|
|
365
|
+
glitchlings:
|
|
366
|
+
- name: Typogre
|
|
367
|
+
rate: 0.04
|
|
368
|
+
- "Rushmore(rate=0.12, unweighted=True)"
|
|
369
|
+
- name: Zeedub
|
|
370
|
+
parameters:
|
|
371
|
+
rate: 0.02
|
|
372
|
+
characters: ["\u200b", "\u2060"]
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
Pass the file to `glitchlings --config` or load it from Python with `glitchlings.load_attack_config` and `glitchlings.build_gaggle`.
|
|
376
|
+
|
|
355
377
|
## Development
|
|
356
378
|
|
|
357
379
|
Follow the [development setup guide](docs/development.md) for editable installs, automated tests, and tips on enabling the Rust pipeline while you hack on new glitchlings.
|
|
@@ -416,8 +438,8 @@ _Uh oh. The worst person you know just bought a thesaurus._
|
|
|
416
438
|
>
|
|
417
439
|
> Args
|
|
418
440
|
>
|
|
419
|
-
> - `rate (float)`: The maximum proportion of words to replace (default: 0.
|
|
420
|
-
|
|
441
|
+
> - `rate (float)`: The maximum proportion of words to replace (default: 0.01, 1%).
|
|
442
|
+
> - `part_of_speech`: The WordNet-style part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all. Vector/graph backends ignore this filter while still honouring deterministic sampling.
|
|
421
443
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
422
444
|
|
|
423
445
|
### Reduple
|
|
@@ -428,7 +450,8 @@ _Did you say that or did I?_
|
|
|
428
450
|
>
|
|
429
451
|
> Args
|
|
430
452
|
>
|
|
431
|
-
> - `rate (float)`: The maximum proportion of words to reduplicate (default: 0.
|
|
453
|
+
> - `rate (float)`: The maximum proportion of words to reduplicate (default: 0.01, 1%).
|
|
454
|
+
> - `unweighted (bool)`: Sample words uniformly instead of favouring shorter tokens (default: False).
|
|
432
455
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
433
456
|
|
|
434
457
|
### Rushmore
|
|
@@ -440,6 +463,19 @@ _I accidentally an entire word._
|
|
|
440
463
|
> Args
|
|
441
464
|
>
|
|
442
465
|
> - `rate (float)`: The maximum proportion of words to delete (default: 0.01, 1%).
|
|
466
|
+
> - `unweighted (bool)`: Sample words uniformly instead of favouring shorter tokens (default: False).
|
|
467
|
+
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
468
|
+
|
|
469
|
+
### Adjax
|
|
470
|
+
|
|
471
|
+
_Keep your hands and punctuation where I can see them._
|
|
472
|
+
|
|
473
|
+
> _**Perfect Shuffle.**_ Adjax trades the cores of neighbouring words while leaving punctuation, casing, and surrounding whitespace untouched, turning fluent prose into locally scrambled tongue-twisters.
|
|
474
|
+
>
|
|
475
|
+
> Args
|
|
476
|
+
>
|
|
477
|
+
> - `rate (float)`: Probability that each adjacent pair swaps cores (default: 0.5, 50%).
|
|
478
|
+
> - `swap_rate (float)`: Alias for `rate`, retained for backward compatibility.
|
|
443
479
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
444
480
|
|
|
445
481
|
### Redactyl
|
|
@@ -450,9 +486,10 @@ _Oops, that was my black highlighter._
|
|
|
450
486
|
>
|
|
451
487
|
> ### Args
|
|
452
488
|
>
|
|
453
|
-
> - `replacement_char (str)`: The character to use for redaction (default:
|
|
454
|
-
> - `rate (float)`: The maximum proportion of words to redact (default: 0.
|
|
489
|
+
> - `replacement_char (str)`: The character to use for redaction (default: FULL_BLOCK).
|
|
490
|
+
> - `rate (float)`: The maximum proportion of words to redact (default: 0.025, 2.5%).
|
|
455
491
|
> - `merge_adjacent (bool)`: Whether to redact the space between adjacent redacted words (default: False).
|
|
492
|
+
> - `unweighted (bool)`: Sample words uniformly instead of biasing toward longer tokens (default: False).
|
|
456
493
|
> - `seed (int)`: The random seed for reproducibility (default: 151).
|
|
457
494
|
|
|
458
495
|
## Field Report: Uncontained Specimens
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
glitchlings/__init__.py,sha256=hEmQ1rl3G5uZBDbfJX_W4aIUNSsPAsy_Ai5DgQHasvk,813
|
|
2
|
+
glitchlings/__main__.py,sha256=EOiBgay0x6B9VlSDzSQvMuoq6bHJdSvFSgcAVGGKkd4,121
|
|
3
|
+
glitchlings/_zoo_rust.cpython-310-darwin.so,sha256=DLLDpALegooeoS_wCp9TjFD6h6UJUuBBhRt6tgcV3zk,2450096
|
|
4
|
+
glitchlings/config.py,sha256=hwkcMkhEvUzK8FECgG6kbf_4MpMQcopskiSgXzK5B3o,7785
|
|
5
|
+
glitchlings/config.toml,sha256=MWwgbx1-KIRAY3JZmMrCVbZNxFjHgRJXbtNAVuUNcxY,108
|
|
6
|
+
glitchlings/main.py,sha256=Rw9pCgNrGxwzC1rZbbng7cHUP9xlL0WWWTdjW95XiSM,10084
|
|
7
|
+
glitchlings/dlc/__init__.py,sha256=eTLEEWrVWPqniXHqee4W23H1rjElI1PQ_jcqWFe9D3g,141
|
|
8
|
+
glitchlings/dlc/huggingface.py,sha256=I1QWanWVxO02awgSpHDtgQEVF-9AQRLtsta2RCitWhE,2933
|
|
9
|
+
glitchlings/dlc/prime.py,sha256=wpRMNtgka1vNlEzifeCjGMp1q_-QclZn3NxXczGnNpM,9278
|
|
10
|
+
glitchlings/lexicon/__init__.py,sha256=-w35jPtg7WCP_IfRxAUZBNFXeSnlIaVfbJiPDI3f3K4,5663
|
|
11
|
+
glitchlings/lexicon/graph.py,sha256=_2w5shu-fEieDN-egpqLvMu0rxG78RAQWqENU0r7PlM,10533
|
|
12
|
+
glitchlings/lexicon/metrics.py,sha256=W8TCemZaCjBOUSX8G7JdgQAbMykXXfRTfodkDSkc3aQ,4599
|
|
13
|
+
glitchlings/lexicon/vector.py,sha256=Qqspc8KR4hqJiTTiXnu8DCIp2ROYPgEKK4RM4kLkyGY,20284
|
|
14
|
+
glitchlings/lexicon/wordnet.py,sha256=FwjTtVPOQEmWEXL0Sl4faM-C4PPNkDu_z7-FyINlh3c,5652
|
|
15
|
+
glitchlings/lexicon/data/default_vector_cache.json,sha256=7obKHqmR3odbTfgJPWLSRFYFh4J_6uvv_CntCSe_EjI,725
|
|
16
|
+
glitchlings/util/__init__.py,sha256=7KiZ0gKMjocfd34cajneZhTqYb7Hkwi_PpjltPqvkNI,4498
|
|
17
|
+
glitchlings/zoo/__init__.py,sha256=eFYmaWeFDlSqfaiED51HWM-OqiTo_BOz0ASeyhOwOsw,4818
|
|
18
|
+
glitchlings/zoo/_ocr_confusions.py,sha256=MkCbwk9T24SO2pD3JNPajYCfpMMlm2vQ5_sJty5GoXE,1218
|
|
19
|
+
glitchlings/zoo/_rate.py,sha256=TMyfVFV7pLxSGVswPlOAtBvk25Bjtx5xXTtpb_utgik,527
|
|
20
|
+
glitchlings/zoo/_sampling.py,sha256=VOSWDgYWXIiAuKxn2IckFJhpRgGotQP_KW28db8kTKI,1587
|
|
21
|
+
glitchlings/zoo/_text_utils.py,sha256=nAfFT_VdXMXciCR7eQ5EAmym5wvzL6_Sdn9dvCx2s3Q,2758
|
|
22
|
+
glitchlings/zoo/adjax.py,sha256=N3CzfM7m7mAYgFcQYLQkqK2VYLw_vFvEMBM2aNU--ZA,3530
|
|
23
|
+
glitchlings/zoo/core.py,sha256=fhceCZKa9W1vVlhpR2zVKBXnzgJICB2-nmDywiqx4js,14207
|
|
24
|
+
glitchlings/zoo/jargoyle.py,sha256=6-DJxUFz2AjT-iQDFlK2ZG9pVwq2boDtslEzCNyI_04,11481
|
|
25
|
+
glitchlings/zoo/mim1c.py,sha256=yAt1ngR3j2KXLbzc8LhrQlIWRO_KT5dFK1EE8QivMAQ,3429
|
|
26
|
+
glitchlings/zoo/ocr_confusions.tsv,sha256=KhtR7vJDTITpfTSGa-I7RHr6CK7LkGi2KjdhEWipI6o,183
|
|
27
|
+
glitchlings/zoo/redactyl.py,sha256=H4PwAMBCIsDw1KBOBiTR3VUbRZwynqakwwfx3wHjVp8,5457
|
|
28
|
+
glitchlings/zoo/reduple.py,sha256=Q9NRCdvUgaHvvJu8A0n6zW9v_L3pdmNZbWqaJ7uycw4,4216
|
|
29
|
+
glitchlings/zoo/rushmore.py,sha256=J1wd4IB7WOAR2TdntkxCMZWseWR0Yii8UQZ7ucfpWCc,4335
|
|
30
|
+
glitchlings/zoo/scannequin.py,sha256=Ps8nxysKjkJV408zaL1kjVjy4jliATDBpYcNHLWbNFg,4859
|
|
31
|
+
glitchlings/zoo/typogre.py,sha256=0fYaxOEiTnxiCqmsiSN1r_wl1vC1Ueaiks2e94kks70,6668
|
|
32
|
+
glitchlings/zoo/zeedub.py,sha256=l51swlo556-TXhDk4nayHOm1XgHwWmfUKzQ01YMuCpE,4801
|
|
33
|
+
glitchlings-0.4.0.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
|
|
34
|
+
glitchlings-0.4.0.dist-info/METADATA,sha256=Dldj4SIrrNF6TKAvvJghd_L4lVrzdViqb8DWMSvPWVE,28345
|
|
35
|
+
glitchlings-0.4.0.dist-info/WHEEL,sha256=G4cu_uTI97hAXSudQC0D9fpgNQkuavCNljtwFXiUqZM,114
|
|
36
|
+
glitchlings-0.4.0.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
|
|
37
|
+
glitchlings-0.4.0.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
|
|
38
|
+
glitchlings-0.4.0.dist-info/RECORD,,
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
glitchlings/__init__.py,sha256=ui8kzf7mK5YAlFY1Og5UX5Rp14v4wC2ZqHihAJBBj6s,632
|
|
2
|
-
glitchlings/__main__.py,sha256=EOiBgay0x6B9VlSDzSQvMuoq6bHJdSvFSgcAVGGKkd4,121
|
|
3
|
-
glitchlings/_zoo_rust.cpython-310-darwin.so,sha256=4cUtfVEjY-3czJzu-DajFXyqfmgP_vjYZT4P1-Ip9WE,2389632
|
|
4
|
-
glitchlings/main.py,sha256=u6969Vl0n47e3S-ZlYZBj3HWVsjs-hvW6RpF9RYuXnc,5931
|
|
5
|
-
glitchlings/dlc/__init__.py,sha256=eTLEEWrVWPqniXHqee4W23H1rjElI1PQ_jcqWFe9D3g,141
|
|
6
|
-
glitchlings/dlc/huggingface.py,sha256=I1QWanWVxO02awgSpHDtgQEVF-9AQRLtsta2RCitWhE,2933
|
|
7
|
-
glitchlings/dlc/prime.py,sha256=wpRMNtgka1vNlEzifeCjGMp1q_-QclZn3NxXczGnNpM,9278
|
|
8
|
-
glitchlings/util/__init__.py,sha256=7KiZ0gKMjocfd34cajneZhTqYb7Hkwi_PpjltPqvkNI,4498
|
|
9
|
-
glitchlings/zoo/__init__.py,sha256=pdQSiQjMCqnhrM3qSRvu98FJd-EyXLNNwvthnYSXpmM,4282
|
|
10
|
-
glitchlings/zoo/_ocr_confusions.py,sha256=MkCbwk9T24SO2pD3JNPajYCfpMMlm2vQ5_sJty5GoXE,1218
|
|
11
|
-
glitchlings/zoo/_rate.py,sha256=TMyfVFV7pLxSGVswPlOAtBvk25Bjtx5xXTtpb_utgik,527
|
|
12
|
-
glitchlings/zoo/core.py,sha256=xLF9Op07KtMH0ql1-O7KyZ6lLESsdeNkvxdyiSOzhAc,14236
|
|
13
|
-
glitchlings/zoo/jargoyle.py,sha256=T6vPWBxceIPE6gOQ7BaihaqALOJwzXuhfiZzvKa4S50,10666
|
|
14
|
-
glitchlings/zoo/mim1c.py,sha256=yAt1ngR3j2KXLbzc8LhrQlIWRO_KT5dFK1EE8QivMAQ,3429
|
|
15
|
-
glitchlings/zoo/ocr_confusions.tsv,sha256=KhtR7vJDTITpfTSGa-I7RHr6CK7LkGi2KjdhEWipI6o,183
|
|
16
|
-
glitchlings/zoo/redactyl.py,sha256=wn7hxbtA0xMRuIXa6NNeeNOi0h0S8vh2bAa3x5Ec_Y0,6783
|
|
17
|
-
glitchlings/zoo/reduple.py,sha256=YNhTBH25XsXLeQD8xxXPE_JJMiCtmEpUFGGn36rd2tY,4857
|
|
18
|
-
glitchlings/zoo/rushmore.py,sha256=oG8MmMbrpmHH4rOp-NXkQznVlBCtSnrOttAZMdVlMkc,4729
|
|
19
|
-
glitchlings/zoo/scannequin.py,sha256=Ps8nxysKjkJV408zaL1kjVjy4jliATDBpYcNHLWbNFg,4859
|
|
20
|
-
glitchlings/zoo/typogre.py,sha256=xD02ldcMIA07XsdSts2bUniOc-k_DqTf0PBMaXGjLZE,6009
|
|
21
|
-
glitchlings/zoo/zeedub.py,sha256=D6rGk3O02OQ9jEIO9o0Ag-maVzNPN5O6qO3klG6Y62c,3552
|
|
22
|
-
glitchlings-0.2.6.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
|
|
23
|
-
glitchlings-0.2.6.dist-info/METADATA,sha256=5Xg6w5_-87bIRXY51i-nd7EmJMVPLtcBH_V3tj74CWI,26749
|
|
24
|
-
glitchlings-0.2.6.dist-info/WHEEL,sha256=G4cu_uTI97hAXSudQC0D9fpgNQkuavCNljtwFXiUqZM,114
|
|
25
|
-
glitchlings-0.2.6.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
|
|
26
|
-
glitchlings-0.2.6.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
|
|
27
|
-
glitchlings-0.2.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|