glitchlings 0.2.2__cp312-cp312-win_amd64.whl → 0.2.4__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
glitchlings/dlc/prime.py CHANGED
@@ -79,8 +79,8 @@ def tutorial_level(
79
79
  ) -> vf.Environment:
80
80
  """Create a low-corruption environment using tuned defaults."""
81
81
 
82
- tuned_mim1c = Mim1c(replacement_rate=0.01 * difficulty.value)
83
- tuned_typogre = Typogre(max_change_rate=0.025 * difficulty.value)
82
+ tuned_mim1c = Mim1c(rate=0.01 * difficulty.value)
83
+ tuned_typogre = Typogre(rate=0.025 * difficulty.value)
84
84
 
85
85
  return load_environment(
86
86
  env,
@@ -220,32 +220,54 @@ def echo_chamber(
220
220
  "Specify which split to use when the dataset loads as a DatasetDict."
221
221
  )
222
222
 
223
- prompts: list[list[dict[str, str]]] = []
224
- answers: list[str] = []
223
+ filtered_dataset = hf_dataset.filter(
224
+ lambda row: row.get(column) is not None,
225
+ load_from_cache_file=False,
226
+ )
225
227
 
226
- for row in hf_dataset:
227
- value = row.get(column)
228
- if value is None:
229
- continue
228
+ source_column_names = list(filtered_dataset.column_names)
230
229
 
231
- text = str(value)
232
- prompts.append(
233
- [
234
- {"role": "system", "content": instructions},
235
- {"role": "user", "content": f"Corrupted text:\n{text}"},
236
- ]
237
- )
238
- answers.append(text)
230
+ def _build_prompt(row: dict[str, Any]) -> dict[str, Any]:
231
+ text = str(row[column])
232
+ prompt = [
233
+ {"role": "system", "content": instructions},
234
+ {"role": "user", "content": f"Corrupted text:\n{text}"},
235
+ ]
236
+ return {"prompt": prompt, "answer": text}
239
237
 
240
- if not prompts:
241
- raise ValueError(
242
- f"Column '{column}' did not yield any textual entries in dataset '{dataset_id}'."
243
- )
238
+ base_dataset = filtered_dataset.map(
239
+ _build_prompt,
240
+ remove_columns=source_column_names,
241
+ load_from_cache_file=False,
242
+ )
244
243
 
245
- dataset = HFDataset.from_dict({"prompt": prompts, "answer": answers})
244
+ try:
245
+ dataset_length = len(base_dataset) # type: ignore[arg-type]
246
+ except TypeError:
247
+ preview_rows: list[dict[str, Any]]
248
+ take_fn = getattr(base_dataset, "take", None)
249
+ if callable(take_fn):
250
+ preview_rows = list(take_fn(1))
251
+ else:
252
+ iterator = iter(base_dataset)
253
+ try:
254
+ first_row = next(iterator)
255
+ except StopIteration:
256
+ preview_rows = []
257
+ else:
258
+ preview_rows = [first_row]
259
+ if not preview_rows:
260
+ raise ValueError(
261
+ f"Column '{column}' did not yield any textual entries in dataset '{dataset_id}'."
262
+ )
263
+ else:
264
+ if dataset_length == 0:
265
+ raise ValueError(
266
+ f"Column '{column}' did not yield any textual entries in dataset '{dataset_id}'."
267
+ )
246
268
 
247
269
  gaggle = _as_gaggle(glitchlings, seed=seed)
248
- glitched_dataset = gaggle.corrupt_dataset(dataset, ["prompt"])
270
+ glitched_dataset = gaggle.corrupt_dataset(base_dataset, ["prompt"])
249
271
 
250
272
  rubric_func = reward_function or symmetric_damerau_levenshtein_similarity
251
273
  rubric = vf.Rubric(funcs=[rubric_func], weights=[1.0])
glitchlings/main.py CHANGED
@@ -46,7 +46,7 @@ def build_parser() -> argparse.ArgumentParser:
46
46
  metavar="SPEC",
47
47
  help=(
48
48
  "Glitchling to apply, optionally with parameters like "
49
- "Typogre(max_change_rate=0.05). Repeat for multiples; defaults to all built-ins."
49
+ "Typogre(rate=0.05). Repeat for multiples; defaults to all built-ins."
50
50
  ),
51
51
  )
52
52
  parser.add_argument(
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+
3
+
4
+ def resolve_rate(
5
+ *,
6
+ rate: float | None,
7
+ legacy_value: float | None,
8
+ default: float,
9
+ legacy_name: str,
10
+ ) -> float:
11
+ """Return the effective rate while enforcing mutual exclusivity."""
12
+
13
+ if rate is not None and legacy_value is not None:
14
+ raise ValueError(
15
+ f"Specify either 'rate' or '{legacy_name}', not both."
16
+ )
17
+ if rate is not None:
18
+ return rate
19
+ if legacy_value is not None:
20
+ return legacy_value
21
+ return default
glitchlings/zoo/core.py CHANGED
@@ -107,6 +107,7 @@ class Glitchling:
107
107
  scope: AttackWave,
108
108
  order: AttackOrder = AttackOrder.NORMAL,
109
109
  seed: int | None = None,
110
+ pipeline_operation: Callable[["Glitchling"], dict[str, Any] | None] | None = None,
110
111
  **kwargs: Any,
111
112
  ) -> None:
112
113
  """Initialize a glitchling.
@@ -128,31 +129,76 @@ class Glitchling:
128
129
  self.corruption_function: CorruptionCallable = corruption_function
129
130
  self.level: AttackWave = scope
130
131
  self.order: AttackOrder = order
132
+ self._pipeline_descriptor_factory = pipeline_operation
131
133
  self.kwargs: dict[str, Any] = {}
134
+ self._cached_rng_callable: CorruptionCallable | None = None
135
+ self._cached_rng_expectation: bool | None = None
132
136
  for kw, val in kwargs.items():
133
137
  self.set_param(kw, val)
134
138
 
135
139
  def set_param(self, key: str, value: Any) -> None:
136
140
  """Persist a parameter for use by the corruption callable."""
137
141
 
138
- setattr(self, key, value)
139
- self.kwargs[key] = value
140
- if key == "seed":
142
+ aliases = getattr(self, "_param_aliases", {})
143
+ canonical = aliases.get(key, key)
144
+
145
+ # Drop stale alias keys so we only forward canonical kwargs.
146
+ self.kwargs.pop(key, None)
147
+ for alias, target in aliases.items():
148
+ if target == canonical:
149
+ self.kwargs.pop(alias, None)
150
+
151
+ self.kwargs[canonical] = value
152
+ setattr(self, canonical, value)
153
+
154
+ if canonical == "seed":
141
155
  self.reset_rng(value)
142
156
 
143
- def __corrupt(self, text: str, *args: Any, **kwargs: Any) -> str:
144
- """Execute the corruption callable, injecting the RNG when required."""
157
+ for alias, target in aliases.items():
158
+ if target == canonical:
159
+ setattr(self, alias, value)
145
160
 
146
- # Pass rng to underlying corruption function if it expects it.
161
+ def pipeline_operation(self) -> dict[str, Any] | None:
162
+ """Return the Rust pipeline operation descriptor for this glitchling."""
163
+
164
+ factory = self._pipeline_descriptor_factory
165
+ if factory is None:
166
+ return None
167
+
168
+ return factory(self)
169
+
170
+ def _corruption_expects_rng(self) -> bool:
171
+ """Return `True` when the corruption function accepts an rng keyword."""
172
+
173
+ cached_callable = self._cached_rng_callable
174
+ cached_expectation = self._cached_rng_expectation
175
+ corruption_function = self.corruption_function
176
+
177
+ if (
178
+ cached_callable is corruption_function
179
+ and cached_expectation is not None
180
+ ):
181
+ return cached_expectation
182
+
183
+ expects_rng = False
147
184
  try:
148
- signature = inspect.signature(self.corruption_function)
185
+ signature = inspect.signature(corruption_function)
149
186
  except (TypeError, ValueError):
150
187
  signature = None
151
188
 
152
- expects_rng = False
153
189
  if signature is not None:
154
190
  expects_rng = "rng" in signature.parameters
155
191
 
192
+ self._cached_rng_callable = corruption_function
193
+ self._cached_rng_expectation = expects_rng
194
+ return expects_rng
195
+
196
+ def __corrupt(self, text: str, *args: Any, **kwargs: Any) -> str:
197
+ """Execute the corruption callable, injecting the RNG when required."""
198
+
199
+ # Pass rng to underlying corruption function if it expects it.
200
+ expects_rng = self._corruption_expects_rng()
201
+
156
202
  if expects_rng:
157
203
  corrupted = self.corruption_function(text, *args, rng=self.rng, **kwargs)
158
204
  else:
@@ -231,53 +277,14 @@ class Glitchling:
231
277
  self.corruption_function,
232
278
  self.level,
233
279
  self.order,
280
+ pipeline_operation=self._pipeline_descriptor_factory,
234
281
  **filtered_kwargs,
235
282
  )
236
283
 
237
284
  return cls(**filtered_kwargs)
238
285
 
239
286
 
240
- def _pipeline_operation_reduplicate(glitchling: "Glitchling") -> dict[str, Any] | None:
241
- rate = glitchling.kwargs.get("reduplication_rate")
242
- if rate is None:
243
- return None
244
- return {"type": "reduplicate", "reduplication_rate": float(rate)}
245
-
246
287
 
247
- def _pipeline_operation_delete(glitchling: "Glitchling") -> dict[str, Any] | None:
248
- rate = glitchling.kwargs.get("max_deletion_rate")
249
- if rate is None:
250
- return None
251
- return {"type": "delete", "max_deletion_rate": float(rate)}
252
-
253
-
254
- def _pipeline_operation_redact(glitchling: "Glitchling") -> dict[str, Any] | None:
255
- replacement_char = glitchling.kwargs.get("replacement_char")
256
- redaction_rate = glitchling.kwargs.get("redaction_rate")
257
- merge_adjacent = glitchling.kwargs.get("merge_adjacent")
258
- if replacement_char is None or redaction_rate is None or merge_adjacent is None:
259
- return None
260
- return {
261
- "type": "redact",
262
- "replacement_char": str(replacement_char),
263
- "redaction_rate": float(redaction_rate),
264
- "merge_adjacent": bool(merge_adjacent),
265
- }
266
-
267
-
268
- def _pipeline_operation_ocr(glitchling: "Glitchling") -> dict[str, Any] | None:
269
- error_rate = glitchling.kwargs.get("error_rate")
270
- if error_rate is None:
271
- return None
272
- return {"type": "ocr", "error_rate": float(error_rate)}
273
-
274
-
275
- _PIPELINE_OPERATION_BUILDERS: dict[str, Callable[["Glitchling"], dict[str, Any] | None]] = {
276
- "Reduple": _pipeline_operation_reduplicate,
277
- "Rushmore": _pipeline_operation_delete,
278
- "Redactyl": _pipeline_operation_redact,
279
- "Scannequin": _pipeline_operation_ocr,
280
- }
281
288
 
282
289
 
283
290
  class Gaggle(Glitchling):
@@ -359,10 +366,7 @@ class Gaggle(Glitchling):
359
366
 
360
367
  descriptors: list[dict[str, Any]] = []
361
368
  for glitchling in self.apply_order:
362
- builder = _PIPELINE_OPERATION_BUILDERS.get(glitchling.name)
363
- if builder is None:
364
- return None
365
- operation = builder(glitchling)
369
+ operation = glitchling.pipeline_operation()
366
370
  if operation is None:
367
371
  return None
368
372
 
@@ -33,6 +33,7 @@ else:
33
33
  _WORDNET_MODULE = None
34
34
 
35
35
  from .core import AttackWave, Glitchling
36
+ from ._rate import resolve_rate
36
37
 
37
38
  _WORDNET_HANDLE: WordNetCorpusReader | Any | None = _WORDNET_MODULE
38
39
 
@@ -211,16 +212,18 @@ def _collect_synonyms(
211
212
 
212
213
  def substitute_random_synonyms(
213
214
  text: str,
214
- replacement_rate: float = 0.1,
215
+ rate: float | None = None,
215
216
  part_of_speech: PartOfSpeechInput = "n",
216
217
  seed: int | None = None,
217
218
  rng: random.Random | None = None,
219
+ *,
220
+ replacement_rate: float | None = None,
218
221
  ) -> str:
219
222
  """Replace words with random WordNet synonyms.
220
223
 
221
224
  Parameters
222
225
  - text: Input text.
223
- - replacement_rate: Max proportion of candidate words to replace (default 0.1).
226
+ - rate: Max proportion of candidate words to replace (default 0.1).
224
227
  - part_of_speech: WordNet POS tag(s) to target. Accepts "n", "v", "a", "r",
225
228
  any iterable of those tags, or "any" to include all four.
226
229
  - rng: Optional RNG instance used for deterministic sampling.
@@ -232,6 +235,13 @@ def substitute_random_synonyms(
232
235
  - Synonyms sorted before rng.choice to fix ordering.
233
236
  - For each POS, the first synset containing alternate lemmas is used for stability.
234
237
  """
238
+ effective_rate = resolve_rate(
239
+ rate=rate,
240
+ legacy_value=replacement_rate,
241
+ default=0.1,
242
+ legacy_name="replacement_rate",
243
+ )
244
+
235
245
  ensure_wordnet()
236
246
  wordnet = _wordnet()
237
247
 
@@ -270,7 +280,8 @@ def substitute_random_synonyms(
270
280
  if not candidate_indices:
271
281
  return text
272
282
 
273
- max_replacements = int(len(candidate_indices) * replacement_rate)
283
+ clamped_rate = max(0.0, effective_rate)
284
+ max_replacements = int(len(candidate_indices) * clamped_rate)
274
285
  if max_replacements <= 0:
275
286
  return text
276
287
 
@@ -297,16 +308,24 @@ class Jargoyle(Glitchling):
297
308
  def __init__(
298
309
  self,
299
310
  *,
300
- replacement_rate: float = 0.1,
311
+ rate: float | None = None,
312
+ replacement_rate: float | None = None,
301
313
  part_of_speech: PartOfSpeechInput = "n",
302
314
  seed: int | None = None,
303
315
  ) -> None:
316
+ self._param_aliases = {"replacement_rate": "rate"}
317
+ effective_rate = resolve_rate(
318
+ rate=rate,
319
+ legacy_value=replacement_rate,
320
+ default=0.1,
321
+ legacy_name="replacement_rate",
322
+ )
304
323
  super().__init__(
305
324
  name="Jargoyle",
306
325
  corruption_function=substitute_random_synonyms,
307
326
  scope=AttackWave.WORD,
308
327
  seed=seed,
309
- replacement_rate=replacement_rate,
328
+ rate=effective_rate,
310
329
  part_of_speech=part_of_speech,
311
330
  )
312
331
 
glitchlings/zoo/mim1c.py CHANGED
@@ -5,21 +5,24 @@ from typing import Literal
5
5
  from confusable_homoglyphs import confusables
6
6
 
7
7
  from .core import AttackOrder, AttackWave, Glitchling
8
+ from ._rate import resolve_rate
8
9
 
9
10
 
10
11
  def swap_homoglyphs(
11
12
  text: str,
12
- replacement_rate: float = 0.02,
13
+ rate: float | None = None,
13
14
  classes: list[str] | Literal["all"] | None = None,
14
15
  banned_characters: Collection[str] | None = None,
15
16
  seed: int | None = None,
16
17
  rng: random.Random | None = None,
18
+ *,
19
+ replacement_rate: float | None = None,
17
20
  ) -> str:
18
21
  """Replace characters with visually confusable homoglyphs.
19
22
 
20
23
  Parameters
21
24
  - text: Input text.
22
- - replacement_rate: Max proportion of eligible characters to replace (default 0.02).
25
+ - rate: Max proportion of eligible characters to replace (default 0.02).
23
26
  - classes: Restrict replacements to these Unicode script classes (default ["LATIN","GREEK","CYRILLIC"]). Use "all" to allow any.
24
27
  - banned_characters: Characters that must never appear as replacements.
25
28
  - seed: Optional seed if `rng` not provided.
@@ -29,6 +32,13 @@ def swap_homoglyphs(
29
32
  - Only replaces characters present in confusables.confusables_data with single-codepoint alternatives.
30
33
  - Maintains determinism by shuffling candidates and sampling via the provided RNG.
31
34
  """
35
+ effective_rate = resolve_rate(
36
+ rate=rate,
37
+ legacy_value=replacement_rate,
38
+ default=0.02,
39
+ legacy_name="replacement_rate",
40
+ )
41
+
32
42
  if rng is None:
33
43
  rng = random.Random(seed)
34
44
 
@@ -39,7 +49,8 @@ def swap_homoglyphs(
39
49
  confusable_chars = [
40
50
  char for char in target_chars if char in confusables.confusables_data
41
51
  ]
42
- num_replacements = int(len(confusable_chars) * replacement_rate)
52
+ clamped_rate = max(0.0, effective_rate)
53
+ num_replacements = int(len(confusable_chars) * clamped_rate)
43
54
  done = 0
44
55
  rng.shuffle(confusable_chars)
45
56
  banned_set = set(banned_characters or ())
@@ -66,18 +77,26 @@ class Mim1c(Glitchling):
66
77
  def __init__(
67
78
  self,
68
79
  *,
69
- replacement_rate: float = 0.02,
80
+ rate: float | None = None,
81
+ replacement_rate: float | None = None,
70
82
  classes: list[str] | Literal["all"] | None = None,
71
83
  banned_characters: Collection[str] | None = None,
72
84
  seed: int | None = None,
73
85
  ) -> None:
86
+ self._param_aliases = {"replacement_rate": "rate"}
87
+ effective_rate = resolve_rate(
88
+ rate=rate,
89
+ legacy_value=replacement_rate,
90
+ default=0.02,
91
+ legacy_name="replacement_rate",
92
+ )
74
93
  super().__init__(
75
94
  name="Mim1c",
76
95
  corruption_function=swap_homoglyphs,
77
96
  scope=AttackWave.CHARACTER,
78
97
  order=AttackOrder.LAST,
79
98
  seed=seed,
80
- replacement_rate=replacement_rate,
99
+ rate=effective_rate,
81
100
  classes=classes,
82
101
  banned_characters=banned_characters,
83
102
  )
@@ -1,7 +1,9 @@
1
1
  import re
2
2
  import random
3
+ from typing import Any
3
4
 
4
5
  from .core import Glitchling, AttackWave
6
+ from ._rate import resolve_rate
5
7
 
6
8
  FULL_BLOCK = "█"
7
9
 
@@ -16,7 +18,7 @@ def _python_redact_words(
16
18
  text: str,
17
19
  *,
18
20
  replacement_char: str,
19
- redaction_rate: float,
21
+ rate: float,
20
22
  merge_adjacent: bool,
21
23
  rng: random.Random,
22
24
  ) -> str:
@@ -25,7 +27,7 @@ def _python_redact_words(
25
27
  Parameters
26
28
  - text: Input text.
27
29
  - replacement_char: The character to use for redaction (default FULL_BLOCK).
28
- - redaction_rate: Max proportion of words to redact (default 0.05).
30
+ - rate: Max proportion of words to redact (default 0.05).
29
31
  - merge_adjacent: If True, merges adjacent redactions across intervening non-word chars.
30
32
  - seed: Seed used if `rng` not provided (default 151).
31
33
  - rng: Optional RNG; overrides seed.
@@ -35,7 +37,7 @@ def _python_redact_words(
35
37
  word_indices = [i for i, token in enumerate(tokens) if i % 2 == 0 and token.strip()]
36
38
  if not word_indices:
37
39
  raise ValueError("Cannot redact words because the input text contains no redactable words.")
38
- num_to_redact = max(1, int(len(word_indices) * redaction_rate))
40
+ num_to_redact = max(1, int(len(word_indices) * rate))
39
41
 
40
42
  # Sample from the indices of actual words
41
43
  indices_to_redact = rng.sample(word_indices, k=num_to_redact)
@@ -72,23 +74,34 @@ def _python_redact_words(
72
74
  def redact_words(
73
75
  text: str,
74
76
  replacement_char: str = FULL_BLOCK,
75
- redaction_rate: float = 0.05,
77
+ rate: float | None = None,
76
78
  merge_adjacent: bool = False,
77
79
  seed: int = 151,
78
80
  rng: random.Random | None = None,
81
+ *,
82
+ redaction_rate: float | None = None,
79
83
  ) -> str:
80
84
  """Redact random words by replacing their characters."""
81
85
 
86
+ effective_rate = resolve_rate(
87
+ rate=rate,
88
+ legacy_value=redaction_rate,
89
+ default=0.05,
90
+ legacy_name="redaction_rate",
91
+ )
92
+
82
93
  if rng is None:
83
94
  rng = random.Random(seed)
84
95
 
96
+ clamped_rate = max(0.0, effective_rate)
97
+
85
98
  use_rust = _redact_words_rust is not None and isinstance(merge_adjacent, bool)
86
99
 
87
100
  if use_rust:
88
101
  return _redact_words_rust(
89
102
  text,
90
103
  replacement_char,
91
- redaction_rate,
104
+ clamped_rate,
92
105
  merge_adjacent,
93
106
  rng,
94
107
  )
@@ -96,7 +109,7 @@ def redact_words(
96
109
  return _python_redact_words(
97
110
  text,
98
111
  replacement_char=replacement_char,
99
- redaction_rate=redaction_rate,
112
+ rate=clamped_rate,
100
113
  merge_adjacent=merge_adjacent,
101
114
  rng=rng,
102
115
  )
@@ -109,20 +122,42 @@ class Redactyl(Glitchling):
109
122
  self,
110
123
  *,
111
124
  replacement_char: str = FULL_BLOCK,
112
- redaction_rate: float = 0.05,
125
+ rate: float | None = None,
126
+ redaction_rate: float | None = None,
113
127
  merge_adjacent: bool = False,
114
128
  seed: int = 151,
115
129
  ) -> None:
130
+ self._param_aliases = {"redaction_rate": "rate"}
131
+ effective_rate = resolve_rate(
132
+ rate=rate,
133
+ legacy_value=redaction_rate,
134
+ default=0.05,
135
+ legacy_name="redaction_rate",
136
+ )
116
137
  super().__init__(
117
138
  name="Redactyl",
118
139
  corruption_function=redact_words,
119
140
  scope=AttackWave.WORD,
120
141
  seed=seed,
121
142
  replacement_char=replacement_char,
122
- redaction_rate=redaction_rate,
143
+ rate=effective_rate,
123
144
  merge_adjacent=merge_adjacent,
124
145
  )
125
146
 
147
+ def pipeline_operation(self) -> dict[str, Any] | None:
148
+ replacement_char = self.kwargs.get("replacement_char")
149
+ rate = self.kwargs.get("rate")
150
+ merge_adjacent = self.kwargs.get("merge_adjacent")
151
+ if replacement_char is None or rate is None or merge_adjacent is None:
152
+ return None
153
+ return {
154
+ "type": "redact",
155
+ "replacement_char": str(replacement_char),
156
+ "redaction_rate": float(rate),
157
+ "merge_adjacent": bool(merge_adjacent),
158
+ }
159
+
160
+
126
161
 
127
162
  redactyl = Redactyl()
128
163
 
@@ -1,7 +1,9 @@
1
1
  import re
2
2
  import random
3
+ from typing import Any
3
4
 
4
5
  from .core import Glitchling, AttackWave
6
+ from ._rate import resolve_rate
5
7
 
6
8
  try:
7
9
  from glitchlings._zoo_rust import reduplicate_words as _reduplicate_words_rust
@@ -12,14 +14,14 @@ except ImportError: # pragma: no cover - compiled extension not present
12
14
  def _python_reduplicate_words(
13
15
  text: str,
14
16
  *,
15
- reduplication_rate: float,
17
+ rate: float,
16
18
  rng: random.Random,
17
19
  ) -> str:
18
20
  """Randomly reduplicate words in the text.
19
21
 
20
22
  Parameters
21
23
  - text: Input text.
22
- - reduplication_rate: Max proportion of words to reduplicate (default 0.05).
24
+ - rate: Max proportion of words to reduplicate (default 0.05).
23
25
  - seed: Optional seed if `rng` not provided.
24
26
  - rng: Optional RNG; overrides seed.
25
27
 
@@ -39,7 +41,7 @@ def _python_reduplicate_words(
39
41
  continue
40
42
 
41
43
  # Only consider actual words for reduplication
42
- if rng.random() < reduplication_rate:
44
+ if rng.random() < rate:
43
45
  # Check if word has trailing punctuation
44
46
  match = re.match(r"^(\W*)(.*?)(\W*)$", word)
45
47
  if match:
@@ -53,9 +55,11 @@ def _python_reduplicate_words(
53
55
 
54
56
  def reduplicate_words(
55
57
  text: str,
56
- reduplication_rate: float = 0.05,
58
+ rate: float | None = None,
57
59
  seed: int | None = None,
58
60
  rng: random.Random | None = None,
61
+ *,
62
+ reduplication_rate: float | None = None,
59
63
  ) -> str:
60
64
  """Randomly reduplicate words in the text.
61
65
 
@@ -63,15 +67,24 @@ def reduplicate_words(
63
67
  extension is unavailable.
64
68
  """
65
69
 
70
+ effective_rate = resolve_rate(
71
+ rate=rate,
72
+ legacy_value=reduplication_rate,
73
+ default=0.05,
74
+ legacy_name="reduplication_rate",
75
+ )
76
+
66
77
  if rng is None:
67
78
  rng = random.Random(seed)
68
79
 
80
+ clamped_rate = max(0.0, effective_rate)
81
+
69
82
  if _reduplicate_words_rust is not None:
70
- return _reduplicate_words_rust(text, reduplication_rate, rng)
83
+ return _reduplicate_words_rust(text, clamped_rate, rng)
71
84
 
72
85
  return _python_reduplicate_words(
73
86
  text,
74
- reduplication_rate=reduplication_rate,
87
+ rate=clamped_rate,
75
88
  rng=rng,
76
89
  )
77
90
 
@@ -82,17 +95,32 @@ class Reduple(Glitchling):
82
95
  def __init__(
83
96
  self,
84
97
  *,
85
- reduplication_rate: float = 0.05,
98
+ rate: float | None = None,
99
+ reduplication_rate: float | None = None,
86
100
  seed: int | None = None,
87
101
  ) -> None:
102
+ self._param_aliases = {"reduplication_rate": "rate"}
103
+ effective_rate = resolve_rate(
104
+ rate=rate,
105
+ legacy_value=reduplication_rate,
106
+ default=0.05,
107
+ legacy_name="reduplication_rate",
108
+ )
88
109
  super().__init__(
89
110
  name="Reduple",
90
111
  corruption_function=reduplicate_words,
91
112
  scope=AttackWave.WORD,
92
113
  seed=seed,
93
- reduplication_rate=reduplication_rate,
114
+ rate=effective_rate,
94
115
  )
95
116
 
117
+ def pipeline_operation(self) -> dict[str, Any] | None:
118
+ rate = self.kwargs.get("rate")
119
+ if rate is None:
120
+ return None
121
+ return {"type": "reduplicate", "reduplication_rate": float(rate)}
122
+
123
+
96
124
 
97
125
  reduple = Reduple()
98
126
 
@@ -1,8 +1,10 @@
1
1
  import math
2
2
  import random
3
3
  import re
4
+ from typing import Any
4
5
 
5
6
  from .core import Glitchling, AttackWave
7
+ from ._rate import resolve_rate
6
8
 
7
9
  try:
8
10
  from glitchlings._zoo_rust import delete_random_words as _delete_random_words_rust
@@ -13,11 +15,14 @@ except ImportError: # pragma: no cover - compiled extension not present
13
15
  def _python_delete_random_words(
14
16
  text: str,
15
17
  *,
16
- max_deletion_rate: float,
18
+ rate: float,
17
19
  rng: random.Random,
18
20
  ) -> str:
19
21
  """Delete random words from the input text while preserving whitespace."""
20
22
 
23
+ if rate <= 0.0:
24
+ return text
25
+
21
26
  tokens = re.split(r"(\s+)", text) # Split but keep separators for later rejoin
22
27
 
23
28
  candidate_indices: list[int] = []
@@ -29,14 +34,14 @@ def _python_delete_random_words(
29
34
  candidate_indices.append(i)
30
35
 
31
36
  allowed_deletions = min(
32
- len(candidate_indices), math.floor(len(candidate_indices) * max_deletion_rate)
37
+ len(candidate_indices), math.floor(len(candidate_indices) * rate)
33
38
  )
34
39
  if allowed_deletions <= 0:
35
40
  return text
36
41
 
37
42
  deletions = 0
38
43
  for i in candidate_indices:
39
- if rng.random() < max_deletion_rate:
44
+ if rng.random() < rate:
40
45
  word = tokens[i]
41
46
  match = re.match(r"^(\W*)(.*?)(\W*)$", word)
42
47
  if match:
@@ -58,24 +63,35 @@ def _python_delete_random_words(
58
63
 
59
64
  def delete_random_words(
60
65
  text: str,
61
- max_deletion_rate: float = 0.01,
66
+ rate: float | None = None,
62
67
  seed: int | None = None,
63
68
  rng: random.Random | None = None,
69
+ *,
70
+ max_deletion_rate: float | None = None,
64
71
  ) -> str:
65
72
  """Delete random words from the input text.
66
73
 
67
74
  Uses the optional Rust implementation when available.
68
75
  """
69
76
 
77
+ effective_rate = resolve_rate(
78
+ rate=rate,
79
+ legacy_value=max_deletion_rate,
80
+ default=0.01,
81
+ legacy_name="max_deletion_rate",
82
+ )
83
+
70
84
  if rng is None:
71
85
  rng = random.Random(seed)
72
86
 
87
+ clamped_rate = max(0.0, effective_rate)
88
+
73
89
  if _delete_random_words_rust is not None:
74
- return _delete_random_words_rust(text, max_deletion_rate, rng)
90
+ return _delete_random_words_rust(text, clamped_rate, rng)
75
91
 
76
92
  return _python_delete_random_words(
77
93
  text,
78
- max_deletion_rate=max_deletion_rate,
94
+ rate=clamped_rate,
79
95
  rng=rng,
80
96
  )
81
97
 
@@ -86,17 +102,33 @@ class Rushmore(Glitchling):
86
102
  def __init__(
87
103
  self,
88
104
  *,
89
- max_deletion_rate: float = 0.01,
105
+ rate: float | None = None,
106
+ max_deletion_rate: float | None = None,
90
107
  seed: int | None = None,
91
108
  ) -> None:
109
+ self._param_aliases = {"max_deletion_rate": "rate"}
110
+ effective_rate = resolve_rate(
111
+ rate=rate,
112
+ legacy_value=max_deletion_rate,
113
+ default=0.01,
114
+ legacy_name="max_deletion_rate",
115
+ )
92
116
  super().__init__(
93
117
  name="Rushmore",
94
118
  corruption_function=delete_random_words,
95
119
  scope=AttackWave.WORD,
96
120
  seed=seed,
97
- max_deletion_rate=max_deletion_rate,
121
+ rate=effective_rate,
98
122
  )
99
123
 
124
+ def pipeline_operation(self) -> dict[str, Any] | None:
125
+ rate = self.kwargs.get("rate")
126
+ if rate is None:
127
+ rate = self.kwargs.get("max_deletion_rate")
128
+ if rate is None:
129
+ return None
130
+ return {"type": "delete", "max_deletion_rate": float(rate)}
131
+
100
132
 
101
133
  rushmore = Rushmore()
102
134
 
@@ -1,8 +1,10 @@
1
1
  import re
2
2
  import random
3
+ from typing import Any
3
4
 
4
5
  from ._ocr_confusions import load_confusion_table
5
6
  from .core import Glitchling, AttackWave, AttackOrder
7
+ from ._rate import resolve_rate
6
8
 
7
9
  try:
8
10
  from glitchlings._zoo_rust import ocr_artifacts as _ocr_artifacts_rust
@@ -13,14 +15,14 @@ except ImportError: # pragma: no cover - compiled extension not present
13
15
  def _python_ocr_artifacts(
14
16
  text: str,
15
17
  *,
16
- error_rate: float,
18
+ rate: float,
17
19
  rng: random.Random,
18
20
  ) -> str:
19
21
  """Introduce OCR-like artifacts into text.
20
22
 
21
23
  Parameters
22
24
  - text: Input text to corrupt.
23
- - error_rate: Max proportion of eligible confusion matches to replace (default 0.02).
25
+ - rate: Max proportion of eligible confusion matches to replace (default 0.02).
24
26
  - seed: Optional seed if `rng` not provided.
25
27
  - rng: Optional RNG; overrides seed.
26
28
 
@@ -53,7 +55,7 @@ def _python_ocr_artifacts(
53
55
  return text
54
56
 
55
57
  # Decide how many to replace
56
- k = int(len(candidates) * error_rate)
58
+ k = int(len(candidates) * rate)
57
59
  if k <= 0:
58
60
  return text
59
61
 
@@ -95,9 +97,11 @@ def _python_ocr_artifacts(
95
97
 
96
98
  def ocr_artifacts(
97
99
  text: str,
98
- error_rate: float = 0.02,
100
+ rate: float | None = None,
99
101
  seed: int | None = None,
100
102
  rng: random.Random | None = None,
103
+ *,
104
+ error_rate: float | None = None,
101
105
  ) -> str:
102
106
  """Introduce OCR-like artifacts into text.
103
107
 
@@ -107,13 +111,22 @@ def ocr_artifacts(
107
111
  if not text:
108
112
  return text
109
113
 
114
+ effective_rate = resolve_rate(
115
+ rate=rate,
116
+ legacy_value=error_rate,
117
+ default=0.02,
118
+ legacy_name="error_rate",
119
+ )
120
+
110
121
  if rng is None:
111
122
  rng = random.Random(seed)
112
123
 
124
+ clamped_rate = max(0.0, effective_rate)
125
+
113
126
  if _ocr_artifacts_rust is not None:
114
- return _ocr_artifacts_rust(text, error_rate, rng)
127
+ return _ocr_artifacts_rust(text, clamped_rate, rng)
115
128
 
116
- return _python_ocr_artifacts(text, error_rate=error_rate, rng=rng)
129
+ return _python_ocr_artifacts(text, rate=clamped_rate, rng=rng)
117
130
 
118
131
 
119
132
  class Scannequin(Glitchling):
@@ -122,18 +135,35 @@ class Scannequin(Glitchling):
122
135
  def __init__(
123
136
  self,
124
137
  *,
125
- error_rate: float = 0.02,
138
+ rate: float | None = None,
139
+ error_rate: float | None = None,
126
140
  seed: int | None = None,
127
141
  ) -> None:
142
+ self._param_aliases = {"error_rate": "rate"}
143
+ effective_rate = resolve_rate(
144
+ rate=rate,
145
+ legacy_value=error_rate,
146
+ default=0.02,
147
+ legacy_name="error_rate",
148
+ )
128
149
  super().__init__(
129
150
  name="Scannequin",
130
151
  corruption_function=ocr_artifacts,
131
152
  scope=AttackWave.CHARACTER,
132
153
  order=AttackOrder.LATE,
133
154
  seed=seed,
134
- error_rate=error_rate,
155
+ rate=effective_rate,
135
156
  )
136
157
 
158
+ def pipeline_operation(self) -> dict[str, Any] | None:
159
+ rate = self.kwargs.get("rate")
160
+ if rate is None:
161
+ rate = self.kwargs.get("error_rate")
162
+ if rate is None:
163
+ return None
164
+ return {"type": "ocr", "error_rate": float(rate)}
165
+
166
+
137
167
 
138
168
  scannequin = Scannequin()
139
169
 
@@ -5,6 +5,7 @@ import random
5
5
  from typing import Optional
6
6
 
7
7
  from .core import Glitchling, AttackWave, AttackOrder
8
+ from ._rate import resolve_rate
8
9
  from ..util import KEYNEIGHBORS
9
10
 
10
11
  try:
@@ -88,11 +89,13 @@ def _python_draw_eligible_index(
88
89
  def _fatfinger_python(
89
90
  text: str,
90
91
  *,
91
- max_change_rate: float,
92
+ rate: float,
92
93
  layout: dict[str, list[str]],
93
94
  rng: random.Random,
94
95
  ) -> str:
95
- rate = max(0.0, max_change_rate)
96
+ if rate <= 0.0:
97
+ return text
98
+
96
99
  s = text
97
100
  max_changes = math.ceil(len(s) * rate)
98
101
  if max_changes == 0:
@@ -140,28 +143,37 @@ def _fatfinger_python(
140
143
 
141
144
  def fatfinger(
142
145
  text: str,
143
- max_change_rate: float = 0.02,
146
+ rate: float | None = None,
144
147
  keyboard: str = "CURATOR_QWERTY",
145
148
  seed: int | None = None,
146
149
  rng: random.Random | None = None,
150
+ *,
151
+ max_change_rate: float | None = None,
147
152
  ) -> str:
148
153
  """Introduce character-level "fat finger" edits with a Rust fast path."""
149
154
 
155
+ effective_rate = resolve_rate(
156
+ rate=rate,
157
+ legacy_value=max_change_rate,
158
+ default=0.02,
159
+ legacy_name="max_change_rate",
160
+ )
161
+
150
162
  if rng is None:
151
163
  rng = random.Random(seed)
152
164
  if not text:
153
165
  return ""
154
166
 
155
- rate = max(0.0, max_change_rate)
156
- if rate == 0.0:
167
+ clamped_rate = max(0.0, effective_rate)
168
+ if clamped_rate == 0.0:
157
169
  return text
158
170
 
159
171
  layout = getattr(KEYNEIGHBORS, keyboard)
160
172
 
161
173
  if _fatfinger_rust is not None:
162
- return _fatfinger_rust(text, max_change_rate=rate, layout=layout, rng=rng)
174
+ return _fatfinger_rust(text, max_change_rate=clamped_rate, layout=layout, rng=rng)
163
175
 
164
- return _fatfinger_python(text, max_change_rate=rate, layout=layout, rng=rng)
176
+ return _fatfinger_python(text, rate=clamped_rate, layout=layout, rng=rng)
165
177
 
166
178
 
167
179
  class Typogre(Glitchling):
@@ -170,17 +182,25 @@ class Typogre(Glitchling):
170
182
  def __init__(
171
183
  self,
172
184
  *,
173
- max_change_rate: float = 0.02,
185
+ rate: float | None = None,
186
+ max_change_rate: float | None = None,
174
187
  keyboard: str = "CURATOR_QWERTY",
175
188
  seed: int | None = None,
176
189
  ) -> None:
190
+ self._param_aliases = {"max_change_rate": "rate"}
191
+ effective_rate = resolve_rate(
192
+ rate=rate,
193
+ legacy_value=max_change_rate,
194
+ default=0.02,
195
+ legacy_name="max_change_rate",
196
+ )
177
197
  super().__init__(
178
198
  name="Typogre",
179
199
  corruption_function=fatfinger,
180
200
  scope=AttackWave.CHARACTER,
181
201
  order=AttackOrder.EARLY,
182
202
  seed=seed,
183
- max_change_rate=max_change_rate,
203
+ rate=effective_rate,
184
204
  keyboard=keyboard,
185
205
  )
186
206
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glitchlings
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Monsters for your language games.
5
5
  Author: osoleve
6
6
  License: Apache License
@@ -209,25 +209,21 @@ Project-URL: Homepage, https://github.com/osoleve/glitchlings
209
209
  Project-URL: Repository, https://github.com/osoleve/glitchlings.git
210
210
  Project-URL: Issues, https://github.com/osoleve/glitchlings/issues
211
211
  Project-URL: Changelog, https://github.com/osoleve/glitchlings/releases
212
- Keywords: nlp,text,adversarial augmentation,text augmentation
212
+ Keywords: nlp,text,adversarial augmentation,text augmentation,large language models,llms,data augmentation,confusables,typo,
213
213
  Classifier: Development Status :: 3 - Alpha
214
214
  Classifier: Intended Audience :: Developers
215
- Classifier: License :: OSI Approved :: Apache Software License
216
215
  Classifier: Programming Language :: Python
217
216
  Classifier: Programming Language :: Python :: 3
217
+ Classifier: Programming Language :: Python :: 3.10
218
+ Classifier: Programming Language :: Python :: 3.11
218
219
  Classifier: Programming Language :: Python :: 3.12
219
220
  Classifier: Programming Language :: Rust
220
- Classifier: Operating System :: MacOS :: MacOS X
221
- Classifier: Operating System :: Microsoft :: Windows
222
- Classifier: Operating System :: POSIX :: Linux
223
- Classifier: Operating System :: OS Independent
224
221
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
225
222
  Classifier: Topic :: Software Development :: Testing
226
- Requires-Python: >=3.12
223
+ Requires-Python: >=3.10
227
224
  Description-Content-Type: text/markdown
228
225
  License-File: LICENSE
229
226
  Requires-Dist: confusable-homoglyphs>=3.3.1
230
- Requires-Dist: jellyfish>=1.2.0
231
227
  Provides-Extra: hf
232
228
  Requires-Dist: datasets>=4.0.0; extra == "hf"
233
229
  Provides-Extra: wordnet
@@ -235,6 +231,7 @@ Requires-Dist: nltk>=3.9.1; extra == "wordnet"
235
231
  Requires-Dist: numpy<=2.0,>=1.24; extra == "wordnet"
236
232
  Provides-Extra: prime
237
233
  Requires-Dist: verifiers>=0.1.3.post0; extra == "prime"
234
+ Requires-Dist: jellyfish>=1.2.0; extra == "prime"
238
235
  Provides-Extra: dev
239
236
  Requires-Dist: pytest>=8.0.0; extra == "dev"
240
237
  Requires-Dist: hypothesis>=6.140.0; extra == "dev"
@@ -280,14 +277,16 @@ After all, what good is general intelligence if it can't handle a little chaos?
280
277
  pip install -U glitchlings
281
278
  ```
282
279
 
280
+ > Glitchlings requires Python 3.10 or newer.
281
+
283
282
  ```python
284
283
  from glitchlings import Gaggle, SAMPLE_TEXT, Typogre, Mim1c, Reduple, Rushmore
285
284
 
286
285
  gaggle = Gaggle([
287
- Typogre(max_change_rate=0.03),
288
- Mim1c(replacement_rate=0.02),
286
+ Typogre(rate=0.03),
287
+ Mim1c(rate=0.02),
289
288
  Reduple(seed=404),
290
- Rushmore(max_deletion_rate=0.02),
289
+ Rushmore(rate=0.02),
291
290
  ])
292
291
 
293
292
  print(gaggle(SAMPLE_TEXT))
@@ -295,41 +294,10 @@ print(gaggle(SAMPLE_TEXT))
295
294
 
296
295
  > Onҽ m‎ھ‎rning, wһen Gregor Samƽa woke from trouble𝐝 𝑑reams, he found himself transformed in his bed into a horrible vermin‎٠‎ He l lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightlh domed and divided by arches ino stiff sections. The bedding was adly able to cover it and and seemed ready to slide off any moment. His many legxs, pitifully thin compared with the size of the the rest of him, waved about helplessly ashe looked looked.
297
296
 
298
- ## Usage
299
-
300
- Need detailed usage patterns, dataset workflows, or tips for enabling the
301
- Rust accelerator? Consult the [Glitchlings Usage Guide](docs/index.md)
302
- for end-to-end instructions spanning the Python API, CLI, Hugging Face
297
+ Consult the [Glitchlings Usage Guide](docs/index.md)
298
+ for end-to-end instructions spanning the Python API, CLI, HuggingFace and Prime Intellect
303
299
  integrations, and the feature-flagged Rust pipeline.
304
300
 
305
- ### Prime Intellect environments
306
-
307
- After `pip install -e .[prime]`, the `glitchlings.dlc.prime.load_environment` helper mirrors `verifiers.load_environment` for Prime Intellect scenarios while optionally applying glitchlings before returning the environment:
308
-
309
- ```python
310
- from glitchlings import Mim1c, Typogre
311
- from glitchlings.dlc.prime import echo_chamber, load_environment
312
-
313
- env = load_environment(
314
- "osoleve/syllabify-en",
315
- glitchlings=[Mim1c(replacement_rate=0.01), Typogre(max_change_rate=0.02)],
316
- seed=404,
317
- )
318
-
319
- # Spin up an echo chamber that corrupts a dataset column and
320
- # rewards models for perfectly restoring it
321
- practice_env = echo_chamber(
322
- "osoleve/clean-room",
323
- column="text",
324
- glitchlings=["Typogre", "Mim1c"],
325
- reward_function=lambda prompt, completion, answer: float(completion == answer),
326
- )
327
- ```
328
-
329
- Skip the `glitchlings` argument to receive an untouched verifier dataset, and
330
- override `reward_function` when you want to evaluate completions with a custom
331
- scoring routine.
332
-
333
301
  ## Motivation
334
302
 
335
303
  If your model performs well on a particular task, but not when `Glitchling`s are present, it's a sign that it hasn't actually generalized to the problem.
@@ -344,8 +312,8 @@ Glitchlings are standard Python classes, so you can instantiate them with whatev
344
312
  ```python
345
313
  from glitchlings import Gaggle, Typogre, Mim1c
346
314
 
347
- custom_typogre = Typogre(max_change_rate=0.1)
348
- selective_mimic = Mim1c(replacement_rate=0.05, classes=["LATIN", "GREEK"])
315
+ custom_typogre = Typogre(rate=0.1)
316
+ selective_mimic = Mim1c(rate=0.05, classes=["LATIN", "GREEK"])
349
317
 
350
318
  gaggle = Gaggle([custom_typogre, selective_mimic], seed=99)
351
319
  print(gaggle("Summoned heroes do not fear the glitch."))
@@ -376,7 +344,7 @@ glitchlings --list
376
344
  glitchlings -g typogre --file documents/report.txt --diff
377
345
 
378
346
  # Configure glitchlings inline by passing keyword arguments.
379
- glitchlings -g "Typogre(max_change_rate=0.05)" "Ghouls just wanna have fun"
347
+ glitchlings -g "Typogre(rate=0.05)" "Ghouls just wanna have fun"
380
348
 
381
349
  # Pipe text straight into the CLI for an on-the-fly corruption.
382
350
  echo "Beware LLM-written flavor-text" | glitchlings -g mim1c
@@ -400,7 +368,7 @@ _What a nice word, would be a shame if something happened to it._
400
368
  >
401
369
  > Args
402
370
  >
403
- > - `max_change_rate (float)`: The maximum number of edits to make as a percentage of the length (default: 0.02, 2%).
371
+ > - `rate (float)`: The maximum number of edits to make as a percentage of the length (default: 0.02, 2%).
404
372
  > - `keyboard (str)`: Keyboard layout key-neighbor map to use (default: "CURATOR_QWERTY"; also accepts "QWERTY", "DVORAK", "COLEMAK", and "AZERTY").
405
373
  > - `seed (int)`: The random seed for reproducibility (default: 151).
406
374
 
@@ -412,7 +380,7 @@ _Wait, was that...?_
412
380
  >
413
381
  > Args
414
382
  >
415
- > - `replacement_rate (float)`: The maximum proportion of characters to replace (default: 0.02, 2%).
383
+ > - `rate (float)`: The maximum proportion of characters to replace (default: 0.02, 2%).
416
384
  > - `classes (list[str] | "all")`: Restrict replacements to these Unicode script classes (default: ["LATIN", "GREEK", "CYRILLIC"]).
417
385
  > - `banned_characters (Collection[str])`: Characters that must never appear as replacements (default: none).
418
386
  > - `seed (int)`: The random seed for reproducibility (default: 151).
@@ -425,7 +393,7 @@ _How can a computer need reading glasses?_
425
393
  >
426
394
  > Args
427
395
  >
428
- > - `error_rate (float)`: The maximum proportion of eligible confusion spans to replace (default: 0.02, 2%).
396
+ > - `rate (float)`: The maximum proportion of eligible confusion spans to replace (default: 0.02, 2%).
429
397
  > - `seed (int)`: The random seed for reproducibility (default: 151).
430
398
 
431
399
  ### Jargoyle
@@ -436,7 +404,7 @@ _Uh oh. The worst person you know just bought a thesaurus._
436
404
  >
437
405
  > Args
438
406
  >
439
- > - `replacement_rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
407
+ > - `rate (float)`: The maximum proportion of words to replace (default: 0.1, 10%).
440
408
  > - `part_of_speech`: The WordNet part(s) of speech to target (default: nouns). Accepts `wn.NOUN`, `wn.VERB`, `wn.ADJ`, `wn.ADV`, any iterable of those tags, or the string `"any"` to include them all.
441
409
  > - `seed (int)`: The random seed for reproducibility (default: 151).
442
410
 
@@ -448,7 +416,7 @@ _Did you say that or did I?_
448
416
  >
449
417
  > Args
450
418
  >
451
- > - `reduplication_rate (float)`: The maximum proportion of words to reduplicate (default: 0.05, 5%).
419
+ > - `rate (float)`: The maximum proportion of words to reduplicate (default: 0.05, 5%).
452
420
  > - `seed (int)`: The random seed for reproducibility (default: 151).
453
421
 
454
422
  ### Rushmore
@@ -459,7 +427,7 @@ _I accidentally an entire word._
459
427
  >
460
428
  > Args
461
429
  >
462
- > - `max_deletion_rate (float)`: The maximum proportion of words to delete (default: 0.01, 1%).
430
+ > - `rate (float)`: The maximum proportion of words to delete (default: 0.01, 1%).
463
431
  > - `seed (int)`: The random seed for reproducibility (default: 151).
464
432
 
465
433
  ### Redactyl
@@ -471,7 +439,7 @@ _Oops, that was my black highlighter._
471
439
  > ### Args
472
440
  >
473
441
  > - `replacement_char (str)`: The character to use for redaction (default: █).
474
- > - `redaction_rate (float)`: The maximum proportion of words to redact (default: 0.05, 5%).
442
+ > - `rate (float)`: The maximum proportion of words to redact (default: 0.05, 5%).
475
443
  > - `merge_adjacent (bool)`: Whether to redact the space between adjacent redacted words (default: False).
476
444
  > - `seed (int)`: The random seed for reproducibility (default: 151).
477
445
 
@@ -0,0 +1,26 @@
1
+ glitchlings/__init__.py,sha256=w8heFqUejrXM_9NNlM9CQnIGkmGUyBV29acg3WsocXA,622
2
+ glitchlings/__main__.py,sha256=pqNe1C9hMf8pap4oh6x6yo2h4Nsa2RFSaMWHfGtNXj0,130
3
+ glitchlings/_zoo_rust.cp312-win_amd64.pyd,sha256=qHk8hPmRrzJTwOyhcBNr-2qhXBaEBUy__7_SMFhzWSc,1989632
4
+ glitchlings/main.py,sha256=QrSSLWcKh1_NDfJDGh-3UVKdI7AkzfMy6Jz1ouxIgnE,6149
5
+ glitchlings/dlc/__init__.py,sha256=IHD-GGhVFb7SVzErvf2YCJkOR4wGo0nFHXkn_daMvS8,146
6
+ glitchlings/dlc/huggingface.py,sha256=PIesnDIEvyJxj1IuLw2P9nVPTr4Nv81XM7w2axfyhkA,3029
7
+ glitchlings/dlc/prime.py,sha256=hySyYBncUM-49j6JtrHYO6c3HpbG2vTt2EYZnOJ85C0,8972
8
+ glitchlings/util/__init__.py,sha256=GoyQuHTfGRkHzuZwJji6QWSiGd_LHa9QiyjjEpBFW7E,4679
9
+ glitchlings/zoo/__init__.py,sha256=kYKKlNvEwKtrD26E1hfde33rkN83CMf_h5AQFGjQyBQ,4312
10
+ glitchlings/zoo/_ocr_confusions.py,sha256=W59Aa5MBDwRF65f8GV-6XwGAmlR5Uk7pa5qvHvhIYdY,1252
11
+ glitchlings/zoo/_rate.py,sha256=EYUWXYyR2IK0zYBWyBOlnUjDxU32JE9mZTZeodVx5CA,548
12
+ glitchlings/zoo/core.py,sha256=QKHmzmONNkiA3RdfgLdNx-FPFwoH4Bm-Tkc3vSCHNpc,14412
13
+ glitchlings/zoo/jargoyle.py,sha256=1fnL_8bv1Y-T2h1C6NRzIylYyOuAUI-BiMReFewqh00,11002
14
+ glitchlings/zoo/mim1c.py,sha256=3ddNOzWgLABuEOh5T98Xk439ejx-YHGI7ErXET03Crc,3537
15
+ glitchlings/zoo/ocr_confusions.tsv,sha256=S-IJEYCIXYKT1Uu7Id8Lnvg5pw528yNigTtWUdnMv9k,213
16
+ glitchlings/zoo/redactyl.py,sha256=dM3W59xLhuiS8t5jXETc_L8EEhRN1CpLazBnVPiSknk,4834
17
+ glitchlings/zoo/reduple.py,sha256=9jid6tCvCaiSxWSPMNuHWZitd7et60RRFYeek3S0ElU,3641
18
+ glitchlings/zoo/rushmore.py,sha256=pJy3g_H1z8PNoHitvD3-HsytAuE0U6FOdsdaKZy6OqY,3680
19
+ glitchlings/zoo/scannequin.py,sha256=TJyNYTTIB7rxZH3XKIETy0YVf4EjsMgGWYmYaxH9jxU,5030
20
+ glitchlings/zoo/typogre.py,sha256=olTTXDmFkVQ3r-T1vxm2mLomRvIDXHrNHfgin316wzE,6221
21
+ glitchlings-0.2.4.dist-info/licenses/LICENSE,sha256=EFEP1evBfHaxsMTBjxm0sZVRp2wct8QLvHE1saII5FI,11538
22
+ glitchlings-0.2.4.dist-info/METADATA,sha256=mGKlfmodtLjWsfrz6O0cLk4DDPFeUO5vt6LKgw-uu-M,26513
23
+ glitchlings-0.2.4.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
24
+ glitchlings-0.2.4.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
25
+ glitchlings-0.2.4.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
26
+ glitchlings-0.2.4.dist-info/RECORD,,
@@ -1,25 +0,0 @@
1
- glitchlings/__init__.py,sha256=w8heFqUejrXM_9NNlM9CQnIGkmGUyBV29acg3WsocXA,622
2
- glitchlings/__main__.py,sha256=pqNe1C9hMf8pap4oh6x6yo2h4Nsa2RFSaMWHfGtNXj0,130
3
- glitchlings/_zoo_rust.cp312-win_amd64.pyd,sha256=Eh4tD2b4ym3zX0KWxVWCFRpmPsZFnyeOiFWr_qQGg5A,1989632
4
- glitchlings/main.py,sha256=krujz3GBrdP6FU3O6Z9f3rvc444rT79Hm69zAPG3b-U,6160
5
- glitchlings/dlc/__init__.py,sha256=IHD-GGhVFb7SVzErvf2YCJkOR4wGo0nFHXkn_daMvS8,146
6
- glitchlings/dlc/huggingface.py,sha256=PIesnDIEvyJxj1IuLw2P9nVPTr4Nv81XM7w2axfyhkA,3029
7
- glitchlings/dlc/prime.py,sha256=oKVAVWSD-aa-LqDsctSLXzq0JW2RaIc1l2859ogr4lY,8107
8
- glitchlings/util/__init__.py,sha256=GoyQuHTfGRkHzuZwJji6QWSiGd_LHa9QiyjjEpBFW7E,4679
9
- glitchlings/zoo/__init__.py,sha256=kYKKlNvEwKtrD26E1hfde33rkN83CMf_h5AQFGjQyBQ,4312
10
- glitchlings/zoo/_ocr_confusions.py,sha256=W59Aa5MBDwRF65f8GV-6XwGAmlR5Uk7pa5qvHvhIYdY,1252
11
- glitchlings/zoo/core.py,sha256=aGGc0M97QeKM5rsQjTZs3fhIVac0g8A72mW4u72YnD0,14373
12
- glitchlings/zoo/jargoyle.py,sha256=TBzt9CFL5GBP_DjqKqUY54DFsX2VAU4LnBNMDIg7P-Y,10444
13
- glitchlings/zoo/mim1c.py,sha256=YHFELu3fpY_9VxRavYfCoAWZYp-HZBXdiLk4DTKdqcY,2979
14
- glitchlings/zoo/ocr_confusions.tsv,sha256=S-IJEYCIXYKT1Uu7Id8Lnvg5pw528yNigTtWUdnMv9k,213
15
- glitchlings/zoo/redactyl.py,sha256=VV2mPE2WQ41Sl874TjaHu9ShhYlFNLI7embQqKM5_ZE,3738
16
- glitchlings/zoo/reduple.py,sha256=WuMpmuZrf5x7JneiRjDF2Y0beEAn7j1DPCV2BuuTuRY,2873
17
- glitchlings/zoo/rushmore.py,sha256=dAiv53B_6Zg-zNG5aW8YobJevyBV586HtJVlZqgcGR8,2790
18
- glitchlings/zoo/scannequin.py,sha256=BLJ8VFNTrXxv6mKjTMPUHOqziXO-NLpKNQNPbxG7jLI,4178
19
- glitchlings/zoo/typogre.py,sha256=CISk0aqI8y5SdZXibqhfP0cu5MZ7TkiOQ7kftqW9RtI,5680
20
- glitchlings-0.2.2.dist-info/licenses/LICENSE,sha256=EFEP1evBfHaxsMTBjxm0sZVRp2wct8QLvHE1saII5FI,11538
21
- glitchlings-0.2.2.dist-info/METADATA,sha256=mRSQQoNoQAPmmVzfUn6ZZLHL1I6n5wxr45o3DyWsSMw,27811
22
- glitchlings-0.2.2.dist-info/WHEEL,sha256=8UP9x9puWI0P1V_d7K2oMTBqfeLNm21CTzZ_Ptr0NXU,101
23
- glitchlings-0.2.2.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
24
- glitchlings-0.2.2.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
25
- glitchlings-0.2.2.dist-info/RECORD,,