glitchlings 1.0.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. glitchlings/__init__.py +101 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_corruption_engine/__init__.py +12 -0
  4. glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +184 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +819 -0
  21. glitchlings/attack/core_execution.py +378 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +211 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +338 -0
  27. glitchlings/attack/tokenizer_metrics.py +373 -0
  28. glitchlings/auggie.py +285 -0
  29. glitchlings/compat/__init__.py +9 -0
  30. glitchlings/compat/loaders.py +355 -0
  31. glitchlings/compat/types.py +41 -0
  32. glitchlings/conf/__init__.py +39 -0
  33. glitchlings/conf/loaders.py +331 -0
  34. glitchlings/conf/schema.py +156 -0
  35. glitchlings/conf/types.py +72 -0
  36. glitchlings/config.toml +2 -0
  37. glitchlings/constants.py +139 -0
  38. glitchlings/dev/__init__.py +3 -0
  39. glitchlings/dev/docs.py +45 -0
  40. glitchlings/dlc/__init__.py +21 -0
  41. glitchlings/dlc/_shared.py +300 -0
  42. glitchlings/dlc/gutenberg.py +400 -0
  43. glitchlings/dlc/huggingface.py +68 -0
  44. glitchlings/dlc/langchain.py +147 -0
  45. glitchlings/dlc/nemo.py +283 -0
  46. glitchlings/dlc/prime.py +215 -0
  47. glitchlings/dlc/pytorch.py +98 -0
  48. glitchlings/dlc/pytorch_lightning.py +173 -0
  49. glitchlings/internal/__init__.py +16 -0
  50. glitchlings/internal/rust.py +159 -0
  51. glitchlings/internal/rust_ffi.py +599 -0
  52. glitchlings/main.py +426 -0
  53. glitchlings/protocols.py +91 -0
  54. glitchlings/runtime_config.py +24 -0
  55. glitchlings/util/__init__.py +41 -0
  56. glitchlings/util/adapters.py +65 -0
  57. glitchlings/util/keyboards.py +508 -0
  58. glitchlings/util/transcripts.py +108 -0
  59. glitchlings/zoo/__init__.py +161 -0
  60. glitchlings/zoo/assets/__init__.py +29 -0
  61. glitchlings/zoo/core.py +852 -0
  62. glitchlings/zoo/core_execution.py +154 -0
  63. glitchlings/zoo/core_planning.py +451 -0
  64. glitchlings/zoo/corrupt_dispatch.py +291 -0
  65. glitchlings/zoo/hokey.py +139 -0
  66. glitchlings/zoo/jargoyle.py +301 -0
  67. glitchlings/zoo/mim1c.py +269 -0
  68. glitchlings/zoo/pedant/__init__.py +109 -0
  69. glitchlings/zoo/pedant/core.py +99 -0
  70. glitchlings/zoo/pedant/forms.py +50 -0
  71. glitchlings/zoo/pedant/stones.py +83 -0
  72. glitchlings/zoo/redactyl.py +94 -0
  73. glitchlings/zoo/rng.py +280 -0
  74. glitchlings/zoo/rushmore.py +416 -0
  75. glitchlings/zoo/scannequin.py +370 -0
  76. glitchlings/zoo/transforms.py +331 -0
  77. glitchlings/zoo/typogre.py +194 -0
  78. glitchlings/zoo/validation.py +643 -0
  79. glitchlings/zoo/wherewolf.py +120 -0
  80. glitchlings/zoo/zeedub.py +165 -0
  81. glitchlings-1.0.0.dist-info/METADATA +404 -0
  82. glitchlings-1.0.0.dist-info/RECORD +86 -0
  83. glitchlings-1.0.0.dist-info/WHEEL +5 -0
  84. glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
  85. glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
  86. glitchlings-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,416 @@
1
+ from __future__ import annotations
2
+
3
+ import random
4
+ import re
5
+ from collections.abc import Iterable, Sequence
6
+ from dataclasses import dataclass
7
+ from enum import Enum, unique
8
+ from typing import Any
9
+
10
+ from glitchlings.constants import RUSHMORE_DEFAULT_RATES
11
+ from glitchlings.internal.rust_ffi import (
12
+ delete_random_words_rust,
13
+ reduplicate_words_rust,
14
+ resolve_seed,
15
+ swap_adjacent_words_rust,
16
+ )
17
+
18
+ from .core import AttackWave, Glitchling
19
+ from .transforms import WordToken
20
+
21
+
22
@unique
class RushmoreMode(Enum):
    """The word-level attack behaviours that Rushmore can be asked to apply."""

    DELETE = "delete"
    DUPLICATE = "duplicate"
    SWAP = "swap"

    @classmethod
    def execution_order(cls) -> tuple["RushmoreMode", ...]:
        """Return the fixed ordering of the modes: DELETE, DUPLICATE, SWAP."""
        ordered = (cls.DELETE, cls.DUPLICATE, cls.SWAP)
        return ordered
34
+
35
+
36
# Lower-case spellings accepted for each mode, grouped by the mode they select.
# The grouping is flattened into a plain alias -> mode lookup table.
_MODE_ALIASES: dict[str, RushmoreMode] = {
    alias: target
    for target, aliases in (
        (RushmoreMode.DELETE, ("delete", "drop", "rushmore")),
        (RushmoreMode.DUPLICATE, ("duplicate", "reduplicate", "repeat")),
        (RushmoreMode.SWAP, ("swap", "adjacent")),
    )
    for alias in aliases
}
46
+
47
+
48
@dataclass(frozen=True)
class RushmoreRuntimeConfig:
    """Frozen bundle of the modes, per-mode rates, and weighting flags for Rushmore."""

    # Attack modes, in the order they will be iterated.
    modes: tuple[RushmoreMode, ...]
    # Rate configured for each mode; membership here is what ``has_mode`` tests.
    rates: dict[RushmoreMode, float]
    # Flags emitted as the "unweighted" field of the delete / duplicate descriptors.
    delete_unweighted: bool
    duplicate_unweighted: bool

    def has_mode(self, mode: RushmoreMode) -> bool:
        """Return ``True`` when ``mode`` has an entry in ``rates``."""
        return mode in self.rates

    def to_pipeline_descriptor(self) -> dict[str, Any]:
        """Serialise this configuration into a plain descriptor dictionary.

        A single-mode configuration yields a flat descriptor whose ``"type"``
        is ``"delete"``, ``"reduplicate"``, or ``"swap_adjacent"``. Two or more
        modes yield a ``"rushmore_combo"`` descriptor with one nested entry per
        mode that has a rate.

        Raises:
            RuntimeError: If no modes are configured, if the only mode has no
                rate, or if the only mode is not one of the known modes.
        """
        if not self.modes:
            raise RuntimeError("Rushmore configuration is missing attack modes")

        if len(self.modes) != 1:
            combo: dict[str, Any] = {
                "type": "rushmore_combo",
                "modes": [entry.value for entry in self.modes],
            }
            if self.has_mode(RushmoreMode.DELETE):
                combo["delete"] = {
                    "rate": self.rates[RushmoreMode.DELETE],
                    "unweighted": self.delete_unweighted,
                }
            if self.has_mode(RushmoreMode.DUPLICATE):
                combo["duplicate"] = {
                    "rate": self.rates[RushmoreMode.DUPLICATE],
                    "unweighted": self.duplicate_unweighted,
                }
            if self.has_mode(RushmoreMode.SWAP):
                combo["swap"] = {"rate": self.rates[RushmoreMode.SWAP]}
            return combo

        # Exactly one mode: emit the flat, mode-specific descriptor.
        (solo,) = self.modes
        solo_rate = self.rates.get(solo)
        if solo_rate is None:
            raise RuntimeError(f"Rushmore mode {solo!r} is missing a configured rate")

        if solo is RushmoreMode.DELETE:
            return {
                "type": "delete",
                "rate": solo_rate,
                "unweighted": self.delete_unweighted,
            }
        if solo is RushmoreMode.DUPLICATE:
            return {
                "type": "reduplicate",
                "rate": solo_rate,
                "unweighted": self.duplicate_unweighted,
            }
        if solo is RushmoreMode.SWAP:
            return {
                "type": "swap_adjacent",
                "rate": solo_rate,
            }
        raise RuntimeError(f"Rushmore mode {solo!r} is not serialisable")
107
+
108
+
109
@dataclass(frozen=True)
class _WeightedWordToken:
    """Immutable pairing of a word token with a numeric weight."""

    # The wrapped token.
    token: WordToken
    # Weight carried alongside the token.
    weight: float
115
+
116
+
117
def _normalize_mode_item(value: RushmoreMode | str) -> list[RushmoreMode]:
    """Expand a single mode specifier into the modes it names.

    Enum members pass through unchanged. Any other value is stringified,
    trimmed, and lower-cased. ``"all"``, ``"any"``, and ``"full"`` expand to
    every mode in execution order. Otherwise the text is split on ``+``,
    ``,`` and whitespace, and each piece is looked up in the alias table.

    Returns:
        The resolved modes in the order they appear (duplicates kept); an
        empty list when the specifier holds no usable pieces.

    Raises:
        ValueError: If any piece is not a recognised alias.
    """
    if isinstance(value, RushmoreMode):
        return [value]

    cleaned = str(value).strip().lower()
    if cleaned in {"all", "any", "full"}:
        return list(RushmoreMode.execution_order())

    found: list[RushmoreMode] = []
    for alias in re.split(r"[+,\s]+", cleaned):
        if not alias:
            # Empty pieces come from blank input or leading/trailing separators.
            continue
        if alias not in _MODE_ALIASES:
            raise ValueError(f"Unsupported Rushmore mode '{value}'")
        found.append(_MODE_ALIASES[alias])
    return found
139
+
140
+
141
def _normalize_modes(
    modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None,
) -> tuple[RushmoreMode, ...]:
    """Resolve a flexible mode specification into a de-duplicated tuple.

    Accepts ``None``, a single enum member or string, or an iterable of
    either. The first occurrence of each mode wins and order is preserved.
    When nothing is specified, or nothing resolves, the result is
    ``(RushmoreMode.DELETE,)``.

    Raises:
        ValueError: Propagated from ``_normalize_mode_item`` for unknown names.
    """
    fallback = (RushmoreMode.DELETE,)

    pending: Sequence[RushmoreMode | str]
    if modes is None:
        pending = fallback
    elif isinstance(modes, (RushmoreMode, str)):
        pending = (modes,)
    else:
        pending = tuple(modes) or fallback

    # dict keys keep insertion order, giving an ordered "first seen wins" set.
    unique_modes = dict.fromkeys(
        mode for item in pending for mode in _normalize_mode_item(item)
    )
    return tuple(unique_modes) or fallback
163
+
164
+
165
def _resolve_mode_rate(
    *,
    mode: RushmoreMode,
    global_rate: float | None,
    specific_rate: float | None,
    allow_default: bool,
) -> float | None:
    """Choose and clamp the rate for one mode.

    Precedence is the mode-specific rate, then the global rate, then (only
    when ``allow_default`` is true) the entry for ``mode.value`` in
    ``RUSHMORE_DEFAULT_RATES``. The chosen rate is floored at ``0.0``; for
    ``RushmoreMode.SWAP`` it is also capped at ``1.0``.

    Returns:
        The clamped rate, or ``None`` when no rate was supplied and defaults
        are not allowed.
    """
    chosen = global_rate if specific_rate is None else specific_rate
    if chosen is None:
        if not allow_default:
            return None
        chosen = RUSHMORE_DEFAULT_RATES[mode.value]

    floored = max(0.0, float(chosen))
    return min(1.0, floored) if mode is RushmoreMode.SWAP else floored
183
+
184
+
185
def _resolve_rushmore_config(
    *,
    modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None,
    rate: float | None,
    delete_rate: float | None,
    duplicate_rate: float | None,
    swap_rate: float | None,
    unweighted: bool,
    delete_unweighted: bool | None,
    duplicate_unweighted: bool | None,
    allow_defaults: bool,
) -> RushmoreRuntimeConfig | None:
    """Build a ``RushmoreRuntimeConfig`` from loosely-typed user options.

    Modes are normalised first, then a rate is resolved for each one via
    ``_resolve_mode_rate``. The per-mode ``*_unweighted`` flags fall back to
    ``unweighted`` when left as ``None``.

    Returns:
        The resolved configuration, or ``None`` as soon as any requested mode
        has no resolvable rate.
    """
    ordered_modes = _normalize_modes(modes)
    shared_rate = None if rate is None else float(rate)

    overrides: dict[RushmoreMode, float | None] = {
        RushmoreMode.DELETE: delete_rate,
        RushmoreMode.DUPLICATE: duplicate_rate,
        RushmoreMode.SWAP: swap_rate,
    }

    resolved_rates: dict[RushmoreMode, float] = {}
    for current in ordered_modes:
        value = _resolve_mode_rate(
            mode=current,
            global_rate=shared_rate,
            specific_rate=overrides[current],
            allow_default=allow_defaults,
        )
        if value is None:
            return None
        resolved_rates[current] = value

    if delete_unweighted is None:
        delete_choice = unweighted
    else:
        delete_choice = delete_unweighted
    if duplicate_unweighted is None:
        duplicate_choice = unweighted
    else:
        duplicate_choice = duplicate_unweighted

    return RushmoreRuntimeConfig(
        modes=ordered_modes,
        rates=resolved_rates,
        delete_unweighted=bool(delete_choice),
        duplicate_unweighted=bool(duplicate_choice),
    )
227
+
228
+
229
def delete_random_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    unweighted: bool = False,
) -> str:
    """Delete random words from ``text`` via ``delete_random_words_rust``.

    Args:
        text: Input text to corrupt.
        rate: Deletion rate; ``None`` selects ``RUSHMORE_DEFAULT_RATES["delete"]``.
            Negative values are raised to ``0.0``.
        seed: Passed to ``resolve_seed`` together with ``rng``.
        rng: Passed to ``resolve_seed`` together with ``seed``.
        unweighted: Coerced to ``bool`` and forwarded to the backend.

    Returns:
        The string produced by the backend.
    """
    if rate is None:
        rate = RUSHMORE_DEFAULT_RATES["delete"]

    return delete_random_words_rust(
        text,
        max(0.0, rate),
        bool(unweighted),
        resolve_seed(seed, rng),
    )
244
+
245
+
246
def reduplicate_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    unweighted: bool = False,
) -> str:
    """Randomly reduplicate words in ``text`` via ``reduplicate_words_rust``.

    Args:
        text: Input text to corrupt.
        rate: Reduplication rate; ``None`` selects
            ``RUSHMORE_DEFAULT_RATES["duplicate"]``. Negative values are raised
            to ``0.0``.
        seed: Passed to ``resolve_seed`` together with ``rng``.
        rng: Passed to ``resolve_seed`` together with ``seed``.
        unweighted: Coerced to ``bool`` and forwarded to the backend.

    Returns:
        The string produced by the backend.
    """
    if rate is None:
        rate = RUSHMORE_DEFAULT_RATES["duplicate"]

    return reduplicate_words_rust(
        text,
        max(0.0, rate),
        bool(unweighted),
        resolve_seed(seed, rng),
    )
262
+
263
+
264
def swap_adjacent_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Swap adjacent word cores in ``text`` via ``swap_adjacent_words_rust``.

    Args:
        text: Input text to corrupt.
        rate: Swap rate; ``None`` selects ``RUSHMORE_DEFAULT_RATES["swap"]``.
            The rate is clamped into ``[0.0, 1.0]``.
        seed: Passed to ``resolve_seed`` together with ``rng``.
        rng: Passed to ``resolve_seed`` together with ``seed``.

    Returns:
        The string produced by the backend.
    """
    if rate is None:
        rate = RUSHMORE_DEFAULT_RATES["swap"]

    bounded = max(0.0, min(rate, 1.0))
    return swap_adjacent_words_rust(text, bounded, resolve_seed(seed, rng))
276
+
277
+
278
def rushmore_attack(
    text: str,
    *,
    modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None = None,
    rate: float | None = None,
    delete_rate: float | None = None,
    duplicate_rate: float | None = None,
    swap_rate: float | None = None,
    unweighted: bool = False,
    delete_unweighted: bool | None = None,
    duplicate_unweighted: bool | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Run every configured Rushmore mode over ``text`` and return the result.

    The options are resolved into a runtime configuration with defaults
    enabled. Modes are then applied one after another in the configuration's
    order, each one operating on the output of the previous one. Modes whose
    rate is zero or negative are skipped.

    Args:
        text: Input text to corrupt.
        modes: Mode specification (enum members, alias strings, or an iterable).
        rate: Rate shared by modes that have no specific rate.
        delete_rate: Rate used for deletion in place of ``rate``.
        duplicate_rate: Rate used for duplication in place of ``rate``.
        swap_rate: Rate used for swapping in place of ``rate``.
        unweighted: Default for the two per-mode ``*_unweighted`` flags.
        delete_unweighted: Overrides ``unweighted`` for deletion.
        duplicate_unweighted: Overrides ``unweighted`` for duplication.
        seed: Seeds a fresh ``random.Random`` when ``rng`` is not given.
        rng: Random generator handed to each mode; takes priority over ``seed``.

    Returns:
        The corrupted text, or ``text`` unchanged when no configuration
        could be resolved.
    """
    config = _resolve_rushmore_config(
        modes=modes,
        rate=rate,
        delete_rate=delete_rate,
        duplicate_rate=duplicate_rate,
        swap_rate=swap_rate,
        unweighted=unweighted,
        delete_unweighted=delete_unweighted,
        duplicate_unweighted=duplicate_unweighted,
        allow_defaults=True,
    )
    if config is None:
        return text

    # One generator is shared by every mode; it stays None when neither an
    # rng nor a seed was supplied.
    shared_rng = rng
    if shared_rng is None and seed is not None:
        shared_rng = random.Random(resolve_seed(seed, None))

    corrupted = text
    for mode in config.modes:
        if not config.has_mode(mode):
            continue
        strength = config.rates[mode]
        if strength <= 0.0:
            continue

        if mode is RushmoreMode.DELETE:
            corrupted = delete_random_words(
                corrupted,
                rate=strength,
                rng=shared_rng,
                unweighted=config.delete_unweighted,
            )
            continue
        if mode is RushmoreMode.DUPLICATE:
            corrupted = reduplicate_words(
                corrupted,
                rate=strength,
                rng=shared_rng,
                unweighted=config.duplicate_unweighted,
            )
            continue
        # Any remaining mode is handled as a swap.
        corrupted = swap_adjacent_words(
            corrupted,
            rate=strength,
            rng=shared_rng,
        )

    return corrupted
342
+
343
+
344
def _rushmore_pipeline_descriptor(glitchling: Glitchling) -> dict[str, Any] | None:
    """Build the pipeline descriptor for a glitchling from its stored kwargs.

    Reads the Rushmore options out of ``glitchling.kwargs``, resolves them
    with defaults enabled, and serialises the resulting configuration.

    Returns:
        The descriptor dictionary, or ``None`` when no configuration could
        be resolved.
    """
    options = glitchling.kwargs
    config = _resolve_rushmore_config(
        modes=options.get("modes"),
        rate=options.get("rate"),
        delete_rate=options.get("delete_rate"),
        duplicate_rate=options.get("duplicate_rate"),
        swap_rate=options.get("swap_rate"),
        unweighted=options.get("unweighted", False),
        delete_unweighted=options.get("delete_unweighted"),
        duplicate_unweighted=options.get("duplicate_unweighted"),
        allow_defaults=True,
    )
    return None if config is None else config.to_pipeline_descriptor()
359
+
360
+
361
class Rushmore(Glitchling):
    """Glitchling combining word deletion, duplication, and adjacent swaps."""

    flavor = (
        "You shouldn't have waited for the last minute to write that paper, anon. "
        "Sure hope everything is in the right place."
    )

    # Maps the singular spelling "mode" to the "modes" parameter.
    _param_aliases = {"mode": "modes"}

    def __init__(
        self,
        *,
        name: str = "Rushmore",
        modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None = None,
        rate: float | None = None,
        delete_rate: float | None = None,
        duplicate_rate: float | None = None,
        swap_rate: float | None = None,
        seed: int | None = None,
        unweighted: bool = False,
        delete_unweighted: bool | None = None,
        duplicate_unweighted: bool | None = None,
        **kwargs: Any,
    ) -> None:
        """Configure the glitchling and register ``rushmore_attack`` as its corruption.

        ``modes`` is normalised up front, so an unknown mode name raises
        ``ValueError`` here. All other options are forwarded unchanged to the
        base class, along with any extra ``kwargs``.
        """
        attack_options: dict[str, Any] = {
            "modes": _normalize_modes(modes),
            "rate": rate,
            "delete_rate": delete_rate,
            "duplicate_rate": duplicate_rate,
            "swap_rate": swap_rate,
            "unweighted": unweighted,
            "delete_unweighted": delete_unweighted,
            "duplicate_unweighted": duplicate_unweighted,
        }
        super().__init__(
            name=name,
            corruption_function=rushmore_attack,
            scope=AttackWave.WORD,
            seed=seed,
            pipeline_operation=_rushmore_pipeline_descriptor,
            **attack_options,
            **kwargs,
        )
403
+
404
+
405
# Module-level instance built with every default option.
rushmore = Rushmore()


# Names exported by ``from ... import *``.
__all__ = [
    "Rushmore",
    "rushmore",
    "RushmoreMode",
    "rushmore_attack",
    "delete_random_words",
    "reduplicate_words",
    "swap_adjacent_words",
]