glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. glitchlings/__init__.py +36 -17
  2. glitchlings/__main__.py +0 -1
  3. glitchlings/_zoo_rust/__init__.py +12 -0
  4. glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/pipeline_assets.json +29 -0
  17. glitchlings/attack/__init__.py +53 -0
  18. glitchlings/attack/compose.py +299 -0
  19. glitchlings/attack/core.py +465 -0
  20. glitchlings/attack/encode.py +114 -0
  21. glitchlings/attack/metrics.py +104 -0
  22. glitchlings/attack/metrics_dispatch.py +70 -0
  23. glitchlings/attack/tokenization.py +157 -0
  24. glitchlings/auggie.py +283 -0
  25. glitchlings/compat/__init__.py +9 -0
  26. glitchlings/compat/loaders.py +355 -0
  27. glitchlings/compat/types.py +41 -0
  28. glitchlings/conf/__init__.py +41 -0
  29. glitchlings/conf/loaders.py +331 -0
  30. glitchlings/conf/schema.py +156 -0
  31. glitchlings/conf/types.py +72 -0
  32. glitchlings/config.toml +2 -0
  33. glitchlings/constants.py +59 -0
  34. glitchlings/dev/__init__.py +3 -0
  35. glitchlings/dev/docs.py +45 -0
  36. glitchlings/dlc/__init__.py +17 -3
  37. glitchlings/dlc/_shared.py +296 -0
  38. glitchlings/dlc/gutenberg.py +400 -0
  39. glitchlings/dlc/huggingface.py +37 -65
  40. glitchlings/dlc/prime.py +55 -114
  41. glitchlings/dlc/pytorch.py +98 -0
  42. glitchlings/dlc/pytorch_lightning.py +173 -0
  43. glitchlings/internal/__init__.py +16 -0
  44. glitchlings/internal/rust.py +159 -0
  45. glitchlings/internal/rust_ffi.py +432 -0
  46. glitchlings/main.py +123 -32
  47. glitchlings/runtime_config.py +24 -0
  48. glitchlings/util/__init__.py +29 -176
  49. glitchlings/util/adapters.py +65 -0
  50. glitchlings/util/keyboards.py +311 -0
  51. glitchlings/util/transcripts.py +108 -0
  52. glitchlings/zoo/__init__.py +47 -24
  53. glitchlings/zoo/assets/__init__.py +29 -0
  54. glitchlings/zoo/core.py +301 -167
  55. glitchlings/zoo/core_execution.py +98 -0
  56. glitchlings/zoo/core_planning.py +451 -0
  57. glitchlings/zoo/corrupt_dispatch.py +295 -0
  58. glitchlings/zoo/ekkokin.py +118 -0
  59. glitchlings/zoo/hokey.py +137 -0
  60. glitchlings/zoo/jargoyle.py +179 -274
  61. glitchlings/zoo/mim1c.py +106 -68
  62. glitchlings/zoo/pedant/__init__.py +107 -0
  63. glitchlings/zoo/pedant/core.py +105 -0
  64. glitchlings/zoo/pedant/forms.py +74 -0
  65. glitchlings/zoo/pedant/stones.py +74 -0
  66. glitchlings/zoo/redactyl.py +44 -175
  67. glitchlings/zoo/rng.py +259 -0
  68. glitchlings/zoo/rushmore.py +359 -116
  69. glitchlings/zoo/scannequin.py +18 -125
  70. glitchlings/zoo/transforms.py +386 -0
  71. glitchlings/zoo/typogre.py +76 -162
  72. glitchlings/zoo/validation.py +477 -0
  73. glitchlings/zoo/zeedub.py +33 -86
  74. glitchlings-0.9.3.dist-info/METADATA +334 -0
  75. glitchlings-0.9.3.dist-info/RECORD +80 -0
  76. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
  77. glitchlings/zoo/_ocr_confusions.py +0 -34
  78. glitchlings/zoo/_rate.py +0 -21
  79. glitchlings/zoo/reduple.py +0 -169
  80. glitchlings-0.2.5.dist-info/METADATA +0 -490
  81. glitchlings-0.2.5.dist-info/RECORD +0 -27
  82. /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
  83. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
  84. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
  85. {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0
@@ -1,336 +1,241 @@
1
- import random
2
- import re
3
- from collections.abc import Iterable
4
- from dataclasses import dataclass
5
- from typing import TYPE_CHECKING, Any, Literal, cast
6
-
7
- try: # pragma: no cover - exercised in environments with NLTK installed
8
- import nltk # type: ignore[import]
9
- except ModuleNotFoundError as exc: # pragma: no cover - triggered when NLTK missing
10
- nltk = None # type: ignore[assignment]
11
- find = None # type: ignore[assignment]
12
- _NLTK_IMPORT_ERROR = exc
13
- else: # pragma: no cover - executed when NLTK is available
14
- from nltk.corpus.reader import WordNetCorpusReader as _WordNetCorpusReader # type: ignore[import]
15
- from nltk.data import find as _nltk_find # type: ignore[import]
16
-
17
- find = _nltk_find
18
- _NLTK_IMPORT_ERROR = None
19
-
20
- if TYPE_CHECKING: # pragma: no cover - typing aid only
21
- from nltk.corpus.reader import WordNetCorpusReader # type: ignore[import]
22
- else: # Use ``Any`` at runtime to avoid hard dependency when NLTK missing
23
- WordNetCorpusReader = Any
24
-
25
- if nltk is not None: # pragma: no cover - guarded by import success
26
- try:
27
- from nltk.corpus import wordnet as _WORDNET_MODULE # type: ignore[import]
28
- except ModuleNotFoundError: # pragma: no cover - only hit on namespace packages
29
- _WORDNET_MODULE = None
30
- else:
31
- WordNetCorpusReader = _WordNetCorpusReader # type: ignore[assignment]
32
- else:
33
- _WORDNET_MODULE = None
34
-
35
- from .core import AttackWave, Glitchling
36
- from ._rate import resolve_rate
37
-
38
- _WORDNET_HANDLE: WordNetCorpusReader | Any | None = _WORDNET_MODULE
1
+ """Jargoyle glitchling: Dictionary-based word drift.
39
2
 
40
- _wordnet_ready = False
3
+ Jargoyle swaps words with alternatives from bundled lexeme dictionaries.
4
+ Multiple dictionaries are supported:
5
+ - "colors": Color term swapping
6
+ - "synonyms": General synonym substitution
7
+ - "corporate": Business jargon alternatives
8
+ - "academic": Scholarly word substitutions
9
+ - "cyberpunk": Neon cyberpunk slang and gadgetry
10
+ - "lovecraftian": Cosmic horror terminology
11
+ You can also drop additional dictionaries into ``assets/lexemes`` to make
12
+ them available without modifying the code. The backend discovers any
13
+ ``*.json`` file in that directory at runtime.
41
14
 
15
+ Two modes are available:
16
+ - "literal": First entry in each word's alternatives (deterministic mapping)
17
+ - "drift": Random selection from alternatives (probabilistic)
18
+ """
42
19
 
43
- def _require_nltk() -> None:
44
- """Ensure the NLTK dependency is present before continuing."""
45
-
46
- if nltk is None or find is None:
47
- message = (
48
- "The NLTK package is required for the jargoyle glitchling; install "
49
- "the 'wordnet' extra via `pip install glitchlings[wordnet]`."
50
- )
51
- if '_NLTK_IMPORT_ERROR' in globals() and _NLTK_IMPORT_ERROR is not None:
52
- raise RuntimeError(message) from _NLTK_IMPORT_ERROR
53
- raise RuntimeError(message)
20
+ from __future__ import annotations
54
21
 
22
+ import os
23
+ from importlib import resources
24
+ from pathlib import Path
25
+ from typing import Literal, cast
55
26
 
56
- def dependencies_available() -> bool:
57
- """Return ``True`` when the runtime NLTK dependency is present."""
27
+ from glitchlings.constants import DEFAULT_JARGOYLE_RATE
28
+ from glitchlings.internal.rust_ffi import (
29
+ jargoyle_drift_rust,
30
+ list_lexeme_dictionaries_rust,
31
+ resolve_seed,
32
+ )
58
33
 
59
- return nltk is not None and find is not None
34
+ from .core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
60
35
 
36
+ _LEXEME_ENV_VAR = "GLITCHLINGS_LEXEME_DIR"
61
37
 
62
- def _load_wordnet_reader() -> WordNetCorpusReader:
63
- """Return a WordNet corpus reader from the downloaded corpus files."""
64
38
 
65
- _require_nltk()
39
+ def _configure_lexeme_directory() -> Path | None:
40
+ """Expose the bundled lexeme directory to the Rust backend via an env var."""
66
41
 
67
42
  try:
68
- root = find("corpora/wordnet")
69
- except LookupError:
70
- try:
71
- zip_root = find("corpora/wordnet.zip")
72
- except LookupError as exc:
73
- raise RuntimeError(
74
- "The NLTK WordNet corpus is not installed; run `nltk.download('wordnet')`."
75
- ) from exc
76
- root = zip_root.join("wordnet/")
77
-
78
- return WordNetCorpusReader(root, None)
79
-
80
-
81
- def _wordnet(force_refresh: bool = False) -> WordNetCorpusReader | Any:
82
- """Retrieve the active WordNet handle, rebuilding it on demand."""
83
-
84
- global _WORDNET_HANDLE
85
-
86
- if force_refresh:
87
- _WORDNET_HANDLE = _WORDNET_MODULE
88
-
89
- if _WORDNET_HANDLE is not None:
90
- return _WORDNET_HANDLE
91
-
92
- _WORDNET_HANDLE = _load_wordnet_reader()
93
- return _WORDNET_HANDLE
43
+ lexeme_root = resources.files("glitchlings.assets.lexemes")
44
+ except (ModuleNotFoundError, AttributeError):
45
+ return None
94
46
 
47
+ try:
48
+ with resources.as_file(lexeme_root) as resolved:
49
+ path = Path(resolved)
50
+ except FileNotFoundError:
51
+ return None
95
52
 
96
- def ensure_wordnet() -> None:
97
- """Ensure the WordNet corpus is available before use."""
53
+ if not path.is_dir():
54
+ return None
98
55
 
99
- global _wordnet_ready
100
- if _wordnet_ready:
101
- return
56
+ os.environ.setdefault(_LEXEME_ENV_VAR, str(path))
57
+ return path
102
58
 
103
- _require_nltk()
104
59
 
105
- resource = _wordnet()
60
+ _configure_lexeme_directory()
106
61
 
107
- try:
108
- resource.ensure_loaded()
109
- except LookupError:
110
- nltk.download("wordnet", quiet=True)
111
- try:
112
- resource = _wordnet(force_refresh=True)
113
- resource.ensure_loaded()
114
- except LookupError as exc: # pragma: no cover - only triggered when download fails
115
- raise RuntimeError(
116
- "Unable to load NLTK WordNet corpus for the jargoyle glitchling."
117
- ) from exc
62
+ DEFAULT_LEXEMES = "synonyms"
118
63
 
119
- _wordnet_ready = True
64
+ # Valid modes
65
+ JargoyleMode = Literal["literal", "drift"]
66
+ VALID_MODES = ("literal", "drift")
67
+ DEFAULT_MODE: JargoyleMode = "drift"
120
68
 
121
69
 
122
- # Backwards compatibility for callers relying on the previous private helper name.
123
- _ensure_wordnet = ensure_wordnet
70
+ def _available_lexemes() -> list[str]:
71
+ return sorted({name.lower() for name in list_lexeme_dictionaries_rust()})
124
72
 
125
73
 
126
- PartOfSpeech = Literal["n", "v", "a", "r"]
127
- PartOfSpeechInput = PartOfSpeech | Iterable[PartOfSpeech] | Literal["any"]
128
- NormalizedPartsOfSpeech = tuple[PartOfSpeech, ...]
74
+ def _validate_lexemes(name: str) -> str:
75
+ normalized = name.lower()
76
+ available = _available_lexemes()
77
+ if normalized not in available:
78
+ raise ValueError(f"Invalid lexemes '{name}'. Must be one of: {', '.join(available)}")
79
+ return normalized
129
80
 
130
- _VALID_POS: tuple[PartOfSpeech, ...] = ("n", "v", "a", "r")
131
81
 
82
+ def _validate_mode(mode: JargoyleMode | str) -> JargoyleMode:
83
+ normalized = mode.lower()
84
+ if normalized not in VALID_MODES:
85
+ raise ValueError(f"Invalid mode '{mode}'. Must be one of: {', '.join(VALID_MODES)}")
86
+ return cast(JargoyleMode, normalized)
132
87
 
133
- def _split_token(token: str) -> tuple[str, str, str]:
134
- """Split a token into leading punctuation, core word, and trailing punctuation."""
135
88
 
136
- match = re.match(r"^(\W*)(.*?)(\W*)$", token)
137
- if not match:
138
- return "", token, ""
139
- prefix, core, suffix = match.groups()
140
- return prefix, core, suffix
89
+ VALID_LEXEMES = tuple(_available_lexemes())
141
90
 
142
91
 
143
- def _normalize_parts_of_speech(part_of_speech: PartOfSpeechInput) -> NormalizedPartsOfSpeech:
144
- """Coerce user input into a tuple of valid WordNet POS tags."""
92
+ def list_lexeme_dictionaries() -> list[str]:
93
+ """Return the list of available lexeme dictionaries.
145
94
 
146
- if isinstance(part_of_speech, str):
147
- lowered = part_of_speech.lower()
148
- if lowered == "any":
149
- return _VALID_POS
150
- if lowered not in _VALID_POS:
151
- raise ValueError(
152
- "part_of_speech must be one of 'n', 'v', 'a', 'r', or 'any'"
153
- )
154
- return (cast(PartOfSpeech, lowered),)
155
-
156
- normalized: list[PartOfSpeech] = []
157
- for pos in part_of_speech:
158
- if pos not in _VALID_POS:
159
- raise ValueError(
160
- "part_of_speech entries must be one of 'n', 'v', 'a', or 'r'"
161
- )
162
- if pos not in normalized:
163
- normalized.append(pos)
164
- if not normalized:
165
- raise ValueError("part_of_speech iterable may not be empty")
166
- return tuple(normalized)
95
+ Returns:
96
+ List of dictionary names that can be used with Jargoyle.
97
+ """
98
+ return _available_lexemes()
167
99
 
168
100
 
169
- @dataclass(frozen=True)
170
- class CandidateInfo:
171
- """Metadata for a candidate token that may be replaced."""
101
+ def jargoyle_drift(
102
+ text: str,
103
+ *,
104
+ lexemes: str = DEFAULT_LEXEMES,
105
+ mode: JargoyleMode = DEFAULT_MODE,
106
+ rate: float | None = None,
107
+ seed: int | None = None,
108
+ ) -> str:
109
+ """Apply dictionary-based word drift to text.
172
110
 
173
- prefix: str
174
- core_word: str
175
- suffix: str
176
- parts_of_speech: NormalizedPartsOfSpeech
111
+ Args:
112
+ text: Input text to transform.
113
+ lexemes: Name of the dictionary to use.
114
+ mode: "literal" for deterministic first-entry swaps,
115
+ "drift" for random selection from alternatives.
116
+ rate: Probability of transforming each matching word (0.0 to 1.0).
117
+ seed: Seed for deterministic randomness (only used in "drift" mode).
177
118
 
119
+ Returns:
120
+ Text with word substitutions applied.
178
121
 
179
- def _collect_synonyms(
180
- word: str, parts_of_speech: NormalizedPartsOfSpeech
181
- ) -> list[str]:
182
- """Gather deterministic synonym candidates for the supplied word."""
122
+ Raises:
123
+ ValueError: If lexemes or mode is invalid.
124
+ """
125
+ normalized_lexemes = _validate_lexemes(lexemes)
126
+ normalized_mode = _validate_mode(mode)
127
+
128
+ effective_rate = DEFAULT_JARGOYLE_RATE if rate is None else float(rate)
129
+ resolved_seed = resolve_seed(seed, None) if normalized_mode == "drift" else None
130
+
131
+ return jargoyle_drift_rust(
132
+ text,
133
+ normalized_lexemes,
134
+ normalized_mode,
135
+ effective_rate,
136
+ resolved_seed,
137
+ )
183
138
 
184
- normalized_word = word.lower()
185
- wordnet = _wordnet()
186
- synonyms: set[str] = set()
187
- for pos_tag in parts_of_speech:
188
- synsets = wordnet.synsets(word, pos=pos_tag)
189
- if not synsets:
190
- continue
191
139
 
192
- for synset in synsets:
193
- lemmas_list = [lemma.name() for lemma in cast(Any, synset).lemmas()]
194
- if not lemmas_list:
195
- continue
140
+ class Jargoyle(Glitchling):
141
+ """Glitchling that swaps words using bundled lexeme dictionaries.
196
142
 
197
- filtered = []
198
- for lemma_str in lemmas_list:
199
- cleaned = lemma_str.replace("_", " ")
200
- if cleaned.lower() != normalized_word:
201
- filtered.append(cleaned)
143
+ Jargoyle replaces words with alternatives from one of several dictionaries:
202
144
 
203
- if filtered:
204
- synonyms.update(filtered)
205
- break
145
+ - **colors**: Swap color terms (e.g., "red" -> "blue").
146
+ - **synonyms**: General synonym substitution (e.g., "fast" -> "rapid").
147
+ - **corporate**: Business jargon alternatives.
148
+ - **academic**: Scholarly word substitutions.
149
+ - **cyberpunk**: Neon cyberpunk slang and gadgetry.
150
+ - **lovecraftian**: Cosmic horror terminology.
151
+ - **custom**: Any ``*.json`` dictionary placed in ``assets/lexemes``.
206
152
 
207
- if synonyms:
208
- break
153
+ Two modes are supported:
209
154
 
210
- return sorted(synonyms)
155
+ - **literal**: Use the first (canonical) entry for each word.
156
+ - **drift**: Randomly select from available alternatives.
211
157
 
158
+ Example:
159
+ >>> from glitchlings import Jargoyle
160
+ >>> jargoyle = Jargoyle(lexemes="colors", mode="literal")
161
+ >>> jargoyle("The red balloon floated away.")
162
+ 'The blue balloon floated away.'
212
163
 
213
- def substitute_random_synonyms(
214
- text: str,
215
- rate: float | None = None,
216
- part_of_speech: PartOfSpeechInput = "n",
217
- seed: int | None = None,
218
- rng: random.Random | None = None,
219
- *,
220
- replacement_rate: float | None = None,
221
- ) -> str:
222
- """Replace words with random WordNet synonyms.
223
-
224
- Parameters
225
- - text: Input text.
226
- - rate: Max proportion of candidate words to replace (default 0.1).
227
- - part_of_speech: WordNet POS tag(s) to target. Accepts "n", "v", "a", "r",
228
- any iterable of those tags, or "any" to include all four.
229
- - rng: Optional RNG instance used for deterministic sampling.
230
- - seed: Optional seed if `rng` not provided.
231
-
232
- Determinism
233
- - Candidates collected in left-to-right order; no set() reordering.
234
- - Replacement positions chosen via rng.sample.
235
- - Synonyms sorted before rng.choice to fix ordering.
236
- - For each POS, the first synset containing alternate lemmas is used for stability.
164
+ >>> jargoyle = Jargoyle(lexemes="synonyms", mode="drift", rate=0.5, seed=42)
165
+ >>> jargoyle("The quick fox jumps fast.")
166
+ 'The swift fox jumps rapid.'
237
167
  """
238
- effective_rate = resolve_rate(
239
- rate=rate,
240
- legacy_value=replacement_rate,
241
- default=0.1,
242
- legacy_name="replacement_rate",
243
- )
244
168
 
245
- ensure_wordnet()
246
- wordnet = _wordnet()
247
-
248
- active_rng: random.Random
249
- if rng is not None:
250
- active_rng = rng
251
- else:
252
- active_rng = random.Random(seed)
253
-
254
- target_pos = _normalize_parts_of_speech(part_of_speech)
255
-
256
- # Split but keep whitespace separators so we can rebuild easily
257
- tokens = re.split(r"(\s+)", text)
258
-
259
- # Collect indices of candidate tokens (even positions 0,2,.. are words given our split design)
260
- candidate_indices: list[int] = []
261
- candidate_metadata: dict[int, CandidateInfo] = {}
262
- for idx, tok in enumerate(tokens):
263
- if idx % 2 == 0 and tok and not tok.isspace():
264
- prefix, core_word, suffix = _split_token(tok)
265
- if not core_word:
266
- continue
267
-
268
- available_pos: NormalizedPartsOfSpeech = tuple(
269
- pos for pos in target_pos if wordnet.synsets(core_word, pos=pos)
270
- )
271
- if available_pos:
272
- candidate_indices.append(idx)
273
- candidate_metadata[idx] = CandidateInfo(
274
- prefix=prefix,
275
- core_word=core_word,
276
- suffix=suffix,
277
- parts_of_speech=available_pos,
278
- )
279
-
280
- if not candidate_indices:
281
- return text
282
-
283
- clamped_rate = max(0.0, effective_rate)
284
- max_replacements = int(len(candidate_indices) * clamped_rate)
285
- if max_replacements <= 0:
286
- return text
287
-
288
- # Choose which positions to replace deterministically via rng.sample
289
- replace_positions = active_rng.sample(candidate_indices, k=max_replacements)
290
- # Process in ascending order to avoid affecting later indices
291
- replace_positions.sort()
292
-
293
- for pos in replace_positions:
294
- metadata = candidate_metadata[pos]
295
- synonyms = _collect_synonyms(metadata.core_word, metadata.parts_of_speech)
296
- if not synonyms:
297
- continue
298
-
299
- replacement = active_rng.choice(synonyms)
300
- tokens[pos] = f"{metadata.prefix}{replacement}{metadata.suffix}"
301
-
302
- return "".join(tokens)
303
-
304
-
305
- class Jargoyle(Glitchling):
306
- """Glitchling that swaps words with random WordNet synonyms."""
169
+ flavor = "Oh no... The worst person you know just bought a thesaurus..."
307
170
 
308
171
  def __init__(
309
172
  self,
310
173
  *,
174
+ lexemes: str = DEFAULT_LEXEMES,
175
+ mode: JargoyleMode = DEFAULT_MODE,
311
176
  rate: float | None = None,
312
- replacement_rate: float | None = None,
313
- part_of_speech: PartOfSpeechInput = "n",
314
177
  seed: int | None = None,
315
178
  ) -> None:
316
- self._param_aliases = {"replacement_rate": "rate"}
317
- effective_rate = resolve_rate(
318
- rate=rate,
319
- legacy_value=replacement_rate,
320
- default=0.1,
321
- legacy_name="replacement_rate",
322
- )
179
+ """Initialize Jargoyle with the specified dictionary and mode.
180
+
181
+ Args:
182
+ lexemes: Name of the dictionary to use. See ``list_lexeme_dictionaries()``
183
+ for the full, dynamic list (including any custom ``*.json`` files).
184
+ mode: Transformation mode. "literal" for deterministic swaps,
185
+ "drift" for random selection.
186
+ rate: Probability of transforming each matching word (0.0 to 1.0).
187
+ Defaults to 0.01.
188
+ seed: Seed for deterministic randomness.
189
+ """
190
+ # Validate inputs
191
+ normalized_lexemes = _validate_lexemes(lexemes)
192
+ normalized_mode = _validate_mode(mode)
193
+
194
+ effective_rate = DEFAULT_JARGOYLE_RATE if rate is None else rate
195
+
323
196
  super().__init__(
324
197
  name="Jargoyle",
325
- corruption_function=substitute_random_synonyms,
198
+ corruption_function=jargoyle_drift,
326
199
  scope=AttackWave.WORD,
200
+ order=AttackOrder.NORMAL,
327
201
  seed=seed,
202
+ lexemes=normalized_lexemes,
203
+ mode=normalized_mode,
328
204
  rate=effective_rate,
329
- part_of_speech=part_of_speech,
205
+ # Pass seed explicitly to kwargs so corruption_function receives it
206
+ # (seed is stored separately in base class but needed by jargoyle_drift)
207
+ )
208
+ # Ensure seed is in kwargs for the corruption function
209
+ self.kwargs["seed"] = seed
210
+
211
+ def pipeline_operation(self) -> PipelineOperationPayload:
212
+ """Return the pipeline descriptor for the Rust backend."""
213
+ lexemes = self.kwargs.get("lexemes", DEFAULT_LEXEMES)
214
+ mode = self.kwargs.get("mode", DEFAULT_MODE)
215
+ rate = self.kwargs.get("rate", DEFAULT_JARGOYLE_RATE)
216
+ return cast(
217
+ PipelineOperationPayload,
218
+ {
219
+ "type": "jargoyle",
220
+ "lexemes": str(lexemes),
221
+ "mode": str(mode),
222
+ "rate": float(rate),
223
+ },
330
224
  )
331
225
 
332
226
 
227
+ # Module-level singleton for convenience
333
228
  jargoyle = Jargoyle()
334
229
 
335
230
 
336
- __all__ = ["Jargoyle", "dependencies_available", "ensure_wordnet", "jargoyle"]
231
+ __all__ = [
232
+ "DEFAULT_LEXEMES",
233
+ "DEFAULT_MODE",
234
+ "Jargoyle",
235
+ "JargoyleMode",
236
+ "VALID_LEXEMES",
237
+ "VALID_MODES",
238
+ "jargoyle",
239
+ "jargoyle_drift",
240
+ "list_lexeme_dictionaries",
241
+ ]