glitchlings 0.10.2__cp312-cp312-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (83) hide show
  1. glitchlings/__init__.py +99 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust/__init__.py +12 -0
  4. glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +147 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +493 -0
  21. glitchlings/attack/core_execution.py +367 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +218 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +227 -0
  27. glitchlings/auggie.py +284 -0
  28. glitchlings/compat/__init__.py +9 -0
  29. glitchlings/compat/loaders.py +355 -0
  30. glitchlings/compat/types.py +41 -0
  31. glitchlings/conf/__init__.py +41 -0
  32. glitchlings/conf/loaders.py +331 -0
  33. glitchlings/conf/schema.py +156 -0
  34. glitchlings/conf/types.py +72 -0
  35. glitchlings/config.toml +2 -0
  36. glitchlings/constants.py +59 -0
  37. glitchlings/dev/__init__.py +3 -0
  38. glitchlings/dev/docs.py +45 -0
  39. glitchlings/dlc/__init__.py +19 -0
  40. glitchlings/dlc/_shared.py +296 -0
  41. glitchlings/dlc/gutenberg.py +400 -0
  42. glitchlings/dlc/huggingface.py +68 -0
  43. glitchlings/dlc/prime.py +215 -0
  44. glitchlings/dlc/pytorch.py +98 -0
  45. glitchlings/dlc/pytorch_lightning.py +173 -0
  46. glitchlings/internal/__init__.py +16 -0
  47. glitchlings/internal/rust.py +159 -0
  48. glitchlings/internal/rust_ffi.py +490 -0
  49. glitchlings/main.py +426 -0
  50. glitchlings/protocols.py +91 -0
  51. glitchlings/runtime_config.py +24 -0
  52. glitchlings/util/__init__.py +27 -0
  53. glitchlings/util/adapters.py +65 -0
  54. glitchlings/util/keyboards.py +356 -0
  55. glitchlings/util/transcripts.py +108 -0
  56. glitchlings/zoo/__init__.py +161 -0
  57. glitchlings/zoo/assets/__init__.py +29 -0
  58. glitchlings/zoo/core.py +678 -0
  59. glitchlings/zoo/core_execution.py +154 -0
  60. glitchlings/zoo/core_planning.py +451 -0
  61. glitchlings/zoo/corrupt_dispatch.py +295 -0
  62. glitchlings/zoo/hokey.py +139 -0
  63. glitchlings/zoo/jargoyle.py +243 -0
  64. glitchlings/zoo/mim1c.py +148 -0
  65. glitchlings/zoo/pedant/__init__.py +109 -0
  66. glitchlings/zoo/pedant/core.py +105 -0
  67. glitchlings/zoo/pedant/forms.py +74 -0
  68. glitchlings/zoo/pedant/stones.py +74 -0
  69. glitchlings/zoo/redactyl.py +97 -0
  70. glitchlings/zoo/rng.py +259 -0
  71. glitchlings/zoo/rushmore.py +416 -0
  72. glitchlings/zoo/scannequin.py +66 -0
  73. glitchlings/zoo/transforms.py +346 -0
  74. glitchlings/zoo/typogre.py +128 -0
  75. glitchlings/zoo/validation.py +477 -0
  76. glitchlings/zoo/wherewolf.py +120 -0
  77. glitchlings/zoo/zeedub.py +93 -0
  78. glitchlings-0.10.2.dist-info/METADATA +337 -0
  79. glitchlings-0.10.2.dist-info/RECORD +83 -0
  80. glitchlings-0.10.2.dist-info/WHEEL +5 -0
  81. glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
  82. glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
  83. glitchlings-0.10.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,295 @@
1
+ """Pure dispatch logic for Glitchling corruption operations.
2
+
3
+ This module contains the deterministic, side-effect-free logic for building
4
+ corruption plans. It separates the "what to corrupt" decision from the
5
+ "how to corrupt" execution.
6
+
7
+ **Design Philosophy:**
8
+
9
+ All functions in this module are *pure* - they perform dispatch analysis
10
+ based solely on their inputs, without side effects. They do not:
11
+ - Invoke corruption functions
12
+ - Modify state
13
+ - Perform I/O
14
+
15
+ The separation allows:
16
+ - Corruption dispatch to be tested without actual corruption
17
+ - Clear boundaries between planning and execution
18
+ - Reasoning about what will be corrupted before execution
19
+
20
+ See AGENTS.md "Functional Purity Architecture" for full details.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from dataclasses import dataclass
26
+ from typing import Any, Literal
27
+
28
+ from ..util.transcripts import (
29
+ Transcript,
30
+ TranscriptTarget,
31
+ TranscriptTurn,
32
+ is_transcript,
33
+ resolve_transcript_indices,
34
+ )
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Type Definitions
38
+ # ---------------------------------------------------------------------------
39
+
40
+
41
+ @dataclass(slots=True, frozen=True)
42
+ class StringCorruptionTarget:
43
+ """Target specification for corrupting a plain string.
44
+
45
+ Attributes:
46
+ text: The string to corrupt.
47
+ """
48
+
49
+ text: str
50
+ kind: Literal["string"] = "string"
51
+
52
+
53
+ @dataclass(slots=True, frozen=True)
54
+ class TranscriptTurnTarget:
55
+ """Target specification for a single turn within a transcript.
56
+
57
+ Attributes:
58
+ index: Position of the turn in the transcript.
59
+ content: The text content to corrupt.
60
+ """
61
+
62
+ index: int
63
+ content: str
64
+
65
+
66
+ @dataclass(slots=True, frozen=True)
67
+ class TranscriptCorruptionTarget:
68
+ """Target specification for corrupting transcript turns.
69
+
70
+ Attributes:
71
+ turns: List of turn targets with their indices and content.
72
+ original_transcript: The original transcript for result assembly.
73
+ """
74
+
75
+ turns: tuple[TranscriptTurnTarget, ...]
76
+ original_transcript: Transcript
77
+ kind: Literal["transcript"] = "transcript"
78
+
79
+
80
# Union type for corruption targets: either a plain-string target or a
# transcript target. Each variant carries a literal ``kind`` tag.
CorruptionTarget = StringCorruptionTarget | TranscriptCorruptionTarget
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Dispatch Functions
86
+ # ---------------------------------------------------------------------------
87
+
88
+
89
def resolve_corruption_target(
    text: str | Transcript,
    transcript_target: TranscriptTarget,
) -> CorruptionTarget:
    """Classify the input and describe which text segments should be corrupted.

    No corruption happens here; the function only inspects its arguments and
    returns a structured target.

    Args:
        text: Input text or transcript to analyze.
        transcript_target: Specification for which transcript turns to target;
            passed through to ``resolve_transcript_indices``.

    Returns:
        A ``StringCorruptionTarget`` for plain strings and for any input that
        is not a transcript, or a ``TranscriptCorruptionTarget`` listing the
        selected turns whose ``"content"`` value is a string.

    Note:
        Inputs that are neither strings nor transcripts (for example lists of
        strings) are converted with ``str()`` and treated as a single string.
        The original comments describe this as backwards compatibility for
        dataset column transformations that batch values as lists.
    """
    if isinstance(text, str):
        return StringCorruptionTarget(text=text)

    if not is_transcript(text):
        # Legacy fallback: stringify whatever was passed in.
        return StringCorruptionTarget(text=str(text))

    selected: list[TranscriptTurnTarget] = []
    for position in resolve_transcript_indices(text, transcript_target):
        body = text[position].get("content")
        if not isinstance(body, str):
            # Turns without string content are left out of the target.
            continue
        selected.append(TranscriptTurnTarget(index=position, content=body))

    return TranscriptCorruptionTarget(
        turns=tuple(selected),
        original_transcript=text,
    )
138
+
139
+
140
def count_corruption_targets(target: CorruptionTarget) -> int:
    """Report how many text segments a target contains.

    Args:
        target: The corruption target specification.

    Returns:
        ``1`` for a string target, otherwise the number of turns in the
        transcript target.
    """
    return 1 if isinstance(target, StringCorruptionTarget) else len(target.turns)
152
+
153
+
154
def extract_texts_to_corrupt(target: CorruptionTarget) -> list[str]:
    """Collect every text string contained in a target.

    Args:
        target: The corruption target specification.

    Returns:
        A one-element list holding the string for a string target, otherwise
        the ``content`` of each turn in target order.
    """
    if not isinstance(target, StringCorruptionTarget):
        return [turn.content for turn in target.turns]
    return [target.text]
168
+
169
+
170
+ # ---------------------------------------------------------------------------
171
+ # Result Assembly Functions
172
+ # ---------------------------------------------------------------------------
173
+
174
+
175
def assemble_string_result(
    _target: StringCorruptionTarget,
    corrupted: str,
) -> str:
    """Return the corrupted text unchanged as the result for a string target.

    Args:
        _target: The original target. It is not read; the parameter exists so
            the signature mirrors ``assemble_transcript_result``.
        corrupted: The corrupted text.

    Returns:
        ``corrupted``, as given.
    """
    return corrupted
189
+
190
+
191
def assemble_transcript_result(
    target: TranscriptCorruptionTarget,
    corrupted_contents: dict[int, str],
) -> Transcript:
    """Build a new transcript with the given turns' content replaced.

    Args:
        target: The original target specification; only its
            ``original_transcript`` is read.
        corrupted_contents: Mapping of turn indices to corrupted content.

    Returns:
        A new list of turn dicts. Indices outside ``0 <= idx < len(transcript)``
        are ignored.
    """
    # Each turn dict is copied one level deep (a new list of new dicts);
    # values nested inside a turn are still shared with the original.
    rebuilt: list[TranscriptTurn] = list(map(dict, target.original_transcript))
    turn_count = len(rebuilt)

    for position, replacement in corrupted_contents.items():
        if not (0 <= position < turn_count):
            continue
        rebuilt[position]["content"] = replacement

    return rebuilt
215
+
216
+
217
def assemble_corruption_result(
    target: CorruptionTarget,
    corrupted: str | dict[int, str],
) -> str | Transcript:
    """Dispatch to the assembler that matches the target variant.

    Args:
        target: The original corruption target.
        corrupted: A single corrupted string for a ``StringCorruptionTarget``,
            or a mapping of turn indices to corrupted content for a
            ``TranscriptCorruptionTarget``.

    Returns:
        The assembled result: a string for string targets, a transcript for
        transcript targets.

    Raises:
        TypeError: If ``corrupted`` has the wrong type for the target, or if
            ``target`` is neither known variant.
    """
    if isinstance(target, StringCorruptionTarget):
        if isinstance(corrupted, str):
            return assemble_string_result(target, corrupted)
        raise TypeError("String target requires corrupted string result")

    if isinstance(target, TranscriptCorruptionTarget):
        if isinstance(corrupted, dict):
            return assemble_transcript_result(target, corrupted)
        raise TypeError("Transcript target requires corrupted content mapping")

    # The type annotations rule this out; kept as an explicit guard.
    raise TypeError(f"Unknown target type: {type(target).__name__}")
252
+
253
+
254
+ # ---------------------------------------------------------------------------
255
+ # Validation Helpers
256
+ # ---------------------------------------------------------------------------
257
+
258
+
259
def validate_text_input(text: Any) -> str | Transcript:
    """Accept strings and transcripts and reject everything else.

    Args:
        text: Input to validate.

    Returns:
        ``text`` itself when it is a ``str`` or passes ``is_transcript``.

    Raises:
        TypeError: If input is not a string or transcript.
    """
    # The str check runs first, so plain strings never reach is_transcript().
    if isinstance(text, str) or is_transcript(text):
        return text
    raise TypeError(f"Expected string or transcript, got {type(text).__name__}")
277
+
278
+
279
# Public names of this module, grouped by role.
__all__ = [
    # Target types
    "StringCorruptionTarget",
    "TranscriptTurnTarget",
    "TranscriptCorruptionTarget",
    "CorruptionTarget",
    # Dispatch functions
    "resolve_corruption_target",
    "count_corruption_targets",
    "extract_texts_to_corrupt",
    # Result assembly
    "assemble_string_result",
    "assemble_transcript_result",
    "assemble_corruption_result",
    # Validation
    "validate_text_input",
]
@@ -0,0 +1,139 @@
1
+ """Hokey glitchling that performs expressive lengthening."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from typing import Any, cast
7
+
8
+ from glitchlings.internal.rust_ffi import hokey_rust, resolve_seed
9
+
10
+ from .core import AttackOrder, AttackWave, Gaggle, PipelineOperationPayload
11
+ from .core import Glitchling as GlitchlingBase
12
+
13
+
14
def extend_vowels(
    text: str,
    rate: float = 0.3,
    extension_min: int = 2,
    extension_max: int = 5,
    word_length_threshold: int = 6,
    seed: int | None = None,
    rng: random.Random | None = None,
    base_p: float | None = None,
) -> str:
    """Stretch expressive segments of words by delegating to ``hokey_rust``.

    Empty input is returned as-is without resolving a seed or calling the
    backend.

    Parameters
    ----------
    text : str
        Input text to transform.
    rate : float, optional
        Global selection rate for candidate words.
    extension_min : int, optional
        Minimum number of extra repetitions for the stretch unit.
    extension_max : int, optional
        Maximum number of extra repetitions for the stretch unit.
    word_length_threshold : int, optional
        Preferred maximum alphabetic length; longer words are de-emphasised
        but not excluded.
    seed : int, optional
        Deterministic seed when ``rng`` is not supplied.
    rng : random.Random, optional
        Random number generator to drive sampling.
    base_p : float, optional
        Base probability for the negative-binomial sampler (heavier tails for
        smaller values). ``None`` is replaced by ``0.45``.

    Returns
    -------
    str
        The value returned by ``hokey_rust``, or ``text`` itself when empty.
    """
    if not text:
        return text

    if base_p is None:
        base_probability = 0.45
    else:
        base_probability = base_p

    # seed and rng are collapsed into one value by resolve_seed().
    backend_seed = resolve_seed(seed, rng)
    return hokey_rust(
        text,
        rate,
        extension_min,
        extension_max,
        word_length_threshold,
        base_probability,
        backend_seed,
    )
62
+
63
+
64
class Hokey(GlitchlingBase):
    """Glitchling that performs expressive lengthening via ``extend_vowels``.

    The constructor registers a thin wrapper around ``extend_vowels`` as the
    corruption function and hands every tuning value to the base class as a
    keyword argument.
    """

    flavor = "Sooooo excited to meet you! We reeeeeally missed you last week."

    seed: int | None

    def __init__(
        self,
        *,
        rate: float = 0.3,
        extension_min: int = 2,
        extension_max: int = 5,
        word_length_threshold: int = 6,
        base_p: float = 0.45,
        seed: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Configure the glitchling.

        Args:
            rate: Selection rate forwarded to ``extend_vowels``.
            extension_min: Minimum extra repetitions for the stretch unit.
            extension_max: Maximum extra repetitions for the stretch unit.
            word_length_threshold: Preferred maximum alphabetic word length.
            base_p: Base probability forwarded to ``extend_vowels``.
            seed: Optional seed, also recorded as ``_master_seed``.
            **kwargs: Extra keyword arguments passed to the base initialiser.
        """
        # Recorded before the base initialiser runs; reset_rng() overwrites it
        # whenever it is called with an explicit seed.
        self._master_seed: int | None = seed

        def _corruption_wrapper(text: str, **options: Any) -> str:
            return extend_vowels(text, **options)

        tuning: dict[str, Any] = {
            "rate": rate,
            "extension_min": extension_min,
            "extension_max": extension_max,
            "word_length_threshold": word_length_threshold,
            "base_p": base_p,
        }
        super().__init__(
            name="Hokey",
            corruption_function=_corruption_wrapper,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.FIRST,
            seed=seed,
            **tuning,
            **kwargs,
        )

    def pipeline_operation(self) -> PipelineOperationPayload:
        """Return the ``"hokey"`` pipeline descriptor built from ``self.kwargs``.

        Any setting that is missing or ``None`` falls back to the same default
        the constructor uses; present values are coerced to ``float``/``int``.
        """
        settings = self.kwargs
        rate = settings.get("rate")
        extension_min = settings.get("extension_min")
        extension_max = settings.get("extension_max")
        word_length_threshold = settings.get("word_length_threshold")
        base_p = settings.get("base_p")

        payload = {
            "type": "hokey",
            "rate": float(rate) if rate is not None else 0.3,
            "extension_min": int(extension_min) if extension_min is not None else 2,
            "extension_max": int(extension_max) if extension_max is not None else 5,
            "word_length_threshold": (
                int(word_length_threshold) if word_length_threshold is not None else 6
            ),
            "base_p": float(base_p) if base_p is not None else 0.45,
        }
        return cast(PipelineOperationPayload, payload)

    def reset_rng(self, seed: int | None = None) -> None:
        """Reset the RNG, re-deriving the seed when an explicit one is given.

        With ``seed=None`` this only defers to the base implementation. With a
        seed, the base reset runs first; if ``self.seed`` is then still
        ``None`` nothing more happens. Otherwise the seed is replaced by
        ``Gaggle.derive_seed(int(seed), self.name, 0)`` and both ``self.rng``
        and ``self.kwargs["seed"]`` are refreshed from it.
        """
        if seed is None:
            super().reset_rng(None)
            return

        self._master_seed = seed
        super().reset_rng(seed)
        if self.seed is None:
            return

        self.seed = int(Gaggle.derive_seed(int(seed), self.name, 0))
        self.rng = random.Random(self.seed)
        # Keep the kwargs copy in sync so the corruption function sees it.
        self.kwargs["seed"] = self.seed
134
+
135
+
136
# Default-configured instance, created when this module is imported.
hokey = Hokey()


# Public names of this module.
__all__ = ["Hokey", "hokey", "extend_vowels"]
@@ -0,0 +1,243 @@
1
+ """Jargoyle glitchling: Dictionary-based word drift.
2
+
3
+ Jargoyle swaps words with alternatives from bundled lexeme dictionaries.
4
+ Multiple dictionaries are supported:
5
+ - "colors": Color term swapping
6
+ - "synonyms": General synonym substitution
7
+ - "corporate": Business jargon alternatives
8
+ - "academic": Scholarly word substitutions
9
+ - "cyberpunk": Neon cyberpunk slang and gadgetry
10
+ - "lovecraftian": Cosmic horror terminology
11
+ You can also drop additional dictionaries into ``assets/lexemes`` to make
12
+ them available without modifying the code. The backend discovers any
13
+ ``*.json`` file in that directory at runtime.
14
+
15
+ Two modes are available:
16
+ - "literal": First entry in each word's alternatives (deterministic mapping)
17
+ - "drift": Random selection from alternatives (probabilistic)
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import os
23
+ from importlib import resources
24
+ from pathlib import Path
25
+ from typing import Any, Literal, cast
26
+
27
+ from glitchlings.constants import DEFAULT_JARGOYLE_RATE
28
+ from glitchlings.internal.rust_ffi import (
29
+ jargoyle_drift_rust,
30
+ list_lexeme_dictionaries_rust,
31
+ resolve_seed,
32
+ )
33
+
34
+ from .core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
35
+
36
# Name of the environment variable that _configure_lexeme_directory() sets to
# the bundled lexeme directory path.
_LEXEME_ENV_VAR = "GLITCHLINGS_LEXEME_DIR"
37
+
38
+
39
+ def _configure_lexeme_directory() -> Path | None:
40
+ """Expose the bundled lexeme directory to the Rust backend via an env var."""
41
+
42
+ try:
43
+ lexeme_root = resources.files("glitchlings.assets.lexemes")
44
+ except (ModuleNotFoundError, AttributeError):
45
+ return None
46
+
47
+ try:
48
+ with resources.as_file(lexeme_root) as resolved:
49
+ path = Path(resolved)
50
+ except FileNotFoundError:
51
+ return None
52
+
53
+ if not path.is_dir():
54
+ return None
55
+
56
+ os.environ.setdefault(_LEXEME_ENV_VAR, str(path))
57
+ return path
58
+
59
+
60
# Runs at import time, before the lexeme list below is read from the backend
# (presumably so the backend can find the bundled directory — confirm).
_configure_lexeme_directory()

# Dictionary used when the caller does not name one.
DEFAULT_LEXEMES = "synonyms"

# Valid modes
JargoyleMode = Literal["literal", "drift"]
VALID_MODES = ("literal", "drift")
DEFAULT_MODE: JargoyleMode = "drift"
68
+
69
+
70
def _available_lexemes() -> list[str]:
    """Return the backend's dictionary names, lower-cased, de-duplicated, sorted."""
    unique_names: set[str] = set()
    for name in list_lexeme_dictionaries_rust():
        unique_names.add(name.lower())
    return sorted(unique_names)
72
+
73
+
74
def _validate_lexemes(name: str) -> str:
    """Lower-case a dictionary name and check it against the available ones.

    Args:
        name: Dictionary name as supplied by the caller.

    Returns:
        The lower-cased name.

    Raises:
        ValueError: If the lower-cased name is not an available dictionary.
    """
    candidate = name.lower()
    known = _available_lexemes()
    if candidate in known:
        return candidate
    raise ValueError(f"Invalid lexemes '{name}'. Must be one of: {', '.join(known)}")
80
+
81
+
82
def _validate_mode(mode: JargoyleMode | str) -> JargoyleMode:
    """Lower-case a mode name and check it against ``VALID_MODES``.

    Args:
        mode: Mode name as supplied by the caller.

    Returns:
        The lower-cased mode.

    Raises:
        ValueError: If the lower-cased mode is not in ``VALID_MODES``.
    """
    candidate = mode.lower()
    if candidate in VALID_MODES:
        return cast(JargoyleMode, candidate)
    raise ValueError(f"Invalid mode '{mode}'. Must be one of: {', '.join(VALID_MODES)}")
87
+
88
+
89
# Snapshot of the available dictionaries taken once at import time; use
# list_lexeme_dictionaries() to query the backend again.
VALID_LEXEMES = tuple(_available_lexemes())
90
+
91
+
92
def list_lexeme_dictionaries() -> list[str]:
    """List the lexeme dictionaries currently reported by the backend.

    Returns:
        Sorted, lower-cased dictionary names that can be used with Jargoyle.
    """
    names = _available_lexemes()
    return names
99
+
100
+
101
def jargoyle_drift(
    text: str,
    *,
    lexemes: str = DEFAULT_LEXEMES,
    mode: JargoyleMode = DEFAULT_MODE,
    rate: float | None = None,
    seed: int | None = None,
) -> str:
    """Apply dictionary-based word substitution by calling the Rust backend.

    Args:
        text: Input text to transform.
        lexemes: Name of the dictionary to use.
        mode: "literal" for deterministic first-entry swaps,
            "drift" for random selection from alternatives.
        rate: Probability of transforming each matching word (0.0 to 1.0).
            ``None`` selects ``DEFAULT_JARGOYLE_RATE``.
        seed: Seed for deterministic randomness (only used in "drift" mode).

    Returns:
        Text with word substitutions applied.

    Raises:
        ValueError: If lexemes or mode is invalid.
    """
    dictionary_name = _validate_lexemes(lexemes)
    selected_mode = _validate_mode(mode)

    if rate is None:
        probability = DEFAULT_JARGOYLE_RATE
    else:
        probability = float(rate)

    # A seed is only resolved for drift mode; literal mode passes None.
    backend_seed = None
    if selected_mode == "drift":
        backend_seed = resolve_seed(seed, None)

    return jargoyle_drift_rust(
        text,
        dictionary_name,
        selected_mode,
        probability,
        backend_seed,
    )
138
+
139
+
140
class Jargoyle(Glitchling):
    """Glitchling that swaps words using bundled lexeme dictionaries.

    Dictionaries named in this module's documentation:

    - **colors**: Swap color terms.
    - **synonyms**: General synonym substitution.
    - **corporate**: Business jargon alternatives.
    - **academic**: Scholarly word substitutions.
    - **cyberpunk**: Neon cyberpunk slang and gadgetry.
    - **lovecraftian**: Cosmic horror terminology.
    - **custom**: Any ``*.json`` dictionary placed in ``assets/lexemes``.

    Two modes are supported:

    - **literal**: Use the first (canonical) entry for each word.
    - **drift**: Randomly select from available alternatives.

    Example (the output depends on the dictionary contents)::

        from glitchlings import Jargoyle

        jargoyle = Jargoyle(lexemes="colors", mode="literal")
        jargoyle("The red balloon floated away.")

        jargoyle = Jargoyle(lexemes="synonyms", mode="drift", rate=0.5, seed=42)
        jargoyle("The quick fox jumps fast.")
    """

    flavor = "Oh no... The worst person you know just bought a thesaurus..."

    def __init__(
        self,
        *,
        lexemes: str = DEFAULT_LEXEMES,
        mode: JargoyleMode = DEFAULT_MODE,
        rate: float | None = None,
        seed: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize Jargoyle with the specified dictionary and mode.

        Args:
            lexemes: Name of the dictionary to use. See
                ``list_lexeme_dictionaries()`` for the full, dynamic list
                (including any custom ``*.json`` files).
            mode: Transformation mode. "literal" for deterministic swaps,
                "drift" for random selection.
            rate: Probability of transforming each matching word (0.0 to 1.0).
                ``None`` selects ``DEFAULT_JARGOYLE_RATE``.
            seed: Seed for deterministic randomness.
            **kwargs: Extra keyword arguments passed to the base initialiser.

        Raises:
            ValueError: If ``lexemes`` or ``mode`` is invalid.
        """
        # Validate before touching the base class so bad input fails early.
        dictionary_name = _validate_lexemes(lexemes)
        selected_mode = _validate_mode(mode)

        if rate is None:
            probability = DEFAULT_JARGOYLE_RATE
        else:
            probability = rate

        super().__init__(
            name="Jargoyle",
            corruption_function=jargoyle_drift,
            scope=AttackWave.WORD,
            order=AttackOrder.NORMAL,
            seed=seed,
            lexemes=dictionary_name,
            mode=selected_mode,
            rate=probability,
            **kwargs,
        )
        # jargoyle_drift takes ``seed`` as a keyword argument, so the caller's
        # seed is written into kwargs explicitly after the base initialiser.
        self.kwargs["seed"] = seed

    def pipeline_operation(self) -> PipelineOperationPayload:
        """Return the pipeline descriptor for the Rust backend."""
        settings = self.kwargs
        payload = {
            "type": "jargoyle",
            "lexemes": str(settings.get("lexemes", DEFAULT_LEXEMES)),
            "mode": str(settings.get("mode", DEFAULT_MODE)),
            "rate": float(settings.get("rate", DEFAULT_JARGOYLE_RATE)),
        }
        return cast(PipelineOperationPayload, payload)
227
+
228
+
229
# Module-level singleton for convenience; built with the default dictionary
# and mode when this module is imported.
jargoyle = Jargoyle()


# Public names of this module.
__all__ = [
    "DEFAULT_LEXEMES",
    "DEFAULT_MODE",
    "Jargoyle",
    "JargoyleMode",
    "VALID_LEXEMES",
    "VALID_MODES",
    "jargoyle",
    "jargoyle_drift",
    "list_lexeme_dictionaries",
]