glitchlings 1.0.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. glitchlings/__init__.py +101 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_corruption_engine/__init__.py +12 -0
  4. glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
  5. glitchlings/assets/__init__.py +180 -0
  6. glitchlings/assets/apostrofae_pairs.json +32 -0
  7. glitchlings/assets/ekkokin_homophones.json +2014 -0
  8. glitchlings/assets/hokey_assets.json +193 -0
  9. glitchlings/assets/lexemes/academic.json +1049 -0
  10. glitchlings/assets/lexemes/colors.json +1333 -0
  11. glitchlings/assets/lexemes/corporate.json +716 -0
  12. glitchlings/assets/lexemes/cyberpunk.json +22 -0
  13. glitchlings/assets/lexemes/lovecraftian.json +23 -0
  14. glitchlings/assets/lexemes/synonyms.json +3354 -0
  15. glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
  16. glitchlings/assets/ocr_confusions.tsv +30 -0
  17. glitchlings/assets/pipeline_assets.json +29 -0
  18. glitchlings/attack/__init__.py +184 -0
  19. glitchlings/attack/analysis.py +1321 -0
  20. glitchlings/attack/core.py +819 -0
  21. glitchlings/attack/core_execution.py +378 -0
  22. glitchlings/attack/core_planning.py +612 -0
  23. glitchlings/attack/encode.py +114 -0
  24. glitchlings/attack/metrics.py +211 -0
  25. glitchlings/attack/metrics_dispatch.py +70 -0
  26. glitchlings/attack/tokenization.py +338 -0
  27. glitchlings/attack/tokenizer_metrics.py +373 -0
  28. glitchlings/auggie.py +285 -0
  29. glitchlings/compat/__init__.py +9 -0
  30. glitchlings/compat/loaders.py +355 -0
  31. glitchlings/compat/types.py +41 -0
  32. glitchlings/conf/__init__.py +39 -0
  33. glitchlings/conf/loaders.py +331 -0
  34. glitchlings/conf/schema.py +156 -0
  35. glitchlings/conf/types.py +72 -0
  36. glitchlings/config.toml +2 -0
  37. glitchlings/constants.py +139 -0
  38. glitchlings/dev/__init__.py +3 -0
  39. glitchlings/dev/docs.py +45 -0
  40. glitchlings/dlc/__init__.py +21 -0
  41. glitchlings/dlc/_shared.py +300 -0
  42. glitchlings/dlc/gutenberg.py +400 -0
  43. glitchlings/dlc/huggingface.py +68 -0
  44. glitchlings/dlc/langchain.py +147 -0
  45. glitchlings/dlc/nemo.py +283 -0
  46. glitchlings/dlc/prime.py +215 -0
  47. glitchlings/dlc/pytorch.py +98 -0
  48. glitchlings/dlc/pytorch_lightning.py +173 -0
  49. glitchlings/internal/__init__.py +16 -0
  50. glitchlings/internal/rust.py +159 -0
  51. glitchlings/internal/rust_ffi.py +599 -0
  52. glitchlings/main.py +426 -0
  53. glitchlings/protocols.py +91 -0
  54. glitchlings/runtime_config.py +24 -0
  55. glitchlings/util/__init__.py +41 -0
  56. glitchlings/util/adapters.py +65 -0
  57. glitchlings/util/keyboards.py +508 -0
  58. glitchlings/util/transcripts.py +108 -0
  59. glitchlings/zoo/__init__.py +161 -0
  60. glitchlings/zoo/assets/__init__.py +29 -0
  61. glitchlings/zoo/core.py +852 -0
  62. glitchlings/zoo/core_execution.py +154 -0
  63. glitchlings/zoo/core_planning.py +451 -0
  64. glitchlings/zoo/corrupt_dispatch.py +291 -0
  65. glitchlings/zoo/hokey.py +139 -0
  66. glitchlings/zoo/jargoyle.py +301 -0
  67. glitchlings/zoo/mim1c.py +269 -0
  68. glitchlings/zoo/pedant/__init__.py +109 -0
  69. glitchlings/zoo/pedant/core.py +99 -0
  70. glitchlings/zoo/pedant/forms.py +50 -0
  71. glitchlings/zoo/pedant/stones.py +83 -0
  72. glitchlings/zoo/redactyl.py +94 -0
  73. glitchlings/zoo/rng.py +280 -0
  74. glitchlings/zoo/rushmore.py +416 -0
  75. glitchlings/zoo/scannequin.py +370 -0
  76. glitchlings/zoo/transforms.py +331 -0
  77. glitchlings/zoo/typogre.py +194 -0
  78. glitchlings/zoo/validation.py +643 -0
  79. glitchlings/zoo/wherewolf.py +120 -0
  80. glitchlings/zoo/zeedub.py +165 -0
  81. glitchlings-1.0.0.dist-info/METADATA +404 -0
  82. glitchlings-1.0.0.dist-info/RECORD +86 -0
  83. glitchlings-1.0.0.dist-info/WHEEL +5 -0
  84. glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
  85. glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
  86. glitchlings-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,301 @@
1
+ """Jargoyle glitchling: Dictionary-based word drift.
2
+
3
+ Jargoyle swaps words with alternatives from bundled lexeme dictionaries.
4
+ Multiple dictionaries are supported:
5
+ - "colors": Color term swapping
6
+ - "synonyms": General synonym substitution
7
+ - "corporate": Business jargon alternatives
8
+ - "academic": Scholarly word substitutions
9
+ - "cyberpunk": Neon cyberpunk slang and gadgetry
10
+ - "lovecraftian": Cosmic horror terminology
11
+ You can also drop additional dictionaries into ``assets/lexemes`` to make
12
+ them available without modifying the code. The backend discovers any
13
+ ``*.json`` file in that directory at runtime.
14
+
15
+ Two modes are available:
16
+ - "literal": First entry in each word's alternatives (deterministic mapping)
17
+ - "drift": Random selection from alternatives (probabilistic)
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import os
23
+ import random
24
+ from importlib import resources
25
+ from pathlib import Path
26
+ from typing import Any, Literal, cast
27
+
28
+ from glitchlings.constants import DEFAULT_JARGOYLE_RATE
29
+ from glitchlings.internal.rust_ffi import (
30
+ is_bundled_lexeme_rust,
31
+ list_bundled_lexeme_dictionaries_rust,
32
+ list_lexeme_dictionaries_rust,
33
+ resolve_seed,
34
+ substitute_lexeme_rust,
35
+ )
36
+
37
+ from .core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
38
+
39
+ _LEXEME_ENV_VAR = "GLITCHLINGS_LEXEME_DIR"
40
+ _lexeme_directory_configured = False
41
+
42
+
43
def _configure_lexeme_directory() -> Path | None:
    """Advertise the packaged lexeme directory to the Rust backend, once.

    The directory of the ``glitchlings.assets.lexemes`` package is resolved
    and, when it exists on disk, published through the environment variable
    named by ``_LEXEME_ENV_VAR``. An existing value of that variable is left
    untouched. This is only needed for discovering custom lexeme files at
    runtime; per the module notes, built-in lexemes are embedded in the Rust
    binary and require no file I/O.

    Returns:
        The resolved directory on the first call that finds it. ``None`` on
        every later call, and whenever the package or directory cannot be
        located.
    """
    global _lexeme_directory_configured
    if _lexeme_directory_configured:
        return None

    located: Path | None
    try:
        package_root = resources.files("glitchlings.assets.lexemes")
    except (ModuleNotFoundError, AttributeError):
        located = None
    else:
        try:
            with resources.as_file(package_root) as resolved:
                located = Path(resolved)
        except FileNotFoundError:
            located = None

    if located is not None and located.is_dir():
        # setdefault: a value already present in the environment wins.
        os.environ.setdefault(_LEXEME_ENV_VAR, str(located))
        outcome: Path | None = located
    else:
        outcome = None

    # Mark the attempt as done whether or not a directory was found.
    _lexeme_directory_configured = True
    return outcome
74
+
75
+
76
+ # NOTE: We intentionally do NOT call _configure_lexeme_directory() at module load.
77
+ # Built-in lexemes are embedded in the Rust binary and require no file I/O.
78
+ # The directory configuration is only needed for custom lexeme discovery.
79
+
80
+ DEFAULT_LEXEMES = "synonyms"
81
+
82
+ # Valid modes
83
+ JargoyleMode = Literal["literal", "drift"]
84
+ VALID_MODES = ("literal", "drift")
85
+ DEFAULT_MODE: JargoyleMode = "drift"
86
+
87
+
88
def _bundled_lexemes() -> list[str]:
    """Return the bundled dictionary names, lower-cased, de-duplicated and sorted."""
    unique_names = set()
    for raw_name in list_bundled_lexeme_dictionaries_rust():
        unique_names.add(raw_name.lower())
    return sorted(unique_names)
91
+
92
+
93
def _available_lexemes() -> list[str]:
    """Return every dictionary name the backend reports, lower-cased, unique and sorted."""
    unique_names = set()
    for raw_name in list_lexeme_dictionaries_rust():
        unique_names.add(raw_name.lower())
    return sorted(unique_names)
96
+
97
+
98
def _validate_lexemes(name: str) -> str:
    """Lower-case ``name`` and confirm it refers to a known lexeme dictionary.

    Bundled dictionaries are accepted straight away. Any other name triggers
    the on-demand lexeme directory configuration, after which the name is
    checked against the full list reported by the backend.

    Args:
        name: Dictionary name supplied by the caller (case-insensitive).

    Returns:
        The lower-cased dictionary name.

    Raises:
        ValueError: If the name matches neither a bundled nor a discovered
            dictionary.
    """
    candidate = name.lower()

    if not is_bundled_lexeme_rust(candidate):
        # Not a bundled name: make custom dictionaries discoverable, then check.
        _configure_lexeme_directory()
        known = _available_lexemes()
        if candidate not in known:
            raise ValueError(f"Invalid lexemes '{name}'. Must be one of: {', '.join(known)}")

    return candidate
117
+
118
+
119
def _validate_mode(mode: JargoyleMode | str) -> JargoyleMode:
    """Lower-case ``mode`` and check it against ``VALID_MODES``.

    Raises:
        ValueError: If the lower-cased value is not one of ``VALID_MODES``.
    """
    lowered = mode.lower()
    if lowered in VALID_MODES:
        return cast(JargoyleMode, lowered)
    raise ValueError(f"Invalid mode '{mode}'. Must be one of: {', '.join(VALID_MODES)}")
124
+
125
+
126
+ VALID_LEXEMES = tuple(_bundled_lexemes())
127
+
128
+
129
def list_lexeme_dictionaries() -> list[str]:
    """List every lexeme dictionary usable with Jargoyle.

    The lexeme directory is configured first so that custom dictionaries are
    discoverable alongside the built-in ones.

    Returns:
        Sorted, lower-cased dictionary names (built-in plus custom).
    """
    _configure_lexeme_directory()  # called for its side effect; result unused
    names = _available_lexemes()
    return names
141
+
142
+
143
def list_bundled_lexeme_dictionaries() -> list[str]:
    """List the built-in lexeme dictionaries.

    Per the module notes these are embedded in the Rust binary, so no lexeme
    directory configuration is performed here.

    Returns:
        Sorted, lower-cased names of the bundled dictionaries.
    """
    bundled_names = _bundled_lexemes()
    return bundled_names
154
+
155
+
156
def jargoyle_drift(
    text: str,
    *,
    lexemes: str = DEFAULT_LEXEMES,
    mode: JargoyleMode = DEFAULT_MODE,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Swap words in ``text`` for alternatives from a lexeme dictionary.

    Args:
        text: Input text to transform.
        lexemes: Name of the dictionary to draw replacements from.
        mode: ``"literal"`` for deterministic first-entry swaps, ``"drift"``
            for random selection among alternatives.
        rate: Probability of transforming each matching word. ``None`` falls
            back to ``DEFAULT_JARGOYLE_RATE``.
        seed: Seed for deterministic randomness; only consulted in
            ``"drift"`` mode.
        rng: Random number generator (alternative to ``seed``); only
            consulted in ``"drift"`` mode.

    Returns:
        The text with word substitutions applied by the Rust backend.

    Raises:
        ValueError: If ``lexemes`` or ``mode`` is invalid.
    """
    dictionary = _validate_lexemes(lexemes)
    chosen_mode = _validate_mode(mode)

    if rate is None:
        probability = DEFAULT_JARGOYLE_RATE
    else:
        probability = float(rate)

    # Only drift mode resolves a seed; literal mode hands the backend None.
    if chosen_mode == "drift":
        drift_seed = resolve_seed(seed, rng)
    else:
        drift_seed = None

    return substitute_lexeme_rust(text, dictionary, chosen_mode, probability, drift_seed)
195
+
196
+
197
class Jargoyle(Glitchling):
    """Glitchling that swaps words using bundled lexeme dictionaries.

    Jargoyle replaces words with alternatives from one of several dictionaries:

    - **colors**: Swap color terms (e.g., "red" -> "blue").
    - **synonyms**: General synonym substitution (e.g., "fast" -> "rapid").
    - **corporate**: Business jargon alternatives.
    - **academic**: Scholarly word substitutions.
    - **cyberpunk**: Neon cyberpunk slang and gadgetry.
    - **lovecraftian**: Cosmic horror terminology.
    - **custom**: Any ``*.json`` dictionary placed in ``assets/lexemes``.

    Two modes are supported:

    - **literal**: Use the first (canonical) entry for each word.
    - **drift**: Randomly select from available alternatives.

    Example:
        >>> from glitchlings import Jargoyle
        >>> jargoyle = Jargoyle(lexemes="colors", mode="literal")
        >>> jargoyle("The red balloon floated away.")
        'The blue balloon floated away.'

        >>> jargoyle = Jargoyle(lexemes="synonyms", mode="drift", rate=0.5, seed=42)
        >>> jargoyle("The quick fox jumps fast.")
        'The swift fox jumps rapid.'
    """

    flavor = "Oh no... The worst person you know just bought a thesaurus..."

    def __init__(
        self,
        *,
        lexemes: str = DEFAULT_LEXEMES,
        mode: JargoyleMode = DEFAULT_MODE,
        rate: float | None = None,
        seed: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Initialize Jargoyle with the specified dictionary and mode.

        Args:
            lexemes: Name of the dictionary to use. See ``list_lexeme_dictionaries()``
                for the full, dynamic list (including any custom ``*.json`` files).
            mode: Transformation mode. "literal" for deterministic swaps,
                "drift" for random selection.
            rate: Probability of transforming each matching word (0.0 to 1.0).
                Defaults to ``DEFAULT_JARGOYLE_RATE`` when ``None``.
            seed: Seed for deterministic randomness.
            **kwargs: Extra keyword arguments forwarded to ``Glitchling``.

        Raises:
            ValueError: If ``lexemes`` or ``mode`` is invalid.
        """
        # Validate up front so a bad configuration fails at construction time.
        normalized_lexemes = _validate_lexemes(lexemes)
        normalized_mode = _validate_mode(mode)

        effective_rate = DEFAULT_JARGOYLE_RATE if rate is None else rate

        super().__init__(
            name="Jargoyle",
            corruption_function=jargoyle_drift,
            scope=AttackWave.WORD,
            order=AttackOrder.NORMAL,
            seed=seed,
            lexemes=normalized_lexemes,
            mode=normalized_mode,
            rate=effective_rate,
            **kwargs,
        )
        # The base class takes ``seed`` as its own argument, so mirror it into
        # the stored kwargs to make sure ``jargoyle_drift`` receives it too.
        self.kwargs["seed"] = seed

    def pipeline_operation(self) -> PipelineOperationPayload:
        """Return the pipeline descriptor for the Rust backend.

        A parameter that is missing from ``self.kwargs`` or stored as ``None``
        falls back to its module default, so a cleared ``rate`` no longer
        raises ``TypeError`` in ``float()`` and a cleared ``lexemes``/``mode``
        is no longer sent as the literal string ``"None"``.
        """
        lexemes = self.kwargs.get("lexemes")
        mode = self.kwargs.get("mode")
        rate = self.kwargs.get("rate")
        return cast(
            PipelineOperationPayload,
            {
                "type": "jargoyle",
                "lexemes": str(DEFAULT_LEXEMES if lexemes is None else lexemes),
                "mode": str(DEFAULT_MODE if mode is None else mode),
                "rate": float(DEFAULT_JARGOYLE_RATE if rate is None else rate),
            },
        )
284
+
285
+
286
+ # Module-level singleton for convenience
287
+ jargoyle = Jargoyle()
288
+
289
+
290
+ __all__ = [
291
+ "DEFAULT_LEXEMES",
292
+ "DEFAULT_MODE",
293
+ "Jargoyle",
294
+ "JargoyleMode",
295
+ "VALID_LEXEMES",
296
+ "VALID_MODES",
297
+ "jargoyle",
298
+ "jargoyle_drift",
299
+ "list_bundled_lexeme_dictionaries",
300
+ "list_lexeme_dictionaries",
301
+ ]
@@ -0,0 +1,269 @@
1
+ """Rust-backed Mim1c glitchling that swaps characters for homoglyphs.
2
+
3
+ The Mim1c glitchling replaces characters with visually similar confusable
4
+ characters (homoglyphs) based on Unicode Technical Standard #39.
5
+
6
+ ## Modes
7
+
8
+ - **single_script** (safest): Only substitute within the same script
9
+ (Latin→Latin variants). Minimal visual disruption.
10
+ - **mixed_script** (default): Allow visually similar cross-script substitutions
11
+ (Latin↔Cyrillic↔Greek). Maximum visual similarity with some mixed scripts.
12
+ - **compatibility**: Include Unicode compatibility variants
13
+ (fullwidth, math alphanumerics). Wider range of substitutions.
14
+ - **aggressive**: All of the above combined. Most aggressive substitution.
15
+
16
+ ## Locality Control
17
+
18
+ `max_consecutive` limits how many adjacent characters can be substituted,
19
+ preventing the "ransom note" effect where every character is from a different
20
+ script. Default is 3.
21
+
22
+ ## Data Source
23
+
24
+ Confusable mappings derived from Unicode Technical Standard #39 (confusables.txt).
25
+
26
+ ## References
27
+
28
+ - **Unicode Technical Standard #39**: Unicode Security Mechanisms
29
+ - https://www.unicode.org/reports/tr39/
30
+ - **confusables.txt**: Official confusable character mappings
31
+ - https://www.unicode.org/Public/security/latest/confusables.txt
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import random
37
+ from collections.abc import Collection, Iterable
38
+ from typing import Any, Literal, cast
39
+
40
+ from glitchlings.constants import (
41
+ DEFAULT_MIM1C_MAX_CONSECUTIVE,
42
+ DEFAULT_MIM1C_MODE,
43
+ DEFAULT_MIM1C_RATE,
44
+ MIM1C_DEFAULT_CLASSES,
45
+ )
46
+ from glitchlings.internal.rust_ffi import resolve_seed, swap_homoglyphs_rust
47
+
48
+ from .core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
49
+ from .validation import normalize_mim1c_max_consecutive, normalize_mim1c_mode
50
+
51
+
52
+ def _normalise_classes(
53
+ value: object,
54
+ ) -> tuple[str, ...] | Literal["all"] | None:
55
+ if value is None:
56
+ return None
57
+ if isinstance(value, str):
58
+ if value.lower() == "all":
59
+ return "all"
60
+ return (value,)
61
+ if isinstance(value, Iterable):
62
+ return tuple(str(item) for item in value)
63
+ raise TypeError("classes must be an iterable of strings or 'all'")
64
+
65
+
66
+ def _normalise_banned(value: object) -> tuple[str, ...] | None:
67
+ if value is None:
68
+ return None
69
+ if isinstance(value, str):
70
+ return tuple(value)
71
+ if isinstance(value, Iterable):
72
+ return tuple(str(item) for item in value)
73
+ raise TypeError("banned_characters must be an iterable of strings")
74
+
75
+
76
+ def _serialise_classes(
77
+ value: tuple[str, ...] | Literal["all"] | None,
78
+ ) -> list[str] | Literal["all"] | None:
79
+ if value is None:
80
+ return None
81
+ if value == "all":
82
+ return "all"
83
+ return list(value)
84
+
85
+
86
+ def _serialise_banned(value: tuple[str, ...] | None) -> list[str] | None:
87
+ if value is None:
88
+ return None
89
+ return list(value)
90
+
91
+
92
+ HomoglyphMode = Literal["single_script", "mixed_script", "compatibility", "aggressive"]
93
+
94
+
95
def swap_homoglyphs(
    text: str,
    rate: float | None = None,
    classes: list[str] | Literal["all"] | None = None,
    banned_characters: Collection[str] | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    mode: HomoglyphMode | None = None,
    max_consecutive: int | None = None,
) -> str:
    """Replace characters with visually confusable homoglyphs via the Rust engine.

    Args:
        text: The input text to transform.
        rate: Probability of substituting each eligible character. ``None``
            falls back to ``DEFAULT_MIM1C_RATE``.
        classes: Unicode script classes to include, or ``"all"``. ``None``
            falls back to ``MIM1C_DEFAULT_CLASSES``.
        banned_characters: Characters to never use as substitutes.
        seed: Random seed for deterministic behavior.
        rng: Optional ``random.Random`` instance (alternative to ``seed``).
        mode: Substitution mode controlling confusable types:
            - "single_script": Only same-script substitutions (safest).
            - "mixed_script": Allow cross-script like Latin↔Cyrillic↔Greek.
            - "compatibility": Include fullwidth, math alphanumerics.
            - "aggressive": All confusable types.
            ``None`` is normalised against ``DEFAULT_MIM1C_MODE``.
        max_consecutive: Maximum consecutive characters to substitute; ``None``
            is normalised against ``DEFAULT_MIM1C_MAX_CONSECUTIVE``.

    Returns:
        Text with some characters replaced by visually similar confusables.
    """
    substitution_rate = rate
    if substitution_rate is None:
        substitution_rate = DEFAULT_MIM1C_RATE
    chosen_mode = normalize_mim1c_mode(mode, DEFAULT_MIM1C_MODE)
    run_limit = normalize_mim1c_max_consecutive(max_consecutive, DEFAULT_MIM1C_MAX_CONSECUTIVE)

    # Validate both collection arguments before building any payload.
    classes_tuple = _normalise_classes(classes)
    banned_tuple = _normalise_banned(banned_characters)

    classes_payload: list[str] | Literal["all"] | None
    if classes_tuple is not None:
        classes_payload = _serialise_classes(classes_tuple)
    else:
        classes_payload = list(MIM1C_DEFAULT_CLASSES)
    banned_payload = _serialise_banned(banned_tuple)

    backend_seed = resolve_seed(seed, rng)
    return swap_homoglyphs_rust(
        text,
        substitution_rate,
        classes_payload,
        banned_payload,
        backend_seed,
        chosen_mode,
        run_limit,
    )
150
+
151
+
152
class Mim1c(Glitchling):
    """Glitchling that swaps characters for visually similar homoglyphs.

    Mim1c replaces characters with visually similar confusable characters
    (homoglyphs) based on Unicode Technical Standard #39.

    ## Modes

    - **single_script** (safest): Only substitute within the same script
      (Latin→Latin variants). Minimal visual disruption.
    - **mixed_script** (default): Allow visually similar cross-script substitutions
      (Latin↔Cyrillic↔Greek). Maximum visual similarity with some mixed scripts.
    - **compatibility**: Include Unicode compatibility variants
      (fullwidth, math alphanumerics). Wider range of substitutions.
    - **aggressive**: All of the above combined. Most aggressive substitution.

    ## Locality Control

    `max_consecutive` limits how many adjacent characters can be substituted,
    preventing the "ransom note" effect where every character is from a different
    script. Default is 3. Set to 0 for unlimited.

    Args:
        rate: Probability of substituting each eligible character. Default 0.02.
        classes: Unicode script classes to include.
            Default ["LATIN", "GREEK", "CYRILLIC", "COMMON"].
        banned_characters: Characters to never use as substitutes.
        mode: Substitution mode. One of "single_script", "mixed_script",
            "compatibility", "aggressive".
        max_consecutive: Maximum consecutive characters to substitute. Default 3.
        seed: Random seed for deterministic behavior.
    """

    flavor = (
        "Breaks your parser by replacing some characters in strings with "
        "doppelgangers. Don't worry, this text is clean. ;)"
    )

    def __init__(
        self,
        *,
        rate: float | None = None,
        classes: list[str] | Literal["all"] | None = None,
        banned_characters: Collection[str] | None = None,
        mode: HomoglyphMode | None = None,
        max_consecutive: int | None = None,
        seed: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Normalise every parameter, then register with the ``Glitchling`` base.

        ``None`` for ``rate``, ``mode`` or ``max_consecutive`` resolves to the
        matching ``DEFAULT_MIM1C_*`` constant. ``classes`` and
        ``banned_characters`` are stored in tuple form (or ``None``/``"all"``).
        """
        effective_rate = DEFAULT_MIM1C_RATE if rate is None else rate
        effective_mode = normalize_mim1c_mode(mode, DEFAULT_MIM1C_MODE)
        effective_max_consecutive = normalize_mim1c_max_consecutive(
            max_consecutive, DEFAULT_MIM1C_MAX_CONSECUTIVE
        )
        normalised_classes = _normalise_classes(classes)
        normalised_banned = _normalise_banned(banned_characters)
        super().__init__(
            name="Mim1c",
            corruption_function=swap_homoglyphs,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.LAST,
            seed=seed,
            rate=effective_rate,
            classes=normalised_classes,
            banned_characters=normalised_banned,
            mode=effective_mode,
            max_consecutive=effective_max_consecutive,
            **kwargs,
        )

    def pipeline_operation(self) -> PipelineOperationPayload:
        """Build the pipeline descriptor from the stored parameters.

        ``type`` and ``rate`` are always present. ``classes``, ``mode`` and
        ``max_consecutive`` are included only when not ``None``, and
        ``banned_characters`` only when non-empty.
        """
        rate_value = self.kwargs.get("rate")
        rate = DEFAULT_MIM1C_RATE if rate_value is None else float(rate_value)

        descriptor: dict[str, object] = {"type": "mimic", "rate": rate}

        classes = self.kwargs.get("classes")
        serialised_classes = _serialise_classes(classes)
        if serialised_classes is not None:
            descriptor["classes"] = serialised_classes

        banned = self.kwargs.get("banned_characters")
        serialised_banned = _serialise_banned(banned)
        # Truthiness check: an empty banned list is omitted as well as None.
        if serialised_banned:
            descriptor["banned_characters"] = serialised_banned

        mode = self.kwargs.get("mode")
        if mode is not None:
            descriptor["mode"] = str(mode)

        max_consecutive = self.kwargs.get("max_consecutive")
        if max_consecutive is not None:
            descriptor["max_consecutive"] = int(max_consecutive)

        return cast(PipelineOperationPayload, descriptor)

    def set_param(self, key: str, value: object) -> None:
        """Set a parameter, applying the same normalisation as ``__init__``.

        ``mode`` and ``max_consecutive`` are normalised with the explicit
        ``DEFAULT_MIM1C_*`` fallbacks, matching every other call site in this
        module. Any other key is forwarded to the base class unchanged.
        """
        if key == "classes":
            super().set_param(key, _normalise_classes(value))
            return
        if key == "banned_characters":
            super().set_param(key, _normalise_banned(value))
            return
        if key == "mode":
            # Any falsy value (None, "") is treated as "use the default mode".
            requested_mode = str(value) if value else None
            super().set_param(key, normalize_mim1c_mode(requested_mode, DEFAULT_MIM1C_MODE))
            return
        if key == "max_consecutive":
            int_value: int | None = int(cast(Any, value)) if value is not None else None
            super().set_param(
                key,
                normalize_mim1c_max_consecutive(int_value, DEFAULT_MIM1C_MAX_CONSECUTIVE),
            )
            return
        super().set_param(key, value)
264
+
265
+
266
+ mim1c = Mim1c()
267
+
268
+
269
+ __all__ = ["Mim1c", "mim1c", "swap_homoglyphs", "HomoglyphMode"]
@@ -0,0 +1,109 @@
1
+ """Pedant glitchling integrating grammar evolutions with Rust acceleration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from typing import Any, cast
7
+
8
+ from glitchlings.internal.rust_ffi import resolve_seed
9
+
10
+ from ..core import AttackOrder, AttackWave, Glitchling, PipelineOperationPayload
11
+ from .core import EVOLUTIONS, PedantBase, apply_pedant
12
+ from .stones import STONES, PedantStone
13
+
14
+
15
def _coerce_stone(value: Any) -> PedantStone:
    """Convert ``value`` to a :class:`PedantStone` via ``PedantStone.from_value``."""
    stone = PedantStone.from_value(value)
    return stone
19
+
20
+
21
def pedant_transform(
    text: str,
    *,
    stone: PedantStone | str = PedantStone.HYPERCORRECTITE,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Apply the pedant evolution selected by ``stone`` to ``text``.

    Args:
        text: Input text to transform.
        stone: Evolution selector, as a ``PedantStone`` or a value it can be
            coerced from.
        seed: Seed passed to ``resolve_seed``.
        rng: Random number generator passed to ``resolve_seed``.

    Returns:
        The result of ``apply_pedant`` for the resolved stone and seed.

    Raises:
        ValueError: If the coerced stone has no entry in ``EVOLUTIONS``.
    """
    resolved_stone = _coerce_stone(stone)
    if resolved_stone in EVOLUTIONS:
        return apply_pedant(
            text,
            stone=resolved_stone,
            seed=resolve_seed(seed, rng),
        )
    raise ValueError(f"Unknown pedant stone: {stone!r}")
41
+
42
+
43
def _build_pipeline_descriptor(glitch: Glitchling) -> PipelineOperationPayload:
    """Build the ``pedant`` pipeline descriptor from a glitchling's stored stone.

    Raises:
        RuntimeError: If ``glitch.kwargs`` holds no ``"stone"`` value.
    """
    raw_stone = glitch.kwargs.get("stone")
    if raw_stone is None:
        raise RuntimeError("Pedant requires a stone to build the pipeline descriptor")

    # The descriptor carries the stone's label rather than the enum member.
    descriptor = {"type": "pedant", "stone": _coerce_stone(raw_stone).label}
    return cast(PipelineOperationPayload, descriptor)
55
+
56
+
57
class Pedant(Glitchling):
    """Glitchling that deterministically applies pedant evolutions."""

    # Alternate parameter names that refer to the ``stone`` parameter.
    _param_aliases = {"form": "stone", "stone_name": "stone"}

    def __init__(
        self,
        *,
        stone: PedantStone | str = PedantStone.HYPERCORRECTITE,
        seed: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Create a Pedant bound to ``stone``.

        Args:
            stone: Evolution selector; coerced to a ``PedantStone``.
            seed: Optional seed. When given it is also stored as the ``seed``
                parameter, as an ``int``.
            **kwargs: Extra keyword arguments forwarded to ``Glitchling``.
        """
        initial_stone = _coerce_stone(stone)
        super().__init__(
            name="Pedant",
            corruption_function=pedant_transform,
            scope=AttackWave.WORD,
            order=AttackOrder.LATE,
            seed=seed,
            pipeline_operation=_build_pipeline_descriptor,
            stone=initial_stone,
            **kwargs,
        )
        if seed is None:
            return
        self.set_param("seed", int(seed))

    def set_param(self, key: str, value: object) -> None:
        """Set a parameter, coercing stone-like keys to ``PedantStone`` first."""
        stone_keys = ("stone", "form", "stone_name")
        coerced = _coerce_stone(value) if key in stone_keys else value
        super().set_param(key, coerced)

    def reset_rng(self, seed: int | None = None) -> None:
        """Reset the RNG, then keep the stored ``seed`` kwarg in step with ``self.seed``."""
        super().reset_rng(seed)
        if self.seed is not None:
            self.kwargs["seed"] = int(self.seed)
        else:
            # No seed in effect: make sure no stale value lingers in kwargs.
            self.kwargs.pop("seed", None)
97
+
98
+
99
+ pedant = Pedant()
100
+
101
+ __all__ = [
102
+ "PedantBase",
103
+ "Pedant",
104
+ "pedant",
105
+ "pedant_transform",
106
+ "EVOLUTIONS",
107
+ "STONES",
108
+ "PedantStone",
109
+ ]