glitchlings 0.4.5__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (53) hide show
  1. glitchlings/__init__.py +71 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust.cp311-win_amd64.pyd +0 -0
  4. glitchlings/compat.py +282 -0
  5. glitchlings/config.py +386 -0
  6. glitchlings/config.toml +3 -0
  7. glitchlings/data/__init__.py +1 -0
  8. glitchlings/data/hokey_assets.json +193 -0
  9. glitchlings/dlc/__init__.py +7 -0
  10. glitchlings/dlc/_shared.py +153 -0
  11. glitchlings/dlc/huggingface.py +81 -0
  12. glitchlings/dlc/prime.py +254 -0
  13. glitchlings/dlc/pytorch.py +166 -0
  14. glitchlings/dlc/pytorch_lightning.py +209 -0
  15. glitchlings/lexicon/__init__.py +192 -0
  16. glitchlings/lexicon/_cache.py +108 -0
  17. glitchlings/lexicon/data/default_vector_cache.json +82 -0
  18. glitchlings/lexicon/metrics.py +162 -0
  19. glitchlings/lexicon/vector.py +652 -0
  20. glitchlings/lexicon/wordnet.py +228 -0
  21. glitchlings/main.py +364 -0
  22. glitchlings/util/__init__.py +195 -0
  23. glitchlings/util/adapters.py +27 -0
  24. glitchlings/util/hokey_generator.py +144 -0
  25. glitchlings/util/stretch_locator.py +140 -0
  26. glitchlings/util/stretchability.py +375 -0
  27. glitchlings/zoo/__init__.py +172 -0
  28. glitchlings/zoo/_ocr_confusions.py +32 -0
  29. glitchlings/zoo/_rate.py +131 -0
  30. glitchlings/zoo/_rust_extensions.py +143 -0
  31. glitchlings/zoo/_sampling.py +54 -0
  32. glitchlings/zoo/_text_utils.py +100 -0
  33. glitchlings/zoo/adjax.py +128 -0
  34. glitchlings/zoo/apostrofae.py +127 -0
  35. glitchlings/zoo/assets/__init__.py +0 -0
  36. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  37. glitchlings/zoo/core.py +582 -0
  38. glitchlings/zoo/hokey.py +173 -0
  39. glitchlings/zoo/jargoyle.py +335 -0
  40. glitchlings/zoo/mim1c.py +109 -0
  41. glitchlings/zoo/ocr_confusions.tsv +30 -0
  42. glitchlings/zoo/redactyl.py +193 -0
  43. glitchlings/zoo/reduple.py +148 -0
  44. glitchlings/zoo/rushmore.py +153 -0
  45. glitchlings/zoo/scannequin.py +171 -0
  46. glitchlings/zoo/typogre.py +231 -0
  47. glitchlings/zoo/zeedub.py +185 -0
  48. glitchlings-0.4.5.dist-info/METADATA +648 -0
  49. glitchlings-0.4.5.dist-info/RECORD +53 -0
  50. glitchlings-0.4.5.dist-info/WHEEL +5 -0
  51. glitchlings-0.4.5.dist-info/entry_points.txt +2 -0
  52. glitchlings-0.4.5.dist-info/licenses/LICENSE +201 -0
  53. glitchlings-0.4.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,173 @@
1
+ """Hokey glitchling that performs expressive lengthening."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from typing import Any, cast
7
+
8
+ from ..util.hokey_generator import HokeyConfig, HokeyGenerator, StretchEvent
9
+ from ..util.stretchability import StretchabilityAnalyzer
10
+ from ._rust_extensions import get_rust_operation
11
+ from .core import AttackOrder, AttackWave, Gaggle
12
+ from .core import Glitchling as GlitchlingBase
13
+
14
+ _hokey_rust = get_rust_operation("hokey")
15
+ _ANALYZER = StretchabilityAnalyzer()
16
+ _GENERATOR = HokeyGenerator(analyzer=_ANALYZER)
17
+
18
+
19
def _python_extend_vowels(
    text: str,
    *,
    rate: float,
    extension_min: int,
    extension_max: int,
    word_length_threshold: int,
    base_p: float,
    rng: random.Random,
    return_trace: bool = False,
) -> str | tuple[str, list[StretchEvent]]:
    """Run the pure-Python stretch generator over ``text``.

    The tuning knobs are bundled into a ``HokeyConfig`` and handed, together
    with ``rng``, to the module-level ``_GENERATOR``.

    Returns the transformed text, or the ``(text, events)`` pair produced by
    the generator when ``return_trace`` is true.
    """
    settings = HokeyConfig(
        rate=rate,
        extension_min=extension_min,
        extension_max=extension_max,
        word_length_threshold=word_length_threshold,
        base_p=base_p,
    )
    stretched, trace = _GENERATOR.generate(text, rng=rng, config=settings)
    if return_trace:
        return stretched, trace
    return stretched
39
+
40
+
41
def extend_vowels(
    text: str,
    rate: float = 0.3,
    extension_min: int = 2,
    extension_max: int = 5,
    word_length_threshold: int = 6,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    return_trace: bool = False,
    base_p: float | None = None,
) -> str | tuple[str, list[StretchEvent]]:
    """Stretch expressive segments of words for emphasis.

    Parameters
    ----------
    text : str
        Input text to transform.
    rate : float, optional
        Global selection rate for candidate words.
    extension_min : int, optional
        Minimum number of extra repetitions for the stretch unit.
    extension_max : int, optional
        Maximum number of extra repetitions for the stretch unit.
    word_length_threshold : int, optional
        Preferred maximum alphabetic length; longer words are de-emphasised but
        not excluded.
    seed : int, optional
        Seed for a fresh ``random.Random`` when ``rng`` is not supplied.
    rng : random.Random, optional
        Random number generator to drive sampling; takes precedence over ``seed``.
    return_trace : bool, optional
        When ``True`` also return the stretch events for introspection. This
        always uses the Python implementation.
    base_p : float, optional
        Base probability for the negative-binomial sampler (heavier tails for
        smaller values). Defaults to ``0.45``.

    Returns
    -------
    str or tuple[str, list[StretchEvent]]
        The transformed text, paired with the stretch events when
        ``return_trace`` is true.
    """
    if not text:
        # Nothing to stretch: return the input as-is (plus an empty trace on request).
        no_events: list[StretchEvent] = []
        return (text, no_events) if return_trace else text

    generator = rng if rng is not None else random.Random(seed)
    probability = 0.45 if base_p is None else base_p

    # The compiled operation is only used when it is available and no trace
    # was requested; every other call goes through the Python generator.
    use_rust = _hokey_rust is not None and not return_trace
    if use_rust:
        return cast(
            str,
            _hokey_rust(
                text,
                rate,
                extension_min,
                extension_max,
                word_length_threshold,
                probability,
                generator,
            ),
        )

    return _python_extend_vowels(
        text,
        rate=rate,
        extension_min=extension_min,
        extension_max=extension_max,
        word_length_threshold=word_length_threshold,
        base_p=probability,
        rng=generator,
        return_trace=return_trace,
    )
110
+
111
+
112
class Hokey(GlitchlingBase):
    """Glitchling that lengthens words for expressive emphasis."""

    seed: int | None

    def __init__(
        self,
        *,
        rate: float = 0.3,
        extension_min: int = 2,
        extension_max: int = 5,
        word_length_threshold: int = 6,
        base_p: float = 0.45,
        seed: int | None = None,
    ) -> None:
        """Configure the glitchling; all settings are forwarded to the base class."""
        # Remember the seed the caller supplied; reset_rng() updates it later.
        self._master_seed: int | None = seed

        def _corruption_wrapper(text: str, **kwargs: Any) -> str:
            # extend_vowels can return (text, events); only the text is passed on.
            outcome = extend_vowels(text, **kwargs)
            if isinstance(outcome, str):
                return outcome
            return outcome[0]

        super().__init__(
            name="Hokey",
            corruption_function=_corruption_wrapper,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.FIRST,
            seed=seed,
            rate=rate,
            extension_min=extension_min,
            extension_max=extension_max,
            word_length_threshold=word_length_threshold,
            base_p=base_p,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return the ``"hokey"`` operation descriptor built from ``self.kwargs``.

        Any setting missing from ``self.kwargs`` falls back to the same default
        the constructor uses.
        """
        fallbacks = (
            ("rate", 0.3),
            ("extension_min", 2),
            ("extension_max", 5),
            ("word_length_threshold", 6),
            ("base_p", 0.45),
        )
        descriptor: dict[str, Any] = {"type": "hokey"}
        for key, fallback in fallbacks:
            descriptor[key] = self.kwargs.get(key, fallback)
        return descriptor

    def reset_rng(self, seed: int | None = None) -> None:
        """Reset the RNG, deriving a glitchling-specific seed when one is given.

        With ``seed=None`` the call is delegated to the base class unchanged.
        Otherwise the seed is recorded as the master seed, the base class is
        reset with it, and, unless that leaves ``self.seed`` as ``None``, the
        seed is replaced by ``Gaggle.derive_seed(seed, self.name, 0)`` and
        ``self.rng`` / ``self.kwargs["seed"]`` are rebuilt from the result.
        """
        if seed is None:
            super().reset_rng(None)
            return

        self._master_seed = seed
        super().reset_rng(seed)
        if self.seed is None:
            return

        self.seed = int(Gaggle.derive_seed(int(seed), self.name, 0))
        self.rng = random.Random(self.seed)
        self.kwargs["seed"] = self.seed
168
+
169
+
170
+ hokey = Hokey()
171
+
172
+
173
+ __all__ = ["Hokey", "hokey", "extend_vowels"]
@@ -0,0 +1,335 @@
1
+ import random
2
+ import re
3
+ from collections.abc import Iterable
4
+ from dataclasses import dataclass
5
+ from types import ModuleType
6
+ from typing import Any, Literal, cast
7
+
8
+ from glitchlings.lexicon import Lexicon, get_default_lexicon
9
+
10
+ from ._rate import resolve_rate
11
+ from .core import AttackWave, Glitchling
12
+
13
# Handle to the optional WordNet backend module; None when it cannot be imported.
_wordnet_module: ModuleType | None

try:  # pragma: no cover - optional WordNet dependency
    import glitchlings.lexicon.wordnet as _wordnet_module
except (ImportError, ModuleNotFoundError, AttributeError):  # pragma: no cover - triggered when nltk unavailable
    _wordnet_module = None

_wordnet_runtime: ModuleType | None = _wordnet_module

# Bind the three WordNet entry points: stand-ins when the backend is missing,
# otherwise the backend's own objects.
WordNetLexicon: type[Lexicon] | None
if _wordnet_runtime is None:

    def _lexicon_dependencies_available() -> bool:
        """Report that no WordNet backend is present."""
        return False

    def _lexicon_ensure_wordnet() -> None:
        """Always fail: there is no WordNet backend to prepare."""
        raise RuntimeError(
            "The WordNet backend is no longer bundled by default. Install NLTK "
            "and download its WordNet corpus manually if you need legacy synonyms."
        )

    WordNetLexicon = None
else:
    WordNetLexicon = cast(type[Lexicon], _wordnet_runtime.WordNetLexicon)
    _lexicon_dependencies_available = _wordnet_runtime.dependencies_available
    _lexicon_ensure_wordnet = _wordnet_runtime.ensure_wordnet


# Public name for whichever ensure-function was selected above.
ensure_wordnet = _lexicon_ensure_wordnet
46
+
47
+
48
def dependencies_available() -> bool:
    """Return ``True`` when a synonym backend is accessible.

    The WordNet probe is tried first. If it reports nothing, the configured
    default lexicon (typically the bundled vector cache) is requested; a
    failure to obtain it means no backend is usable.
    """
    if not _lexicon_dependencies_available():
        try:
            get_default_lexicon(seed=None)
        except (RuntimeError, ImportError, ModuleNotFoundError, AttributeError):
            return False
    return True
59
+
60
+
61
+ # Backwards compatibility for callers relying on the previous private helper name.
62
+ _ensure_wordnet = ensure_wordnet
63
+
64
+
65
+ PartOfSpeech = Literal["n", "v", "a", "r"]
66
+ PartOfSpeechInput = PartOfSpeech | Iterable[PartOfSpeech] | Literal["any"]
67
+ NormalizedPartsOfSpeech = tuple[PartOfSpeech, ...]
68
+
69
+ _VALID_POS: tuple[PartOfSpeech, ...] = ("n", "v", "a", "r")
70
+
71
+
72
+ def _split_token(token: str) -> tuple[str, str, str]:
73
+ """Split a token into leading punctuation, core word, and trailing punctuation."""
74
+ match = re.match(r"^(\W*)(.*?)(\W*)$", token)
75
+ if not match:
76
+ return "", token, ""
77
+ prefix, core, suffix = match.groups()
78
+ return prefix, core, suffix
79
+
80
+
81
def _normalize_parts_of_speech(
    part_of_speech: PartOfSpeechInput,
) -> NormalizedPartsOfSpeech:
    """Coerce user input into a tuple of valid WordNet POS tags.

    Accepts a single tag, the string ``"any"`` (expands to every valid tag),
    or an iterable of tags. Matching is case-insensitive for both the
    single-string form and string entries of an iterable. Duplicates in an
    iterable are dropped, keeping first-seen order.

    Raises
    ------
    ValueError
        If a tag is not one of ``'n'``, ``'v'``, ``'a'``, ``'r'`` (or
        ``'any'`` for the single-string form), or if the iterable is empty.
    """
    if isinstance(part_of_speech, str):
        lowered = part_of_speech.lower()
        if lowered == "any":
            return _VALID_POS
        if lowered not in _VALID_POS:
            raise ValueError("part_of_speech must be one of 'n', 'v', 'a', 'r', or 'any'")
        return (cast(PartOfSpeech, lowered),)

    normalized: list[PartOfSpeech] = []
    for pos in part_of_speech:
        # Lower-case string entries so iterables are as lenient as the
        # single-string form; non-strings go straight to the validity check.
        tag = pos.lower() if isinstance(pos, str) else pos
        if tag not in _VALID_POS:
            raise ValueError("part_of_speech entries must be one of 'n', 'v', 'a', or 'r'")
        if tag not in normalized:
            normalized.append(cast(PartOfSpeech, tag))
    if not normalized:
        raise ValueError("part_of_speech iterable may not be empty")
    return tuple(normalized)
102
+
103
+
104
+ @dataclass(frozen=True)
105
+ class CandidateInfo:
106
+ """Metadata for a candidate token that may be replaced."""
107
+
108
+ prefix: str
109
+ core_word: str
110
+ suffix: str
111
+ part_of_speech: str | None
112
+ synonyms: list[str]
113
+
114
+
115
def substitute_random_synonyms(
    text: str,
    rate: float | None = None,
    part_of_speech: PartOfSpeechInput = "n",
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    replacement_rate: float | None = None,
    lexicon: Lexicon | None = None,
) -> str:
    """Replace words with random lexicon-driven synonyms.

    Parameters
    ----------
    - text: Input text.
    - rate: Max proportion of candidate words to replace. ``0.1`` is the
      default handed to ``resolve_rate`` when no rate is supplied.
    - part_of_speech: WordNet POS tag(s) to target. Accepts "n", "v", "a", "r",
      any iterable of those tags, or "any" to include all four. Backends that do
      not differentiate parts of speech simply ignore the setting.
    - seed: Optional seed if `rng` not provided. Also used to seed the lexicon.
    - rng: Optional RNG instance used for deterministic sampling.
    - replacement_rate: Legacy spelling of ``rate``; both are passed to
      ``resolve_rate``, which decides the effective value.
    - lexicon: Optional :class:`~glitchlings.lexicon.Lexicon` implementation to
      supply synonyms. Defaults to the configured lexicon priority, typically the
      packaged vector cache. A supplied lexicon whose seed differs from ``seed``
      is reseeded for the duration of the call and restored afterwards.

    Determinism
    - Candidates collected in left-to-right order; no set() reordering.
    - Replacement positions chosen via rng.sample.
    - Synonyms sourced through the lexicon; the default backend derives
      deterministic subsets per word and part-of-speech using the active seed.

    Raises
    ------
    ValueError
        If ``part_of_speech`` is not a valid tag or collection of tags.

    """
    effective_rate = resolve_rate(
        rate=rate,
        legacy_value=replacement_rate,
        default=0.1,
        legacy_name="replacement_rate",
    )

    active_rng: random.Random
    if rng is not None:
        active_rng = rng
    else:
        active_rng = random.Random(seed)

    active_lexicon: Lexicon
    restore_lexicon_seed = False
    original_lexicon_seed: int | None = None

    if lexicon is None:
        active_lexicon = get_default_lexicon(seed=seed)
    else:
        active_lexicon = lexicon
        if seed is not None:
            # Borrow the caller's lexicon: reseed it only for this call and
            # remember the previous seed so the ``finally`` block can undo it.
            original_lexicon_seed = active_lexicon.seed
            if original_lexicon_seed != seed:
                active_lexicon.reseed(seed)
                restore_lexicon_seed = True

    try:
        target_pos = _normalize_parts_of_speech(part_of_speech)

        # Split but keep whitespace separators so we can rebuild easily
        tokens = re.split(r"(\s+)", text)

        # Collect candidate word indices (even positions are words because separators are kept)
        candidate_indices: list[int] = []
        candidate_metadata: dict[int, CandidateInfo] = {}
        for idx, tok in enumerate(tokens):
            if idx % 2 != 0 or not tok or tok.isspace():
                continue

            prefix, core_word, suffix = _split_token(tok)
            if not core_word:
                continue

            chosen_pos: str | None = None
            synonyms: list[str] = []

            # The first requested tag that yields synonyms wins.
            for tag in target_pos:
                if not active_lexicon.supports_pos(tag):
                    continue
                synonyms = active_lexicon.get_synonyms(core_word, pos=tag)
                if synonyms:
                    chosen_pos = tag
                    break

            # Fall back to a POS-agnostic lookup when the backend offers one.
            if not synonyms and active_lexicon.supports_pos(None):
                synonyms = active_lexicon.get_synonyms(core_word, pos=None)

            # Only tokens with at least one synonym ever become candidates.
            if synonyms:
                candidate_indices.append(idx)
                candidate_metadata[idx] = CandidateInfo(
                    prefix=prefix,
                    core_word=core_word,
                    suffix=suffix,
                    part_of_speech=chosen_pos,
                    synonyms=synonyms,
                )

        if not candidate_indices:
            return text

        clamped_rate = max(0.0, effective_rate)
        if clamped_rate == 0.0:
            return text

        # Turn the rate into a replacement count; the fractional part of the
        # expected count is resolved with a single RNG draw.
        population = len(candidate_indices)
        effective_fraction = min(clamped_rate, 1.0)
        expected_replacements = population * effective_fraction
        max_replacements = int(expected_replacements)
        remainder = expected_replacements - max_replacements
        if remainder > 0.0 and active_rng.random() < remainder:
            max_replacements += 1
        if clamped_rate >= 1.0:
            max_replacements = population
        max_replacements = min(population, max_replacements)
        if max_replacements <= 0:
            return text

        # Choose which positions to replace deterministically via rng.sample
        replace_positions = active_rng.sample(candidate_indices, k=max_replacements)
        # Process in ascending order so synonym draws follow text order
        replace_positions.sort()

        for pos in replace_positions:
            metadata = candidate_metadata[pos]
            replacement = active_rng.choice(metadata.synonyms)
            tokens[pos] = f"{metadata.prefix}{replacement}{metadata.suffix}"

        return "".join(tokens)
    finally:
        if restore_lexicon_seed:
            active_lexicon.reseed(original_lexicon_seed)
252
+
253
+
254
class Jargoyle(Glitchling):
    """Glitchling that swaps words with lexicon-driven synonyms."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        replacement_rate: float | None = None,
        part_of_speech: PartOfSpeechInput = "n",
        seed: int | None = None,
        lexicon: Lexicon | None = None,
    ) -> None:
        """Configure the glitchling.

        Parameters
        ----------
        - rate: Proportion of candidate words to replace; ``0.01`` is the default
          handed to ``resolve_rate`` when no rate is supplied.
        - replacement_rate: Legacy spelling of ``rate``.
        - part_of_speech: POS tag(s) forwarded to ``substitute_random_synonyms``.
        - seed: Seed forwarded to the base class and used to seed the lexicon.
        - lexicon: Synonym source. When omitted, the default lexicon is created
          and treated as owned by this instance.
        """
        # These attributes are set before the base initialiser runs because
        # set_param() reads them via getattr().
        self._param_aliases = {"replacement_rate": "rate"}
        self._owns_lexicon = lexicon is None
        self._external_lexicon_original_seed = (
            lexicon.seed if isinstance(lexicon, Lexicon) else None
        )
        self._initializing = True
        effective_rate = resolve_rate(
            rate=rate,
            legacy_value=replacement_rate,
            default=0.01,
            legacy_name="replacement_rate",
        )
        # Test identity, not truthiness: a caller-supplied lexicon that happens
        # to evaluate as falsy must still be used, in line with _owns_lexicon.
        if lexicon is not None:
            prepared_lexicon = lexicon
        else:
            prepared_lexicon = get_default_lexicon(seed=seed)
        if lexicon is not None and seed is not None:
            prepared_lexicon.reseed(seed)
        try:
            super().__init__(
                name="Jargoyle",
                corruption_function=substitute_random_synonyms,
                scope=AttackWave.WORD,
                seed=seed,
                rate=effective_rate,
                part_of_speech=part_of_speech,
                lexicon=prepared_lexicon,
            )
        finally:
            self._initializing = False

    def set_param(self, key: str, value: Any) -> None:
        """Store a parameter and keep the lexicon's seed in step with it.

        After delegating to the base class:

        - a ``seed`` change reseeds the current lexicon; for an external
          lexicon a ``None`` seed restores the seed it had when adopted;
        - a ``lexicon`` change outside construction marks the lexicon as
          external, records its seed, and reseeds it when this instance has
          a seed.
        """
        super().set_param(key, value)

        aliases = getattr(self, "_param_aliases", {})
        canonical = aliases.get(key, key)

        if canonical == "seed":
            current_lexicon = getattr(self, "lexicon", None)
            if isinstance(current_lexicon, Lexicon):
                if getattr(self, "_owns_lexicon", False):
                    current_lexicon.reseed(self.seed)
                else:
                    if self.seed is not None:
                        current_lexicon.reseed(self.seed)
                    else:
                        # No seed of our own: hand the borrowed lexicon back
                        # in the state recorded when it was adopted.
                        if hasattr(self, "_external_lexicon_original_seed"):
                            original_seed = getattr(self, "_external_lexicon_original_seed", None)
                            current_lexicon.reseed(original_seed)
        elif canonical == "lexicon" and isinstance(value, Lexicon):
            if getattr(self, "_initializing", False):
                # Construction path: ownership was already decided in __init__.
                if getattr(self, "_owns_lexicon", False):
                    if self.seed is not None:
                        value.reseed(self.seed)
                else:
                    if getattr(self, "_external_lexicon_original_seed", None) is None:
                        self._external_lexicon_original_seed = value.seed
                    if self.seed is not None:
                        value.reseed(self.seed)
                return

            # A lexicon assigned after construction is always external.
            self._owns_lexicon = False
            self._external_lexicon_original_seed = value.seed
            if self.seed is not None:
                value.reseed(self.seed)
            # NOTE(review): the recorded seed was just copied from value.seed,
            # so this comparison looks like it can never be true; confirm.
            elif value.seed != self._external_lexicon_original_seed:
                value.reseed(self._external_lexicon_original_seed)
330
+
331
+
332
+ jargoyle = Jargoyle()
333
+
334
+
335
+ __all__ = ["Jargoyle", "dependencies_available", "ensure_wordnet", "jargoyle"]
@@ -0,0 +1,109 @@
1
+ import random
2
+ from collections.abc import Collection
3
+ from typing import Literal
4
+
5
+ from confusable_homoglyphs import confusables
6
+
7
+ from ._rate import resolve_rate
8
+ from .core import AttackOrder, AttackWave, Glitchling
9
+
10
+
11
def swap_homoglyphs(
    text: str,
    rate: float | None = None,
    classes: list[str] | Literal["all"] | None = None,
    banned_characters: Collection[str] | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    replacement_rate: float | None = None,
) -> str:
    """Replace characters with visually confusable homoglyphs.

    Parameters
    ----------
    - text: Input text.
    - rate: Max proportion of eligible characters to replace (default 0.02).
    - classes: Restrict replacements to these Unicode script classes (default
      ["LATIN", "GREEK", "CYRILLIC"]). Use "all" to allow any.
    - banned_characters: Characters that must never appear as replacements.
    - seed: Optional seed if `rng` not provided.
    - rng: Optional RNG; overrides seed.
    - replacement_rate: Legacy spelling of ``rate``; both are passed to
      ``resolve_rate``, which decides the effective value.

    Notes
    -----
    - Only replaces alphanumeric characters present in
      ``confusables.confusables_data`` with single-codepoint alternatives.
    - Maintains determinism by shuffling candidate positions and sampling via
      the provided RNG.
    - Each replacement is applied at the position that was drawn, so edits are
      spread over the text instead of always hitting the leftmost occurrence
      of a character.

    """
    effective_rate = resolve_rate(
        rate=rate,
        legacy_value=replacement_rate,
        default=0.02,
        legacy_name="replacement_rate",
    )

    if rng is None:
        rng = random.Random(seed)

    if classes is None:
        classes = ["LATIN", "GREEK", "CYRILLIC"]

    # Track positions rather than bare characters so every pick edits the
    # exact occurrence it refers to.
    candidate_positions = [
        index
        for index, char in enumerate(text)
        if char.isalnum() and char in confusables.confusables_data
    ]
    clamped_rate = max(0.0, effective_rate)
    num_replacements = int(len(candidate_positions) * clamped_rate)
    rng.shuffle(candidate_positions)
    banned_set = set(banned_characters or ())

    # Replacements are single code points, so indices stay valid while editing.
    characters = list(text)
    done = 0
    for index in candidate_positions:
        if done >= num_replacements:
            break
        char = characters[index]
        options = [o["c"] for o in confusables.confusables_data[char] if len(o["c"]) == 1]
        if classes != "all":
            options = [opt for opt in options if confusables.alias(opt) in classes]
        if banned_set:
            options = [opt for opt in options if opt not in banned_set]
        if not options:
            # Nothing acceptable for this character; it does not count towards
            # the quota, so a later candidate may take its place.
            continue
        characters[index] = rng.choice(options)
        done += 1
    return "".join(characters)
73
+
74
+
75
class Mim1c(Glitchling):
    """Glitchling that substitutes look-alike homoglyphs for characters."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        replacement_rate: float | None = None,
        classes: list[str] | Literal["all"] | None = None,
        banned_characters: Collection[str] | None = None,
        seed: int | None = None,
    ) -> None:
        """Configure the glitchling.

        ``rate`` and its legacy spelling ``replacement_rate`` are reconciled by
        ``resolve_rate`` (default ``0.02``); the remaining settings are passed
        to the base class untouched.
        """
        # Set before the base initialiser runs.
        self._param_aliases = {"replacement_rate": "rate"}
        super().__init__(
            name="Mim1c",
            corruption_function=swap_homoglyphs,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.LAST,
            seed=seed,
            rate=resolve_rate(
                rate=rate,
                legacy_value=replacement_rate,
                default=0.02,
                legacy_name="replacement_rate",
            ),
            classes=classes,
            banned_characters=banned_characters,
        )
104
+
105
+
106
+ mim1c = Mim1c()
107
+
108
+
109
+ __all__ = ["Mim1c", "mim1c"]
@@ -0,0 +1,30 @@
1
+ # Source Replacements (space-separated)
2
+ li h
3
+ h li
4
+ rn m
5
+ m rn
6
+ cl d
7
+ d cl
8
+ I l
9
+ l I 1
10
+ 1 l I
11
+ 0 O
12
+ O 0
13
+ B 8
14
+ 8 B
15
+ S 5
16
+ 5 S
17
+ Z 2
18
+ 2 Z
19
+ G 6
20
+ 6 G
21
+ “ "
22
+ ” "
23
+ ‘ '
24
+ ’ '
25
+ — -
26
+ – -
27
+ vv w
28
+ w vv
29
+ ri n
30
+ n ri