glitchlings 0.4.1__cp310-cp310-macosx_11_0_universal2.whl → 0.4.3__cp310-cp310-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (47)
  1. glitchlings/__init__.py +30 -17
  2. glitchlings/__main__.py +0 -1
  3. glitchlings/_zoo_rust.cpython-310-darwin.so +0 -0
  4. glitchlings/compat.py +284 -0
  5. glitchlings/config.py +164 -34
  6. glitchlings/config.toml +1 -1
  7. glitchlings/dlc/__init__.py +3 -1
  8. glitchlings/dlc/_shared.py +68 -0
  9. glitchlings/dlc/huggingface.py +26 -41
  10. glitchlings/dlc/prime.py +64 -101
  11. glitchlings/dlc/pytorch.py +216 -0
  12. glitchlings/dlc/pytorch_lightning.py +233 -0
  13. glitchlings/lexicon/__init__.py +12 -33
  14. glitchlings/lexicon/_cache.py +21 -22
  15. glitchlings/lexicon/data/default_vector_cache.json +80 -14
  16. glitchlings/lexicon/metrics.py +1 -8
  17. glitchlings/lexicon/vector.py +109 -49
  18. glitchlings/lexicon/wordnet.py +89 -49
  19. glitchlings/main.py +30 -24
  20. glitchlings/util/__init__.py +18 -4
  21. glitchlings/util/adapters.py +27 -0
  22. glitchlings/zoo/__init__.py +26 -15
  23. glitchlings/zoo/_ocr_confusions.py +1 -3
  24. glitchlings/zoo/_rate.py +1 -4
  25. glitchlings/zoo/_sampling.py +0 -1
  26. glitchlings/zoo/_text_utils.py +1 -5
  27. glitchlings/zoo/adjax.py +2 -4
  28. glitchlings/zoo/apostrofae.py +128 -0
  29. glitchlings/zoo/assets/__init__.py +0 -0
  30. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  31. glitchlings/zoo/core.py +152 -87
  32. glitchlings/zoo/jargoyle.py +50 -45
  33. glitchlings/zoo/mim1c.py +11 -10
  34. glitchlings/zoo/redactyl.py +16 -16
  35. glitchlings/zoo/reduple.py +5 -3
  36. glitchlings/zoo/rushmore.py +4 -10
  37. glitchlings/zoo/scannequin.py +7 -6
  38. glitchlings/zoo/typogre.py +8 -9
  39. glitchlings/zoo/zeedub.py +6 -3
  40. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/METADATA +101 -4
  41. glitchlings-0.4.3.dist-info/RECORD +46 -0
  42. glitchlings/lexicon/graph.py +0 -290
  43. glitchlings-0.4.1.dist-info/RECORD +0 -39
  44. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/WHEEL +0 -0
  45. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/entry_points.txt +0 -0
  46. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/licenses/LICENSE +0 -0
  47. {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/top_level.txt +0 -0
glitchlings/zoo/core.py CHANGED
@@ -4,24 +4,18 @@ import inspect
4
4
  import logging
5
5
  import os
6
6
  import random
7
+ from collections.abc import Mapping, Sequence
7
8
  from enum import IntEnum, auto
8
9
  from hashlib import blake2s
9
- from typing import TYPE_CHECKING, Any, Callable, Protocol
10
+ from typing import TYPE_CHECKING, Any, Callable, Protocol, TypedDict, TypeGuard, Union, cast
10
11
 
11
- _datasets_error: ModuleNotFoundError | None = None
12
- try: # pragma: no cover - optional dependency
13
- from datasets import Dataset as _DatasetsDataset
14
- except ModuleNotFoundError as error: # pragma: no cover - optional dependency
15
- _DatasetsDataset = None # type: ignore[assignment]
16
- _datasets_error = error
17
- else:
18
- _datasets_error = None
12
+ from ..compat import get_datasets_dataset, require_datasets
13
+
14
+ _DatasetsDataset = get_datasets_dataset()
19
15
 
20
16
  try: # pragma: no cover - optional dependency
21
- from glitchlings._zoo_rust import (
22
- compose_glitchlings as _compose_glitchlings_rust,
23
- plan_glitchlings as _plan_glitchlings_rust,
24
- )
17
+ from glitchlings._zoo_rust import compose_glitchlings as _compose_glitchlings_rust
18
+ from glitchlings._zoo_rust import plan_glitchlings as _plan_glitchlings_rust
25
19
  except ImportError: # pragma: no cover - compiled extension not present
26
20
  _compose_glitchlings_rust = None
27
21
  _plan_glitchlings_rust = None
@@ -35,9 +29,20 @@ _PIPELINE_ENABLE_VALUES = {"1", "true", "yes", "on"}
35
29
  _PIPELINE_DISABLE_VALUES = {"0", "false", "no", "off"}
36
30
 
37
31
 
38
- def _pipeline_feature_flag_enabled() -> bool:
39
- """Return ``True`` when the environment does not explicitly disable the Rust pipeline."""
32
+ class PlanSpecification(TypedDict):
33
+ name: str
34
+ scope: int
35
+ order: int
40
36
 
37
+
38
+ TranscriptTurn = dict[str, Any]
39
+ Transcript = list[TranscriptTurn]
40
+
41
+ PlanEntry = Union["Glitchling", Mapping[str, Any]]
42
+
43
+
44
+ def pipeline_feature_flag_enabled() -> bool:
45
+ """Return ``True`` when the environment does not explicitly disable the Rust pipeline."""
41
46
  value = os.environ.get(_PIPELINE_FEATURE_FLAG_ENV)
42
47
  if value is None:
43
48
  return True
@@ -51,12 +56,62 @@ def _pipeline_feature_flag_enabled() -> bool:
51
56
 
52
57
  return True
53
58
 
59
+
60
+ def _pipeline_feature_flag_enabled() -> bool:
61
+ """Compatibility shim for legacy callers."""
62
+ return pipeline_feature_flag_enabled()
63
+
64
+
65
+ def is_rust_pipeline_supported() -> bool:
66
+ """Return ``True`` when the optional Rust extension is importable."""
67
+ return _compose_glitchlings_rust is not None
68
+
69
+
70
+ def is_rust_pipeline_enabled() -> bool:
71
+ """Return ``True`` when the Rust pipeline is available and not explicitly disabled."""
72
+ return is_rust_pipeline_supported() and pipeline_feature_flag_enabled()
73
+
74
+
75
+ def _spec_from_glitchling(glitchling: "Glitchling") -> PlanSpecification:
76
+ """Create a plan specification mapping from a glitchling instance."""
77
+ return {
78
+ "name": glitchling.name,
79
+ "scope": int(glitchling.level),
80
+ "order": int(glitchling.order),
81
+ }
82
+
83
+
84
+ def _normalize_plan_entry(entry: PlanEntry) -> PlanSpecification:
85
+ """Convert a plan entry (glitchling or mapping) into a normalized specification."""
86
+ if isinstance(entry, Glitchling):
87
+ return _spec_from_glitchling(entry)
88
+
89
+ if not isinstance(entry, Mapping):
90
+ message = "plan_glitchlings expects Glitchling instances or mapping specifications"
91
+ raise TypeError(message)
92
+
93
+ try:
94
+ name = str(entry["name"])
95
+ scope_value = int(entry["scope"])
96
+ order_value = int(entry["order"])
97
+ except KeyError as exc: # pragma: no cover - defensive guard
98
+ raise ValueError(f"Plan specification missing required field: {exc.args[0]}") from exc
99
+ except (TypeError, ValueError) as exc:
100
+ raise ValueError("Plan specification fields must be coercible to integers") from exc
101
+
102
+ return {"name": name, "scope": scope_value, "order": order_value}
103
+
104
+
105
+ def _normalize_plan_entries(entries: Sequence[PlanEntry]) -> list[PlanSpecification]:
106
+ """Normalize a collection of orchestration plan entries."""
107
+ return [_normalize_plan_entry(entry) for entry in entries]
108
+
109
+
54
110
  def _plan_glitchlings_python(
55
- specs: list[dict[str, Any]],
111
+ specs: Sequence[Mapping[str, Any]],
56
112
  master_seed: int,
57
113
  ) -> list[tuple[int, int]]:
58
114
  """Pure-Python fallback for orchestrating glitchlings in deterministic order."""
59
-
60
115
  master_seed_int = int(master_seed)
61
116
  planned: list[tuple[int, int, int, int, str]] = []
62
117
  for index, spec in enumerate(specs):
@@ -71,11 +126,10 @@ def _plan_glitchlings_python(
71
126
 
72
127
 
73
128
  def _plan_glitchlings_with_rust(
74
- specs: list[dict[str, Any]],
129
+ specs: Sequence[Mapping[str, Any]],
75
130
  master_seed: int,
76
131
  ) -> list[tuple[int, int]] | None:
77
132
  """Attempt to obtain the orchestration plan from the compiled Rust module."""
78
-
79
133
  if _plan_glitchlings_rust is None:
80
134
  return None
81
135
 
@@ -88,41 +142,54 @@ def _plan_glitchlings_with_rust(
88
142
  return [(int(index), int(seed)) for index, seed in plan]
89
143
 
90
144
 
91
- def _plan_glitchling_specs(
92
- specs: list[dict[str, Any]],
145
+ def _resolve_orchestration_plan(
146
+ specs: Sequence[PlanSpecification],
147
+ master_seed: int,
148
+ prefer_rust: bool,
149
+ ) -> list[tuple[int, int]]:
150
+ """Dispatch to the Rust planner when available, otherwise fall back to Python."""
151
+ if prefer_rust:
152
+ plan = _plan_glitchlings_with_rust(list(specs), master_seed)
153
+ if plan is not None:
154
+ return plan
155
+
156
+ return _plan_glitchlings_python(list(specs), master_seed)
157
+
158
+
159
+ def plan_glitchling_specs(
160
+ specs: Sequence[Mapping[str, Any]],
93
161
  master_seed: int | None,
162
+ *,
163
+ prefer_rust: bool = True,
94
164
  ) -> list[tuple[int, int]]:
95
165
  """Resolve orchestration order and seeds from glitchling specifications."""
96
-
97
166
  if master_seed is None:
98
167
  message = "Gaggle orchestration requires a master seed"
99
168
  raise ValueError(message)
100
169
 
170
+ normalized_specs = [_normalize_plan_entry(spec) for spec in specs]
101
171
  master_seed_int = int(master_seed)
102
- plan = _plan_glitchlings_with_rust(specs, master_seed_int)
103
- if plan is not None:
104
- return plan
105
-
106
- return _plan_glitchlings_python(specs, master_seed_int)
172
+ return _resolve_orchestration_plan(normalized_specs, master_seed_int, prefer_rust)
107
173
 
108
174
 
109
- def _plan_glitchling_sequence(
110
- glitchlings: list["Glitchling"], master_seed: int | None
175
+ def plan_glitchlings(
176
+ entries: Sequence[PlanEntry],
177
+ master_seed: int | None,
178
+ *,
179
+ prefer_rust: bool = True,
111
180
  ) -> list[tuple[int, int]]:
112
- """Derive orchestration plan for concrete glitchling instances."""
113
-
114
- specs = [
115
- {
116
- "name": glitchling.name,
117
- "scope": int(glitchling.level),
118
- "order": int(glitchling.order),
119
- }
120
- for glitchling in glitchlings
121
- ]
122
- return _plan_glitchling_specs(specs, master_seed)
181
+ """Normalize glitchling instances or specs and compute an orchestration plan."""
182
+ if master_seed is None:
183
+ message = "Gaggle orchestration requires a master seed"
184
+ raise ValueError(message)
185
+
186
+ normalized_specs = _normalize_plan_entries(entries)
187
+ master_seed_int = int(master_seed)
188
+ return _resolve_orchestration_plan(normalized_specs, master_seed_int, prefer_rust)
189
+
123
190
 
124
191
  if TYPE_CHECKING: # pragma: no cover - typing only
125
- from datasets import Dataset # type: ignore
192
+ from datasets import Dataset
126
193
  elif _DatasetsDataset is not None:
127
194
  Dataset = _DatasetsDataset
128
195
  else:
@@ -138,9 +205,8 @@ def _is_transcript(
138
205
  *,
139
206
  allow_empty: bool = True,
140
207
  require_all_content: bool = False,
141
- ) -> bool:
142
- """Return `True` when `value` appears to be a chat transcript."""
143
-
208
+ ) -> TypeGuard[Transcript]:
209
+ """Return ``True`` when ``value`` appears to be a chat transcript."""
144
210
  if not isinstance(value, list):
145
211
  return False
146
212
 
@@ -209,8 +275,8 @@ class Glitchling:
209
275
  order: Relative ordering within the same scope.
210
276
  seed: Optional seed for deterministic random behaviour.
211
277
  **kwargs: Additional parameters forwarded to the corruption callable.
212
- """
213
278
 
279
+ """
214
280
  # Each Glitchling maintains its own RNG for deterministic yet isolated behavior.
215
281
  # If no seed is supplied, we fall back to Python's default entropy.
216
282
  self.seed = seed
@@ -228,7 +294,6 @@ class Glitchling:
228
294
 
229
295
  def set_param(self, key: str, value: Any) -> None:
230
296
  """Persist a parameter for use by the corruption callable."""
231
-
232
297
  aliases = getattr(self, "_param_aliases", {})
233
298
  canonical = aliases.get(key, key)
234
299
 
@@ -250,7 +315,6 @@ class Glitchling:
250
315
 
251
316
  def pipeline_operation(self) -> dict[str, Any] | None:
252
317
  """Return the Rust pipeline operation descriptor for this glitchling."""
253
-
254
318
  factory = self._pipeline_descriptor_factory
255
319
  if factory is None:
256
320
  return None
@@ -259,15 +323,11 @@ class Glitchling:
259
323
 
260
324
  def _corruption_expects_rng(self) -> bool:
261
325
  """Return `True` when the corruption function accepts an rng keyword."""
262
-
263
326
  cached_callable = self._cached_rng_callable
264
327
  cached_expectation = self._cached_rng_expectation
265
328
  corruption_function = self.corruption_function
266
329
 
267
- if (
268
- cached_callable is corruption_function
269
- and cached_expectation is not None
270
- ):
330
+ if cached_callable is corruption_function and cached_expectation is not None:
271
331
  return cached_expectation
272
332
 
273
333
  expects_rng = False
@@ -285,7 +345,6 @@ class Glitchling:
285
345
 
286
346
  def __corrupt(self, text: str, *args: Any, **kwargs: Any) -> str:
287
347
  """Execute the corruption callable, injecting the RNG when required."""
288
-
289
348
  # Pass rng to underlying corruption function if it expects it.
290
349
  expects_rng = self._corruption_expects_rng()
291
350
 
@@ -295,25 +354,21 @@ class Glitchling:
295
354
  corrupted = self.corruption_function(text, *args, **kwargs)
296
355
  return corrupted
297
356
 
298
- def corrupt(self, text: str | list[dict[str, Any]]) -> str | list[dict[str, Any]]:
357
+ def corrupt(self, text: str | Transcript) -> str | Transcript:
299
358
  """Apply the corruption function to text or conversational transcripts."""
300
-
301
359
  if _is_transcript(text):
302
- transcript = [dict(turn) for turn in text]
360
+ transcript: Transcript = [dict(turn) for turn in text]
303
361
  if transcript:
304
- transcript[-1]["content"] = self.__corrupt(
305
- transcript[-1]["content"], **self.kwargs
306
- )
362
+ content = transcript[-1].get("content")
363
+ if isinstance(content, str):
364
+ transcript[-1]["content"] = self.__corrupt(content, **self.kwargs)
307
365
  return transcript
308
366
 
309
- return self.__corrupt(text, **self.kwargs)
367
+ return self.__corrupt(cast(str, text), **self.kwargs)
310
368
 
311
369
  def corrupt_dataset(self, dataset: Dataset, columns: list[str]) -> Dataset:
312
370
  """Apply corruption lazily across dataset columns."""
313
-
314
- if _DatasetsDataset is None:
315
- message = "datasets is not installed"
316
- raise ModuleNotFoundError(message) from _datasets_error
371
+ require_datasets("datasets is not installed")
317
372
 
318
373
  def __corrupt_row(row: dict[str, Any]) -> dict[str, Any]:
319
374
  row = dict(row)
@@ -333,14 +388,12 @@ class Glitchling:
333
388
 
334
389
  return dataset.with_transform(__corrupt_row)
335
390
 
336
- def __call__(self, text: str, *args: Any, **kwds: Any) -> str | list[dict[str, Any]]:
391
+ def __call__(self, text: str, *args: Any, **kwds: Any) -> str | Transcript:
337
392
  """Allow a glitchling to be invoked directly like a callable."""
338
-
339
393
  return self.corrupt(text, *args, **kwds)
340
394
 
341
395
  def reset_rng(self, seed: int | None = None) -> None:
342
396
  """Reset the glitchling's RNG to its initial seed."""
343
-
344
397
  if seed is not None:
345
398
  self.seed = seed
346
399
  if self.seed is not None:
@@ -348,7 +401,6 @@ class Glitchling:
348
401
 
349
402
  def clone(self, seed: int | None = None) -> "Glitchling":
350
403
  """Create a copy of this glitchling, optionally with a new seed."""
351
-
352
404
  cls = self.__class__
353
405
  filtered_kwargs = {k: v for k, v in self.kwargs.items() if k != "seed"}
354
406
  clone_seed = seed if seed is not None else self.seed
@@ -368,9 +420,6 @@ class Glitchling:
368
420
  return cls(**filtered_kwargs)
369
421
 
370
422
 
371
-
372
-
373
-
374
423
  class Gaggle(Glitchling):
375
424
  """A collection of glitchlings executed in a deterministic order."""
376
425
 
@@ -380,18 +429,16 @@ class Gaggle(Glitchling):
380
429
  Args:
381
430
  glitchlings: Glitchlings to orchestrate.
382
431
  seed: Master seed used to derive per-glitchling seeds.
383
- """
384
432
 
385
- super().__init__("Gaggle", self.corrupt, AttackWave.DOCUMENT, seed=seed)
433
+ """
434
+ super().__init__("Gaggle", self._corrupt_text, AttackWave.DOCUMENT, seed=seed)
386
435
  self._clones_by_index: list[Glitchling] = []
387
436
  for idx, glitchling in enumerate(glitchlings):
388
437
  clone = glitchling.clone()
389
438
  setattr(clone, "_gaggle_index", idx)
390
439
  self._clones_by_index.append(clone)
391
440
 
392
- self.glitchlings: dict[AttackWave, list[Glitchling]] = {
393
- level: [] for level in AttackWave
394
- }
441
+ self.glitchlings: dict[AttackWave, list[Glitchling]] = {level: [] for level in AttackWave}
395
442
  self.apply_order: list[Glitchling] = []
396
443
  self._plan: list[tuple[int, int]] = []
397
444
  self.sort_glitchlings()
@@ -399,6 +446,7 @@ class Gaggle(Glitchling):
399
446
  @staticmethod
400
447
  def derive_seed(master_seed: int, glitchling_name: str, index: int) -> int:
401
448
  """Derive a deterministic seed for a glitchling based on the master seed."""
449
+
402
450
  def _int_to_bytes(value: int) -> bytes:
403
451
  if value == 0:
404
452
  return b"\x00"
@@ -425,8 +473,7 @@ class Gaggle(Glitchling):
425
473
 
426
474
  def sort_glitchlings(self) -> None:
427
475
  """Sort glitchlings by wave then order to produce application order."""
428
-
429
- plan = _plan_glitchling_sequence(self._clones_by_index, self.seed)
476
+ plan = plan_glitchlings(self._clones_by_index, self.seed)
430
477
  self._plan = plan
431
478
 
432
479
  self.glitchlings = {level: [] for level in AttackWave}
@@ -451,14 +498,12 @@ class Gaggle(Glitchling):
451
498
  @staticmethod
452
499
  def rust_pipeline_supported() -> bool:
453
500
  """Return ``True`` when the compiled Rust pipeline is importable."""
454
-
455
- return _compose_glitchlings_rust is not None
501
+ return is_rust_pipeline_supported()
456
502
 
457
503
  @staticmethod
458
504
  def rust_pipeline_enabled() -> bool:
459
505
  """Return ``True`` when the Rust pipeline is available and not explicitly disabled."""
460
-
461
- return Gaggle.rust_pipeline_supported() and _pipeline_feature_flag_enabled()
506
+ return is_rust_pipeline_enabled()
462
507
 
463
508
  def _pipeline_descriptors(self) -> list[dict[str, Any]] | None:
464
509
  if not self.rust_pipeline_enabled():
@@ -488,18 +533,38 @@ class Gaggle(Glitchling):
488
533
 
489
534
  return descriptors
490
535
 
491
- def corrupt(self, text: str) -> str:
492
- """Apply each glitchling to the provided text sequentially."""
493
-
536
+ def _corrupt_text(self, text: str) -> str:
537
+ """Apply each glitchling to string input sequentially."""
494
538
  master_seed = self.seed
495
539
  descriptors = self._pipeline_descriptors()
496
540
  if master_seed is not None and descriptors is not None:
497
541
  try:
498
- return _compose_glitchlings_rust(text, descriptors, master_seed)
542
+ return cast(str, _compose_glitchlings_rust(text, descriptors, master_seed))
499
543
  except Exception: # pragma: no cover - fall back to Python execution
500
544
  log.debug("Rust pipeline failed; falling back", exc_info=True)
501
545
 
502
546
  corrupted = text
503
547
  for glitchling in self.apply_order:
504
- corrupted = glitchling(corrupted)
548
+ next_value = glitchling.corrupt(corrupted)
549
+ if not isinstance(next_value, str):
550
+ message = "Glitchling pipeline produced non-string output for string input"
551
+ raise TypeError(message)
552
+ corrupted = next_value
553
+
505
554
  return corrupted
555
+
556
+ def corrupt(self, text: str | Transcript) -> str | Transcript:
557
+ """Apply each glitchling to the provided text sequentially."""
558
+ if isinstance(text, str):
559
+ return self._corrupt_text(text)
560
+
561
+ if _is_transcript(text):
562
+ transcript: Transcript = [dict(turn) for turn in text]
563
+ if transcript and "content" in transcript[-1]:
564
+ content = transcript[-1]["content"]
565
+ if isinstance(content, str):
566
+ transcript[-1]["content"] = self._corrupt_text(content)
567
+ return transcript
568
+
569
+ message = f"Unsupported text type for Gaggle corruption: {type(text)!r}"
570
+ raise TypeError(message)
glitchlings/zoo/jargoyle.py CHANGED
@@ -2,18 +2,25 @@ import random
2
2
  import re
3
3
  from collections.abc import Iterable
4
4
  from dataclasses import dataclass
5
+ from types import ModuleType
5
6
  from typing import Any, Literal, cast
6
7
 
7
8
  from glitchlings.lexicon import Lexicon, get_default_lexicon
8
9
 
10
+ from ._rate import resolve_rate
11
+ from .core import AttackWave, Glitchling
12
+
13
+ _wordnet_module: ModuleType | None
14
+
9
15
  try: # pragma: no cover - optional WordNet dependency
10
- from glitchlings.lexicon.wordnet import (
11
- WordNetLexicon,
12
- dependencies_available as _lexicon_dependencies_available,
13
- ensure_wordnet as _lexicon_ensure_wordnet,
14
- )
16
+ import glitchlings.lexicon.wordnet as _wordnet_module
15
17
  except Exception: # pragma: no cover - triggered when nltk unavailable
16
- WordNetLexicon = None # type: ignore[assignment]
18
+ _wordnet_module = None
19
+
20
+ _wordnet_runtime: ModuleType | None = _wordnet_module
21
+
22
+ WordNetLexicon: type[Lexicon] | None
23
+ if _wordnet_runtime is None:
17
24
 
18
25
  def _lexicon_dependencies_available() -> bool:
19
26
  return False
@@ -24,16 +31,18 @@ except Exception: # pragma: no cover - triggered when nltk unavailable
24
31
  "and download its WordNet corpus manually if you need legacy synonyms."
25
32
  )
26
33
 
34
+ WordNetLexicon = None
35
+ else:
36
+ WordNetLexicon = cast(type[Lexicon], _wordnet_runtime.WordNetLexicon)
37
+ _lexicon_dependencies_available = _wordnet_runtime.dependencies_available
38
+ _lexicon_ensure_wordnet = _wordnet_runtime.ensure_wordnet
27
39
 
28
- from ._rate import resolve_rate
29
- from .core import AttackWave, Glitchling
30
40
 
31
41
  ensure_wordnet = _lexicon_ensure_wordnet
32
42
 
33
43
 
34
44
  def dependencies_available() -> bool:
35
45
  """Return ``True`` when a synonym backend is accessible."""
36
-
37
46
  if _lexicon_dependencies_available():
38
47
  return True
39
48
 
@@ -58,7 +67,6 @@ _VALID_POS: tuple[PartOfSpeech, ...] = ("n", "v", "a", "r")
58
67
 
59
68
  def _split_token(token: str) -> tuple[str, str, str]:
60
69
  """Split a token into leading punctuation, core word, and trailing punctuation."""
61
-
62
70
  match = re.match(r"^(\W*)(.*?)(\W*)$", token)
63
71
  if not match:
64
72
  return "", token, ""
@@ -70,23 +78,18 @@ def _normalize_parts_of_speech(
70
78
  part_of_speech: PartOfSpeechInput,
71
79
  ) -> NormalizedPartsOfSpeech:
72
80
  """Coerce user input into a tuple of valid WordNet POS tags."""
73
-
74
81
  if isinstance(part_of_speech, str):
75
82
  lowered = part_of_speech.lower()
76
83
  if lowered == "any":
77
84
  return _VALID_POS
78
85
  if lowered not in _VALID_POS:
79
- raise ValueError(
80
- "part_of_speech must be one of 'n', 'v', 'a', 'r', or 'any'"
81
- )
86
+ raise ValueError("part_of_speech must be one of 'n', 'v', 'a', 'r', or 'any'")
82
87
  return (cast(PartOfSpeech, lowered),)
83
88
 
84
89
  normalized: list[PartOfSpeech] = []
85
90
  for pos in part_of_speech:
86
91
  if pos not in _VALID_POS:
87
- raise ValueError(
88
- "part_of_speech entries must be one of 'n', 'v', 'a', or 'r'"
89
- )
92
+ raise ValueError("part_of_speech entries must be one of 'n', 'v', 'a', or 'r'")
90
93
  if pos not in normalized:
91
94
  normalized.append(pos)
92
95
  if not normalized:
@@ -118,6 +121,7 @@ def substitute_random_synonyms(
118
121
  """Replace words with random lexicon-driven synonyms.
119
122
 
120
123
  Parameters
124
+ ----------
121
125
  - text: Input text.
122
126
  - rate: Max proportion of candidate words to replace (default 0.01).
123
127
  - part_of_speech: WordNet POS tag(s) to target. Accepts "n", "v", "a", "r",
@@ -134,6 +138,7 @@ def substitute_random_synonyms(
134
138
  - Replacement positions chosen via rng.sample.
135
139
  - Synonyms sourced through the lexicon; the default backend derives
136
140
  deterministic subsets per word and part-of-speech using the active seed.
141
+
137
142
  """
138
143
  effective_rate = resolve_rate(
139
144
  rate=rate,
@@ -168,38 +173,40 @@ def substitute_random_synonyms(
168
173
  # Split but keep whitespace separators so we can rebuild easily
169
174
  tokens = re.split(r"(\s+)", text)
170
175
 
171
- # Collect indices of candidate tokens (even positions 0,2,.. are words given our split design)
176
+ # Collect candidate word indices (even positions are words because separators are kept)
172
177
  candidate_indices: list[int] = []
173
178
  candidate_metadata: dict[int, CandidateInfo] = {}
174
179
  for idx, tok in enumerate(tokens):
175
- if idx % 2 == 0 and tok and not tok.isspace():
176
- prefix, core_word, suffix = _split_token(tok)
177
- if not core_word:
178
- continue
179
-
180
- chosen_pos: str | None = None
181
- synonyms: list[str] = []
180
+ if idx % 2 != 0 or not tok or tok.isspace():
181
+ continue
182
182
 
183
- for pos in target_pos:
184
- if not active_lexicon.supports_pos(pos):
185
- continue
186
- synonyms = active_lexicon.get_synonyms(core_word, pos=pos)
187
- if synonyms:
188
- chosen_pos = pos
189
- break
183
+ prefix, core_word, suffix = _split_token(tok)
184
+ if not core_word:
185
+ continue
190
186
 
191
- if not synonyms and active_lexicon.supports_pos(None):
192
- synonyms = active_lexicon.get_synonyms(core_word, pos=None)
187
+ chosen_pos: str | None = None
188
+ synonyms: list[str] = []
193
189
 
190
+ for tag in target_pos:
191
+ if not active_lexicon.supports_pos(tag):
192
+ continue
193
+ synonyms = active_lexicon.get_synonyms(core_word, pos=tag)
194
194
  if synonyms:
195
- candidate_indices.append(idx)
196
- candidate_metadata[idx] = CandidateInfo(
197
- prefix=prefix,
198
- core_word=core_word,
199
- suffix=suffix,
200
- part_of_speech=chosen_pos,
201
- synonyms=synonyms,
202
- )
195
+ chosen_pos = tag
196
+ break
197
+
198
+ if not synonyms and active_lexicon.supports_pos(None):
199
+ synonyms = active_lexicon.get_synonyms(core_word, pos=None)
200
+
201
+ if synonyms:
202
+ candidate_indices.append(idx)
203
+ candidate_metadata[idx] = CandidateInfo(
204
+ prefix=prefix,
205
+ core_word=core_word,
206
+ suffix=suffix,
207
+ part_of_speech=chosen_pos,
208
+ synonyms=synonyms,
209
+ )
203
210
 
204
211
  if not candidate_indices:
205
212
  return text
@@ -296,9 +303,7 @@ class Jargoyle(Glitchling):
296
303
  current_lexicon.reseed(self.seed)
297
304
  else:
298
305
  if hasattr(self, "_external_lexicon_original_seed"):
299
- original_seed = getattr(
300
- self, "_external_lexicon_original_seed", None
301
- )
306
+ original_seed = getattr(self, "_external_lexicon_original_seed", None)
302
307
  current_lexicon.reseed(original_seed)
303
308
  elif canonical == "lexicon" and isinstance(value, Lexicon):
304
309
  if getattr(self, "_initializing", False):
glitchlings/zoo/mim1c.py CHANGED
@@ -1,11 +1,11 @@
1
- from collections.abc import Collection
2
1
  import random
2
+ from collections.abc import Collection
3
3
  from typing import Literal
4
4
 
5
5
  from confusable_homoglyphs import confusables
6
6
 
7
- from .core import AttackOrder, AttackWave, Glitchling
8
7
  from ._rate import resolve_rate
8
+ from .core import AttackOrder, AttackWave, Glitchling
9
9
 
10
10
 
11
11
  def swap_homoglyphs(
@@ -21,16 +21,21 @@ def swap_homoglyphs(
21
21
  """Replace characters with visually confusable homoglyphs.
22
22
 
23
23
  Parameters
24
+ ----------
24
25
  - text: Input text.
25
26
  - rate: Max proportion of eligible characters to replace (default 0.02).
26
- - classes: Restrict replacements to these Unicode script classes (default ["LATIN","GREEK","CYRILLIC"]). Use "all" to allow any.
27
+ - classes: Restrict replacements to these Unicode script classes (default
28
+ ["LATIN", "GREEK", "CYRILLIC"]). Use "all" to allow any.
27
29
  - banned_characters: Characters that must never appear as replacements.
28
30
  - seed: Optional seed if `rng` not provided.
29
31
  - rng: Optional RNG; overrides seed.
30
32
 
31
33
  Notes
32
- - Only replaces characters present in confusables.confusables_data with single-codepoint alternatives.
34
+ -----
35
+ - Only replaces characters present in ``confusables.confusables_data`` with
36
+ single-codepoint alternatives.
33
37
  - Maintains determinism by shuffling candidates and sampling via the provided RNG.
38
+
34
39
  """
35
40
  effective_rate = resolve_rate(
36
41
  rate=rate,
@@ -46,9 +51,7 @@ def swap_homoglyphs(
46
51
  classes = ["LATIN", "GREEK", "CYRILLIC"]
47
52
 
48
53
  target_chars = [char for char in text if char.isalnum()]
49
- confusable_chars = [
50
- char for char in target_chars if char in confusables.confusables_data
51
- ]
54
+ confusable_chars = [char for char in target_chars if char in confusables.confusables_data]
52
55
  clamped_rate = max(0.0, effective_rate)
53
56
  num_replacements = int(len(confusable_chars) * clamped_rate)
54
57
  done = 0
@@ -57,9 +60,7 @@ def swap_homoglyphs(
57
60
  for char in confusable_chars:
58
61
  if done >= num_replacements:
59
62
  break
60
- options = [
61
- o["c"] for o in confusables.confusables_data[char] if len(o["c"]) == 1
62
- ]
63
+ options = [o["c"] for o in confusables.confusables_data[char] if len(o["c"]) == 1]
63
64
  if classes != "all":
64
65
  options = [opt for opt in options if confusables.alias(opt) in classes]
65
66
  if banned_set: