glitchlings 0.4.5__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (53) hide show
  1. glitchlings/__init__.py +71 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust.cp311-win_amd64.pyd +0 -0
  4. glitchlings/compat.py +282 -0
  5. glitchlings/config.py +386 -0
  6. glitchlings/config.toml +3 -0
  7. glitchlings/data/__init__.py +1 -0
  8. glitchlings/data/hokey_assets.json +193 -0
  9. glitchlings/dlc/__init__.py +7 -0
  10. glitchlings/dlc/_shared.py +153 -0
  11. glitchlings/dlc/huggingface.py +81 -0
  12. glitchlings/dlc/prime.py +254 -0
  13. glitchlings/dlc/pytorch.py +166 -0
  14. glitchlings/dlc/pytorch_lightning.py +209 -0
  15. glitchlings/lexicon/__init__.py +192 -0
  16. glitchlings/lexicon/_cache.py +108 -0
  17. glitchlings/lexicon/data/default_vector_cache.json +82 -0
  18. glitchlings/lexicon/metrics.py +162 -0
  19. glitchlings/lexicon/vector.py +652 -0
  20. glitchlings/lexicon/wordnet.py +228 -0
  21. glitchlings/main.py +364 -0
  22. glitchlings/util/__init__.py +195 -0
  23. glitchlings/util/adapters.py +27 -0
  24. glitchlings/util/hokey_generator.py +144 -0
  25. glitchlings/util/stretch_locator.py +140 -0
  26. glitchlings/util/stretchability.py +375 -0
  27. glitchlings/zoo/__init__.py +172 -0
  28. glitchlings/zoo/_ocr_confusions.py +32 -0
  29. glitchlings/zoo/_rate.py +131 -0
  30. glitchlings/zoo/_rust_extensions.py +143 -0
  31. glitchlings/zoo/_sampling.py +54 -0
  32. glitchlings/zoo/_text_utils.py +100 -0
  33. glitchlings/zoo/adjax.py +128 -0
  34. glitchlings/zoo/apostrofae.py +127 -0
  35. glitchlings/zoo/assets/__init__.py +0 -0
  36. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  37. glitchlings/zoo/core.py +582 -0
  38. glitchlings/zoo/hokey.py +173 -0
  39. glitchlings/zoo/jargoyle.py +335 -0
  40. glitchlings/zoo/mim1c.py +109 -0
  41. glitchlings/zoo/ocr_confusions.tsv +30 -0
  42. glitchlings/zoo/redactyl.py +193 -0
  43. glitchlings/zoo/reduple.py +148 -0
  44. glitchlings/zoo/rushmore.py +153 -0
  45. glitchlings/zoo/scannequin.py +171 -0
  46. glitchlings/zoo/typogre.py +231 -0
  47. glitchlings/zoo/zeedub.py +185 -0
  48. glitchlings-0.4.5.dist-info/METADATA +648 -0
  49. glitchlings-0.4.5.dist-info/RECORD +53 -0
  50. glitchlings-0.4.5.dist-info/WHEEL +5 -0
  51. glitchlings-0.4.5.dist-info/entry_points.txt +2 -0
  52. glitchlings-0.4.5.dist-info/licenses/LICENSE +201 -0
  53. glitchlings-0.4.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,143 @@
1
+ """Centralized loading and fallback management for optional Rust extensions.
2
+
3
+ This module provides a single source of truth for importing Rust-accelerated
4
+ operations, eliminating duplicated try/except blocks across the codebase.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from typing import Any, Callable
11
+
12
+ log = logging.getLogger(__name__)
13
+
14
+
15
+ # Cache of loaded Rust operations to avoid repeated import attempts
16
+ _rust_operation_cache: dict[str, Callable[..., Any] | None] = {}
17
+ _rust_module_available: bool | None = None
18
+
19
+
20
def is_rust_module_available() -> bool:
    """Report whether the compiled ``glitchlings._zoo_rust`` module imports.

    Returns
    -------
    bool
        ``True`` when the import succeeds, ``False`` when it raises
        ``ImportError``.

    Notes
    -----
    The first outcome is stored in the module-level ``_rust_module_available``
    flag and reused by every later call, so the import is attempted only once
    until that flag is reset to ``None``.
    """
    global _rust_module_available

    # ``None`` means "not probed yet"; ``True``/``False`` are remembered answers.
    if _rust_module_available is None:
        try:
            import glitchlings._zoo_rust  # noqa: F401
        except ImportError:
            # ModuleNotFoundError is a subclass of ImportError, so this single
            # clause covers both failure modes.
            _rust_module_available = False
            log.debug("Rust extension module not available; using Python fallbacks")
        else:
            _rust_module_available = True
            log.debug("Rust extension module successfully loaded")

    return _rust_module_available
47
+
48
+
49
def get_rust_operation(operation_name: str) -> Callable[..., Any] | None:
    """Look up a callable exported by the Rust extension, memoising the answer.

    Parameters
    ----------
    operation_name : str
        Attribute name to fetch from ``glitchlings._zoo_rust``.

    Returns
    -------
    Callable | None
        The extension callable, or ``None`` when the extension cannot be
        imported or does not expose ``operation_name``.

    Examples
    --------
    >>> fatfinger = get_rust_operation("fatfinger")
    >>> if fatfinger is not None:
    ...     result = fatfinger(text, ...)
    ... else:
    ...     result = python_fallback(text, ...)

    Notes
    -----
    - Hits and misses are both stored in ``_rust_operation_cache``, so each
      name is resolved at most once while its cache entry exists.
    - Lookup failures are only reported through debug-level log records.
    """
    # A cached ``None`` is a valid negative answer, so test membership rather
    # than truthiness of the stored value.
    if operation_name in _rust_operation_cache:
        return _rust_operation_cache[operation_name]

    resolved: Callable[..., Any] | None = None
    if is_rust_module_available():
        try:
            from glitchlings import _zoo_rust

            resolved = getattr(_zoo_rust, operation_name, None)
        except (ImportError, ModuleNotFoundError, AttributeError) as exc:
            log.debug(f"Failed to load Rust operation '{operation_name}': {exc}")
        else:
            if resolved is None:
                log.debug(f"Rust operation '{operation_name}' not found in extension module")
            else:
                log.debug(f"Rust operation '{operation_name}' loaded successfully")

    _rust_operation_cache[operation_name] = resolved
    return resolved
102
+
103
+
104
def clear_cache() -> None:
    """Forget every memoised lookup so the next access probes the extension again.

    Empties the per-operation cache and resets the module-availability flag to
    its "not probed" state. Mainly intended for tests in which the Rust
    module's availability changes while the process is running.
    """
    global _rust_module_available

    # The dict is emptied in place, so only the flag needs rebinding.
    _rust_operation_cache.clear()
    _rust_module_available = None
    log.debug("Rust extension cache cleared")
115
+
116
+
117
def preload_operations(*operation_names: str) -> dict[str, Callable[..., Any] | None]:
    """Resolve several Rust operations in one call.

    Parameters
    ----------
    *operation_names : str
        Names of the operations to resolve.

    Returns
    -------
    dict[str, Callable | None]
        One entry per requested name, holding whatever
        ``get_rust_operation`` returned for it (``None`` when unavailable).

    Examples
    --------
    >>> ops = preload_operations("fatfinger", "reduplicate_words", "delete_random_words")
    >>> fatfinger = ops["fatfinger"]
    """
    loaded: dict[str, Callable[..., Any] | None] = {}
    for name in operation_names:
        loaded[name] = get_rust_operation(name)
    return loaded
136
+
137
+
138
+ __all__ = [
139
+ "is_rust_module_available",
140
+ "get_rust_operation",
141
+ "clear_cache",
142
+ "preload_operations",
143
+ ]
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import random
4
+ from typing import Sequence
5
+
6
+
7
def weighted_sample_without_replacement(
    population: Sequence[int],
    weights: Sequence[float],
    *,
    k: int,
    rng: random.Random,
) -> list[int]:
    """Draw ``k`` distinct entries of ``population`` with odds set by ``weights``.

    Each draw picks one remaining entry with probability proportional to its
    weight, then removes it from the pool. When the remaining weights sum to
    zero or less, the draw falls back to a uniform ``rng.randrange`` pick.

    Args:
        population: Candidate values to draw from.
        weights: One weight per candidate, in the same order.
        k: Number of values to draw.
        rng: Random generator supplying every random number used.

    Returns:
        The drawn values in selection order. Empty when ``k`` is zero or the
        population is empty.

    Raises:
        ValueError: If ``k`` is negative, the two sequences differ in length,
            or ``k`` exceeds the size of a non-empty population.
    """
    if k < 0:
        raise ValueError("Sample size cannot be negative")

    if len(population) != len(weights):
        raise ValueError("Population and weight sequences must be the same length")

    # Private, mutable copies kept in lockstep: index i of one matches index i
    # of the other.
    pool_values = list(population)
    pool_weights = list(weights)

    if k == 0 or not pool_values:
        return []

    if k > len(pool_values):
        raise ValueError("Sample larger than population or is negative")

    picked: list[int] = []
    while len(picked) < k:
        remaining_mass = sum(pool_weights)
        if remaining_mass <= 0.0:
            position = rng.randrange(len(pool_values))
        else:
            cutoff = rng.random() * remaining_mass
            running = 0.0
            for position, weight in enumerate(pool_weights):
                running += weight
                if running >= cutoff:
                    break
            else:
                # The running total never reached the cutoff, so settle on the
                # final entry.
                position = len(pool_weights) - 1

        picked.append(pool_values.pop(position))
        del pool_weights[position]

    return picked
52
+
53
+
54
+ __all__ = ["weighted_sample_without_replacement"]
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Sequence
6
+
7
+ _WORD_SPLIT_PATTERN = re.compile(r"(\s+)")
8
+ _TOKEN_EDGES_PATTERN = re.compile(r"^(\W*)(.*?)(\W*)$")
9
+
10
+
11
def split_preserving_whitespace(text: str) -> list[str]:
    """Split ``text`` with ``_WORD_SPLIT_PATTERN`` so the pieces rejoin exactly.

    The pattern captures each whitespace run, so those runs are returned as
    their own list items between the non-whitespace pieces.
    """
    pieces = _WORD_SPLIT_PATTERN.split(text)
    return pieces
14
+
15
+
16
def split_token_edges(token: str) -> tuple[str, str, str]:
    """Break ``token`` into ``(leading, core, trailing)`` segments.

    The segments are the three capture groups of ``_TOKEN_EDGES_PATTERN``.
    If the pattern does not match, the whole token is returned as the core
    with empty edges.
    """
    found = _TOKEN_EDGES_PATTERN.match(token)
    if found is not None:
        leading, core, trailing = found.groups()
        return leading, core, trailing
    return "", token, ""
22
+
23
+
24
def token_core_length(token: str) -> int:
    """Return a positive length for ``token``, for use in weighting heuristics.

    The length of the token's core is used when the core is non-empty,
    otherwise the length of the whole token. The result is never below 1.
    """
    _, core, _ = split_token_edges(token)
    measured = len(core or token)
    if measured > 0:
        return measured

    # Nothing measurable so far: try the stripped token, then the raw token,
    # and finally fall back to a floor of 1.
    trimmed = token.strip()
    fallback = len(trimmed) if trimmed else len(token)
    return fallback if fallback > 0 else 1
35
+
36
+
37
@dataclass(frozen=True)
class WordToken:
    """Immutable record for one non-whitespace token found by a word splitter."""

    # Position of the token within the token sequence it was collected from.
    index: int
    # Leading edge segment of the token.
    prefix: str
    # Middle segment of the token; may be empty.
    core: str
    # Trailing edge segment of the token.
    suffix: str
    # Length value stored alongside the token by whoever builds the record.
    core_length: int

    @property
    def has_core(self) -> bool:
        """Return ``True`` when ``core`` is a non-empty string."""
        return len(self.core) > 0
51
+
52
+
53
def collect_word_tokens(
    tokens: Sequence[str],
    *,
    skip_first_word: bool = False,
) -> list[WordToken]:
    """Build a :class:`WordToken` for each word slot in ``tokens``.

    Only even positions are inspected. Entries that are empty or pure
    whitespace are skipped.

    Args:
        tokens: Token sequence produced by :func:`split_preserving_whitespace`.
        skip_first_word: Start scanning at position 2 instead of 0, leaving
            the first slot out (used by Rushmore to preserve leading words).

    Returns:
        One record per accepted token, in order, each with a ``core_length``
        of at least 1.
    """
    first_slot = 2 if skip_first_word else 0
    records: list[WordToken] = []

    for position in range(first_slot, len(tokens), 2):
        piece = tokens[position]
        if not piece or piece.isspace():
            continue

        prefix, core, suffix = split_token_edges(piece)
        # Length fallback: core first, then the stripped token, then the raw
        # token, and finally a floor of 1.
        weight_length = len(core) or len(piece.strip()) or len(piece) or 1

        records.append(
            WordToken(
                index=position,
                prefix=prefix,
                core=core,
                suffix=suffix,
                core_length=weight_length,
            )
        )

    return records
92
+
93
+
94
+ __all__ = [
95
+ "split_preserving_whitespace",
96
+ "split_token_edges",
97
+ "token_core_length",
98
+ "WordToken",
99
+ "collect_word_tokens",
100
+ ]
@@ -0,0 +1,128 @@
1
+ from __future__ import annotations
2
+
3
+ import random
4
+ from typing import Any, cast
5
+
6
+ from ._rate import resolve_rate
7
+ from ._rust_extensions import get_rust_operation
8
+ from ._text_utils import split_preserving_whitespace, split_token_edges
9
+ from .core import AttackWave, Glitchling
10
+
11
+ # Load Rust-accelerated operation if available
12
+ _swap_adjacent_words_rust = get_rust_operation("swap_adjacent_words")
13
+
14
+
15
def _python_swap_adjacent_words(
    text: str,
    *,
    rate: float,
    rng: random.Random,
) -> str:
    """Exchange the cores of neighbouring words, leaving edges and spacing alone.

    Words are taken in disjoint pairs (first with second, third with fourth,
    and so on). Each pair whose two cores are both non-empty is swapped with
    probability ``rate``, clamped to ``[0, 1]``.

    Args:
        text: Input text.
        rate: Per-pair swap probability.
        rng: Random generator consulted once per eligible pair unless the
            clamped rate is 1.0 or more.

    Returns:
        The rebuilt text, or ``text`` itself when there are fewer than two
        words or the clamped rate is zero.
    """
    tokens = split_preserving_whitespace(text)
    if len(tokens) < 2:
        return text

    # Only even positions that are neither empty nor pure whitespace count as
    # words.
    word_positions = [
        position
        for position, piece in enumerate(tokens)
        if position % 2 == 0 and piece and not piece.isspace()
    ]
    if len(word_positions) < 2:
        return text

    probability = max(0.0, min(rate, 1.0))
    if probability <= 0.0:
        return text

    always_swap = probability >= 1.0
    # Zipping the even- and odd-ranked words gives disjoint neighbouring pairs;
    # an unpaired final word is dropped by zip.
    for left_at, right_at in zip(word_positions[0::2], word_positions[1::2]):
        left_prefix, left_core, left_suffix = split_token_edges(tokens[left_at])
        right_prefix, right_core, right_suffix = split_token_edges(tokens[right_at])

        if not (left_core and right_core):
            continue

        if always_swap or rng.random() < probability:
            tokens[left_at] = f"{left_prefix}{right_core}{left_suffix}"
            tokens[right_at] = f"{right_prefix}{left_core}{right_suffix}"

    return "".join(tokens)
62
+
63
+
64
def swap_adjacent_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    swap_rate: float | None = None,
) -> str:
    """Swap adjacent word cores while preserving spacing and punctuation.

    Args:
        text: Input text.
        rate: Swap probability; combined with ``swap_rate`` by
            ``resolve_rate`` using a default of 0.5.
        seed: Seed for a fresh ``random.Random`` when ``rng`` is not given.
        rng: Random generator to use; takes precedence over ``seed``.
        swap_rate: Legacy keyword passed to ``resolve_rate`` as the old value.

    Returns:
        The text produced by the Rust operation when one was loaded,
        otherwise by the pure-Python implementation.
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=swap_rate,
        default=0.5,
        legacy_name="swap_rate",
    )
    bounded = max(0.0, min(resolved, 1.0))

    generator = rng if rng is not None else random.Random(seed)

    if _swap_adjacent_words_rust is None:
        return _python_swap_adjacent_words(text, rate=bounded, rng=generator)

    return cast(str, _swap_adjacent_words_rust(text, bounded, generator))
88
+
89
+
90
class Adjax(Glitchling):
    """Glitchling that swaps adjacent words to scramble local semantics."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        swap_rate: float | None = None,
        seed: int | None = None,
    ) -> None:
        """Configure the glitchling.

        Args:
            rate: Swap probability; combined with ``swap_rate`` by
                ``resolve_rate`` using a default of 0.5.
            swap_rate: Legacy keyword passed to ``resolve_rate`` as the old value.
            seed: Seed forwarded to the base class.
        """
        # Set before the base initialiser runs.
        # NOTE(review): presumably read by Glitchling to map the legacy
        # keyword onto ``rate`` - confirm against the base class.
        self._param_aliases = {"swap_rate": "rate"}
        super().__init__(
            name="Adjax",
            corruption_function=swap_adjacent_words,
            scope=AttackWave.WORD,
            seed=seed,
            rate=resolve_rate(
                rate=rate,
                legacy_value=swap_rate,
                default=0.5,
                legacy_name="swap_rate",
            ),
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return this glitchling's pipeline descriptor.

        Returns ``None`` when no ``rate`` is stored in ``self.kwargs``.
        """
        configured = self.kwargs.get("rate")
        if configured is not None:
            return {
                "type": "swap_adjacent",
                "swap_rate": float(configured),
            }
        return None
123
+
124
+
125
+ adjax = Adjax()
126
+
127
+
128
+ __all__ = ["Adjax", "adjax", "swap_adjacent_words"]
@@ -0,0 +1,127 @@
1
+ """Smart-quote glitchling that swaps straight quotes for fancy counterparts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import random
7
+ from functools import cache
8
+ from importlib import resources
9
+ from typing import Any, Sequence, cast
10
+
11
+ from ._rust_extensions import get_rust_operation
12
+ from .core import AttackOrder, AttackWave, Gaggle, Glitchling
13
+
14
+ # Load Rust-accelerated operation if available
15
+ _apostrofae_rust = get_rust_operation("apostrofae")
16
+
17
+
18
@cache
def _load_replacement_pairs() -> dict[str, list[tuple[str, str]]]:
    """Read ``apostrofae_pairs.json`` from the package's ``assets`` directory.

    Returns a mapping from each key in the file to a list of two-item tuples.
    Entries that do not have exactly two items are dropped. The result is
    memoised by ``functools.cache``, so the file is read once per process.
    """

    asset = resources.files(f"{__package__}.assets").joinpath("apostrofae_pairs.json")
    with asset.open("r", encoding="utf-8") as stream:
        raw: dict[str, list[Sequence[str]]] = json.load(stream)

    return {
        glyph: [(entry[0], entry[1]) for entry in entries if len(entry) == 2]
        for glyph, entries in raw.items()
    }
30
+
31
+
32
def _find_quote_pairs(text: str) -> list[tuple[int, int, str]]:
    """Pair up successive occurrences of each straight-quote glyph in ``text``.

    The string is scanned once. For each of ``"``, ``'`` and the backtick, the
    first occurrence opens a pair and the next occurrence of the same glyph
    closes it; the three glyphs are tracked independently. A glyph left
    without a partner at the end of the text produces no pair, so a lone
    apostrophe (e.g. a single ``it's``) is not reported.

    Returns:
        ``(open_index, close_index, glyph)`` tuples, ordered by the position
        of the closing glyph.
    """

    quote_glyphs = ('"', "'", "`")
    # Glyph -> index of its currently unmatched opener.
    pending: dict[str, int] = {}
    matched: list[tuple[int, int, str]] = []

    for position, symbol in enumerate(text):
        if symbol not in quote_glyphs:
            continue
        if symbol in pending:
            matched.append((pending.pop(symbol), position, symbol))
        else:
            pending[symbol] = position

    return matched
54
+
55
+
56
def _apostrofae_python(text: str, *, rng: random.Random) -> str:
    """Pure-Python fallback: replace paired straight quotes with fancy glyphs.

    For each pair reported by ``_find_quote_pairs``, one replacement pair is
    drawn with ``rng.choice`` from the options loaded for that glyph. The
    opening and closing characters are then overwritten in place. Pairs whose
    glyph has no options are left as they are.
    """

    replacement_table = _load_replacement_pairs()
    quote_spans = _find_quote_pairs(text)
    if not quote_spans:
        return text

    buffer = list(text)
    for open_at, close_at, glyph in quote_spans:
        choices = replacement_table.get(glyph)
        if choices:
            buffer[open_at], buffer[close_at] = rng.choice(choices)
    return "".join(buffer)
73
+
74
+
75
def smart_quotes(
    text: str,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Replace straight quotes, apostrophes, and backticks with fancy pairs.

    Args:
        text: Input text; empty input is returned unchanged.
        seed: Seed for a fresh ``random.Random`` when ``rng`` is not given.
        rng: Random generator to use; takes precedence over ``seed``.

    Returns:
        The text produced by the Rust operation when one was loaded,
        otherwise by the pure-Python fallback.
    """

    if not text:
        return text

    generator = random.Random(seed) if rng is None else rng

    if _apostrofae_rust is None:
        return _apostrofae_python(text, rng=generator)

    return cast(str, _apostrofae_rust(text, generator))
92
+
93
+
94
class Apostrofae(Glitchling):
    """Glitchling that swaps straight quotes for decorative Unicode pairs."""

    def __init__(self, *, seed: int | None = None) -> None:
        """Configure the glitchling with an optional ``seed``."""
        # Remember the seed the caller supplied, separately from ``self.seed``.
        self._master_seed: int | None = seed
        super().__init__(
            name="Apostrofae",
            corruption_function=smart_quotes,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.NORMAL,
            seed=seed,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return this glitchling's pipeline descriptor."""
        return {"type": "apostrofae"}

    def reset_rng(self, seed: int | None = None) -> None:  # pragma: no cover - exercised indirectly
        """Reset the random state, deriving a fresh seed when one is supplied.

        With ``seed=None`` the call is delegated unchanged to the base class.
        Otherwise the seed is recorded, the base reset runs, and, provided the
        base class left ``self.seed`` set, the seed, generator and
        ``kwargs["seed"]`` are replaced with a value from ``Gaggle.derive_seed``.
        """
        if seed is None:
            super().reset_rng(None)
            return

        self._master_seed = seed
        super().reset_rng(seed)
        if self.seed is None:
            return

        derived_seed = int(Gaggle.derive_seed(int(seed), self.name, 0))
        self.seed = derived_seed
        self.rng = random.Random(self.seed)
        self.kwargs["seed"] = self.seed
122
+
123
+
124
+ apostrofae = Apostrofae()
125
+
126
+
127
+ __all__ = ["Apostrofae", "apostrofae", "smart_quotes"]
File without changes
@@ -0,0 +1,32 @@
1
+ {
2
+ "\"": [
3
+ ["“", "”"],
4
+ ["„", "“"],
5
+ ["«", "»"],
6
+ ["‹", "›"],
7
+ ["『", "』"],
8
+ ["「", "」"],
9
+ ["﹁", "﹂"],
10
+ ["﹃", "﹄"],
11
+ ["〝", "〞"],
12
+ ["❝", "❞"]
13
+ ],
14
+ "'": [
15
+ ["‘", "’"],
16
+ ["‚", "‘"],
17
+ ["‹", "›"],
18
+ ["❮", "❯"],
19
+ ["❛", "❜"],
20
+ ["﹇", "﹈"]
21
+ ],
22
+ "`": [
23
+ ["‵", "′"],
24
+ ["﹁", "﹂"],
25
+ ["﹃", "﹄"],
26
+ ["⌈", "⌉"],
27
+ ["⌊", "⌋"],
28
+ ["⎡", "⎤"],
29
+ ["⎣", "⎦"],
30
+ ["〝", "〞"]
31
+ ]
32
+ }