glitchlings 0.4.4__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (47)
  1. glitchlings/__init__.py +67 -0
  2. glitchlings/__main__.py +8 -0
  3. glitchlings/_zoo_rust.cp310-win_amd64.pyd +0 -0
  4. glitchlings/compat.py +284 -0
  5. glitchlings/config.py +388 -0
  6. glitchlings/config.toml +3 -0
  7. glitchlings/dlc/__init__.py +7 -0
  8. glitchlings/dlc/_shared.py +153 -0
  9. glitchlings/dlc/huggingface.py +81 -0
  10. glitchlings/dlc/prime.py +254 -0
  11. glitchlings/dlc/pytorch.py +166 -0
  12. glitchlings/dlc/pytorch_lightning.py +215 -0
  13. glitchlings/lexicon/__init__.py +192 -0
  14. glitchlings/lexicon/_cache.py +110 -0
  15. glitchlings/lexicon/data/default_vector_cache.json +82 -0
  16. glitchlings/lexicon/metrics.py +162 -0
  17. glitchlings/lexicon/vector.py +651 -0
  18. glitchlings/lexicon/wordnet.py +232 -0
  19. glitchlings/main.py +364 -0
  20. glitchlings/util/__init__.py +195 -0
  21. glitchlings/util/adapters.py +27 -0
  22. glitchlings/zoo/__init__.py +168 -0
  23. glitchlings/zoo/_ocr_confusions.py +32 -0
  24. glitchlings/zoo/_rate.py +131 -0
  25. glitchlings/zoo/_rust_extensions.py +143 -0
  26. glitchlings/zoo/_sampling.py +54 -0
  27. glitchlings/zoo/_text_utils.py +100 -0
  28. glitchlings/zoo/adjax.py +128 -0
  29. glitchlings/zoo/apostrofae.py +127 -0
  30. glitchlings/zoo/assets/__init__.py +0 -0
  31. glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
  32. glitchlings/zoo/core.py +582 -0
  33. glitchlings/zoo/jargoyle.py +335 -0
  34. glitchlings/zoo/mim1c.py +109 -0
  35. glitchlings/zoo/ocr_confusions.tsv +30 -0
  36. glitchlings/zoo/redactyl.py +193 -0
  37. glitchlings/zoo/reduple.py +148 -0
  38. glitchlings/zoo/rushmore.py +153 -0
  39. glitchlings/zoo/scannequin.py +171 -0
  40. glitchlings/zoo/typogre.py +231 -0
  41. glitchlings/zoo/zeedub.py +185 -0
  42. glitchlings-0.4.4.dist-info/METADATA +627 -0
  43. glitchlings-0.4.4.dist-info/RECORD +47 -0
  44. glitchlings-0.4.4.dist-info/WHEEL +5 -0
  45. glitchlings-0.4.4.dist-info/entry_points.txt +2 -0
  46. glitchlings-0.4.4.dist-info/licenses/LICENSE +201 -0
  47. glitchlings-0.4.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import random
4
+ from typing import Sequence
5
+
6
+
7
def weighted_sample_without_replacement(
    population: Sequence[int],
    weights: Sequence[float],
    *,
    k: int,
    rng: random.Random,
) -> list[int]:
    """Draw ``k`` distinct entries of ``population``, biased by ``weights``.

    Each draw picks one remaining entry with probability proportional to its
    weight, removes it, and repeats. When the remaining weights sum to zero or
    less, the draw falls back to a uniform ``rng.randrange`` pick.

    Args:
        population: Values to sample from.
        weights: One weight per entry of ``population``.
        k: Number of entries to draw.
        rng: Random source; consumed once per draw.

    Returns:
        The drawn values in draw order. Empty when ``k`` is zero or the
        population is empty.

    Raises:
        ValueError: If ``k`` is negative, the two sequences differ in length,
            or ``k`` exceeds the size of a non-empty population.
    """
    if k < 0:
        raise ValueError("Sample size cannot be negative")
    if len(population) != len(weights):
        raise ValueError("Population and weight sequences must be the same length")

    remaining = list(zip(population, weights))
    available = len(remaining)
    if k == 0 or available == 0:
        return []
    if k > available:
        raise ValueError("Sample larger than population or is negative")

    picked: list[int] = []
    for _draw in range(k):
        # Recomputed every draw because the pool shrinks by one each time.
        mass = sum(pair[1] for pair in remaining)
        if mass <= 0.0:
            position = rng.randrange(len(remaining))
        else:
            cutoff = rng.random() * mass
            running = 0.0
            # Default to the last slot in case rounding keeps the running
            # total below the cutoff.
            position = len(remaining) - 1
            for slot, pair in enumerate(remaining):
                running += pair[1]
                if running >= cutoff:
                    position = slot
                    break
        picked.append(remaining.pop(position)[0])

    return picked
52
+
53
+
54
# Public API of this module: only the sampling helper is exported.
__all__ = ["weighted_sample_without_replacement"]
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Sequence
6
+
7
# The capturing group makes ``split`` keep each whitespace run as its own token.
_WORD_SPLIT_PATTERN = re.compile(r"(\s+)")
# Groups: leading non-word characters, a lazily matched middle, and trailing
# non-word characters.
_TOKEN_EDGES_PATTERN = re.compile(r"^(\W*)(.*?)(\W*)$")
9
+
10
+
11
def split_preserving_whitespace(text: str) -> list[str]:
    """Break ``text`` into alternating word and whitespace-run tokens.

    The split pattern captures its separator, so whitespace runs are kept in
    the result and ``"".join(result)`` reproduces ``text``. Non-whitespace
    pieces land on even indices and whitespace runs on odd indices; a piece
    may be the empty string when ``text`` starts or ends with whitespace.
    """
    pieces = _WORD_SPLIT_PATTERN.split(text)
    return pieces
14
+
15
+
16
def split_token_edges(token: str) -> tuple[str, str, str]:
    """Split ``token`` into ``(leading, core, trailing)`` pieces.

    ``leading`` and ``trailing`` are the runs of non-word characters at either
    end and ``core`` is whatever lies between them. If the edge pattern does
    not match, the whole token is returned as the core with empty edges.
    """
    found = _TOKEN_EDGES_PATTERN.match(token)
    if found is not None:
        leading, core, trailing = found.groups()
        return leading, core, trailing
    return "", token, ""
22
+
23
+
24
def token_core_length(token: str) -> int:
    """Return a positive length for ``token`` for use in weighting heuristics.

    Uses the length of the token's core when there is one, otherwise the
    length of the token itself. The result is never less than 1.
    """
    core = split_token_edges(token)[1]
    size = len(core or token)
    if size <= 0:
        trimmed = token.strip()
        size = len(trimmed or token)
    # Clamp so callers can always use the result as a non-zero weight.
    return max(size, 1)
35
+
36
+
37
@dataclass(frozen=True)
class WordToken:
    """Immutable record describing one non-whitespace token from a word splitter."""

    # Position of the token in the sequence it was collected from.
    index: int
    # Leading edge characters split off the token.
    prefix: str
    # Middle portion of the token; may be empty.
    core: str
    # Trailing edge characters split off the token.
    suffix: str
    # Length value stored alongside the core by whoever built the record.
    core_length: int

    @property
    def has_core(self) -> bool:
        """Report whether ``core`` is non-empty."""
        return True if self.core else False
51
+
52
+
53
def collect_word_tokens(
    tokens: Sequence[str],
    *,
    skip_first_word: bool = False,
) -> list[WordToken]:
    """Build a :class:`WordToken` for every usable even-indexed entry of ``tokens``.

    Only even indices are visited; entries that are empty or purely
    whitespace are skipped.

    Args:
        tokens: Token sequence produced by :func:`split_preserving_whitespace`.
        skip_first_word: Start scanning at index 2 instead of 0. Note that
            index 0 is skipped even if it holds an empty string.

    Returns:
        One record per kept token, in input order, each with a
        ``core_length`` of at least 1.
    """
    offset = 2 if skip_first_word else 0
    words: list[WordToken] = []
    for position in range(offset, len(tokens), 2):
        raw = tokens[position]
        if raw and not raw.isspace():
            lead, body, tail = split_token_edges(raw)
            weight = len(body)
            if weight <= 0:
                # No core: fall back to the length of the token itself.
                trimmed = raw.strip()
                weight = len(trimmed or raw)
            words.append(
                WordToken(
                    index=position,
                    prefix=lead,
                    core=body,
                    suffix=tail,
                    core_length=max(weight, 1),
                )
            )

    return words
92
+
93
+
94
# Public API of this module: the splitting helpers and the WordToken record.
__all__ = [
    "split_preserving_whitespace",
    "split_token_edges",
    "token_core_length",
    "WordToken",
    "collect_word_tokens",
]
@@ -0,0 +1,128 @@
1
+ from __future__ import annotations
2
+
3
+ import random
4
+ from typing import Any, cast
5
+
6
+ from ._rate import resolve_rate
7
+ from ._rust_extensions import get_rust_operation
8
+ from ._text_utils import split_preserving_whitespace, split_token_edges
9
+ from .core import AttackWave, Glitchling
10
+
11
# Load the Rust-accelerated operation if available. The value is compared
# against None before use; when it is None the pure-Python implementation in
# this module runs instead.
_swap_adjacent_words_rust = get_rust_operation("swap_adjacent_words")
13
+
14
+
15
def _python_swap_adjacent_words(
    text: str,
    *,
    rate: float,
    rng: random.Random,
) -> str:
    """Exchange the cores of neighbouring words, leaving edges and spacing alone.

    Words are taken in disjoint pairs (1st with 2nd, 3rd with 4th, ...). A pair
    is swapped with probability ``rate`` (clamped to ``[0, 1]``); ``rng`` is
    consulted only for pairs where both words have a core and the clamped rate
    is below 1. Each word keeps its own prefix and suffix.

    Returns ``text`` unchanged when there are fewer than two words or the
    clamped rate is zero.
    """
    tokens = split_preserving_whitespace(text)
    if len(tokens) < 2:
        return text

    # Words occupy the even slots of the split; drop empty or blank entries.
    word_slots: list[int] = [
        slot
        for slot in range(0, len(tokens), 2)
        if tokens[slot] and not tokens[slot].isspace()
    ]
    if len(word_slots) < 2:
        return text

    probability = max(0.0, min(rate, 1.0))
    if probability <= 0.0:
        return text

    # Pair up consecutive words without overlap; a trailing odd word is left alone.
    for first, second in zip(word_slots[0::2], word_slots[1::2]):
        first_prefix, first_core, first_suffix = split_token_edges(tokens[first])
        second_prefix, second_core, second_suffix = split_token_edges(tokens[second])

        if (
            first_core
            and second_core
            and (probability >= 1.0 or rng.random() < probability)
        ):
            tokens[first] = f"{first_prefix}{second_core}{first_suffix}"
            tokens[second] = f"{second_prefix}{first_core}{second_suffix}"

    return "".join(tokens)
62
+
63
+
64
def swap_adjacent_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    swap_rate: float | None = None,
) -> str:
    """Swap the cores of adjacent words, keeping spacing and punctuation in place.

    Args:
        text: Input text.
        rate: Swap probability; resolved together with ``swap_rate`` via
            ``resolve_rate`` with a default of 0.5, then clamped to ``[0, 1]``.
        seed: Seed for a fresh ``random.Random`` when ``rng`` is not given.
        rng: Random source to use instead of creating one from ``seed``.
        swap_rate: Legacy spelling of ``rate``.

    Returns:
        The transformed text, produced by the Rust operation when it was
        loaded and by the Python implementation otherwise.
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=swap_rate,
        default=0.5,
        legacy_name="swap_rate",
    )
    bounded = max(0.0, min(resolved, 1.0))

    generator = random.Random(seed) if rng is None else rng

    rust_impl = _swap_adjacent_words_rust
    if rust_impl is None:
        return _python_swap_adjacent_words(text, rate=bounded, rng=generator)
    return cast(str, rust_impl(text, bounded, generator))
88
+
89
+
90
class Adjax(Glitchling):
    """Glitchling whose corruption swaps adjacent words."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        swap_rate: float | None = None,
        seed: int | None = None,
    ) -> None:
        """Configure the glitchling.

        Args:
            rate: Swap probability; defaults to 0.5 when neither spelling is given.
            swap_rate: Legacy spelling of ``rate``.
            seed: Seed forwarded to the base class.
        """
        # Set before the base initialiser runs, as in the original ordering.
        self._param_aliases = {"swap_rate": "rate"}
        resolved = resolve_rate(
            rate=rate,
            legacy_value=swap_rate,
            default=0.5,
            legacy_name="swap_rate",
        )
        super().__init__(
            name="Adjax",
            corruption_function=swap_adjacent_words,
            scope=AttackWave.WORD,
            seed=seed,
            rate=resolved,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return the pipeline descriptor, or ``None`` when no rate is stored."""
        configured = self.kwargs.get("rate")
        if configured is not None:
            return {
                "type": "swap_adjacent",
                "swap_rate": float(configured),
            }
        return None
123
+
124
+
125
# Module-level instance built with the default arguments.
adjax = Adjax()
126
+
127
+
128
# Public API of this module: the class, its default instance, and the function.
__all__ = ["Adjax", "adjax", "swap_adjacent_words"]
@@ -0,0 +1,127 @@
1
+ """Smart-quote glitchling that swaps straight quotes for fancy counterparts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import random
7
+ from functools import cache
8
+ from importlib import resources
9
+ from typing import Any, Sequence, cast
10
+
11
+ from ._rust_extensions import get_rust_operation
12
+ from .core import AttackOrder, AttackWave, Gaggle, Glitchling
13
+
14
+ # Load Rust-accelerated operation if available
15
+ _apostrofae_rust = get_rust_operation("apostrofae")
16
+
17
+
18
@cache
def _load_replacement_pairs() -> dict[str, list[tuple[str, str]]]:
    """Read the straight-quote to fancy-pair table from the packaged JSON asset.

    Entries that do not have exactly two elements are dropped. The result is
    memoised, so every caller receives the same dictionary object.
    """

    asset = resources.files(f"{__package__}.assets").joinpath("apostrofae_pairs.json")
    with asset.open("r", encoding="utf-8") as stream:
        raw: dict[str, list[Sequence[str]]] = json.load(stream)

    return {
        glyph: [(entry[0], entry[1]) for entry in entries if len(entry) == 2]
        for glyph, entries in raw.items()
    }
30
+
31
+
32
+ def _find_quote_pairs(text: str) -> list[tuple[int, int, str]]:
33
+ """Return all balanced pairs of straight quotes in ``text``.
34
+
35
+ The search walks the string once, pairing sequential occurrences of each quote
36
+ glyph. Unmatched openers remain untouched so contractions (e.g. ``it's``)
37
+ survive unmodified.
38
+ """
39
+
40
+ stacks: dict[str, int | None] = {'"': None, "'": None, "`": None}
41
+ pairs: list[tuple[int, int, str]] = []
42
+
43
+ for index, ch in enumerate(text):
44
+ if ch not in stacks:
45
+ continue
46
+ start = stacks[ch]
47
+ if start is None:
48
+ stacks[ch] = index
49
+ else:
50
+ pairs.append((start, index, ch))
51
+ stacks[ch] = None
52
+
53
+ return pairs
54
+
55
+
56
def _apostrofae_python(text: str, *, rng: random.Random) -> str:
    """Pure-Python path: rewrite each paired straight quote with a fancy pair.

    For every pair found in ``text``, one replacement pair is chosen with
    ``rng.choice`` from the options for that glyph. Pairs whose glyph has no
    options are left as they are. Returns ``text`` unchanged when no pairs
    are found.
    """

    table = _load_replacement_pairs()
    spans = _find_quote_pairs(text)
    if not spans:
        return text

    buffer = list(text)
    for open_at, close_at, glyph in spans:
        choices = table.get(glyph)
        if choices:
            buffer[open_at], buffer[close_at] = rng.choice(choices)
    return "".join(buffer)
73
+
74
+
75
def smart_quotes(
    text: str,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Swap paired straight quotes, apostrophes, and backticks for fancy pairs.

    Args:
        text: Input text; returned as-is when empty.
        seed: Seed for a fresh ``random.Random`` when ``rng`` is not given.
        rng: Random source to use instead of creating one from ``seed``.

    Returns:
        The transformed text, produced by the Rust operation when it was
        loaded and by the Python fallback otherwise.
    """

    if not text:
        return text

    source = rng if rng is not None else random.Random(seed)

    if _apostrofae_rust is None:
        return _apostrofae_python(text, rng=source)
    return cast(str, _apostrofae_rust(text, source))
92
+
93
+
94
class Apostrofae(Glitchling):
    """Glitchling that replaces straight quotes with decorative Unicode pairs."""

    def __init__(self, *, seed: int | None = None) -> None:
        """Create the glitchling, remembering ``seed`` as the master seed."""
        self._master_seed: int | None = seed
        super().__init__(
            name="Apostrofae",
            corruption_function=smart_quotes,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.NORMAL,
            seed=seed,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return the pipeline descriptor for this glitchling."""
        return {"type": "apostrofae"}

    def reset_rng(self, seed: int | None = None) -> None:  # pragma: no cover - exercised indirectly
        """Reset the random source, deriving a per-glitchling seed when one is given.

        With no ``seed`` the call is delegated to the base class unchanged.
        With a ``seed`` it is recorded as the master seed and the base class is
        reset with it. If the instance still has a seed afterwards, the seed,
        the ``rng`` and ``kwargs["seed"]`` are replaced by a value derived from
        ``seed`` and the glitchling's name.
        """
        if seed is None:
            super().reset_rng(None)
            return

        self._master_seed = seed
        super().reset_rng(seed)
        if self.seed is None:
            return

        self.seed = int(Gaggle.derive_seed(int(seed), self.name, 0))
        self.rng = random.Random(self.seed)
        self.kwargs["seed"] = self.seed
122
+
123
+
124
+ apostrofae = Apostrofae()
125
+
126
+
127
# Public API of this module: the class, its default instance, and the function.
__all__ = ["Apostrofae", "apostrofae", "smart_quotes"]
File without changes
@@ -0,0 +1,32 @@
1
+ {
2
+ "\"": [
3
+ ["“", "”"],
4
+ ["„", "“"],
5
+ ["«", "»"],
6
+ ["‹", "›"],
7
+ ["『", "』"],
8
+ ["「", "」"],
9
+ ["﹁", "﹂"],
10
+ ["﹃", "﹄"],
11
+ ["〝", "〞"],
12
+ ["❝", "❞"]
13
+ ],
14
+ "'": [
15
+ ["‘", "’"],
16
+ ["‚", "‘"],
17
+ ["‹", "›"],
18
+ ["❮", "❯"],
19
+ ["❛", "❜"],
20
+ ["﹇", "﹈"]
21
+ ],
22
+ "`": [
23
+ ["‵", "′"],
24
+ ["﹁", "﹂"],
25
+ ["﹃", "﹄"],
26
+ ["⌈", "⌉"],
27
+ ["⌊", "⌋"],
28
+ ["⎡", "⎤"],
29
+ ["⎣", "⎦"],
30
+ ["〝", "〞"]
31
+ ]
32
+ }