glitchlings 0.4.5__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +71 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust.cp311-win_amd64.pyd +0 -0
- glitchlings/compat.py +282 -0
- glitchlings/config.py +386 -0
- glitchlings/config.toml +3 -0
- glitchlings/data/__init__.py +1 -0
- glitchlings/data/hokey_assets.json +193 -0
- glitchlings/dlc/__init__.py +7 -0
- glitchlings/dlc/_shared.py +153 -0
- glitchlings/dlc/huggingface.py +81 -0
- glitchlings/dlc/prime.py +254 -0
- glitchlings/dlc/pytorch.py +166 -0
- glitchlings/dlc/pytorch_lightning.py +209 -0
- glitchlings/lexicon/__init__.py +192 -0
- glitchlings/lexicon/_cache.py +108 -0
- glitchlings/lexicon/data/default_vector_cache.json +82 -0
- glitchlings/lexicon/metrics.py +162 -0
- glitchlings/lexicon/vector.py +652 -0
- glitchlings/lexicon/wordnet.py +228 -0
- glitchlings/main.py +364 -0
- glitchlings/util/__init__.py +195 -0
- glitchlings/util/adapters.py +27 -0
- glitchlings/util/hokey_generator.py +144 -0
- glitchlings/util/stretch_locator.py +140 -0
- glitchlings/util/stretchability.py +375 -0
- glitchlings/zoo/__init__.py +172 -0
- glitchlings/zoo/_ocr_confusions.py +32 -0
- glitchlings/zoo/_rate.py +131 -0
- glitchlings/zoo/_rust_extensions.py +143 -0
- glitchlings/zoo/_sampling.py +54 -0
- glitchlings/zoo/_text_utils.py +100 -0
- glitchlings/zoo/adjax.py +128 -0
- glitchlings/zoo/apostrofae.py +127 -0
- glitchlings/zoo/assets/__init__.py +0 -0
- glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
- glitchlings/zoo/core.py +582 -0
- glitchlings/zoo/hokey.py +173 -0
- glitchlings/zoo/jargoyle.py +335 -0
- glitchlings/zoo/mim1c.py +109 -0
- glitchlings/zoo/ocr_confusions.tsv +30 -0
- glitchlings/zoo/redactyl.py +193 -0
- glitchlings/zoo/reduple.py +148 -0
- glitchlings/zoo/rushmore.py +153 -0
- glitchlings/zoo/scannequin.py +171 -0
- glitchlings/zoo/typogre.py +231 -0
- glitchlings/zoo/zeedub.py +185 -0
- glitchlings-0.4.5.dist-info/METADATA +648 -0
- glitchlings-0.4.5.dist-info/RECORD +53 -0
- glitchlings-0.4.5.dist-info/WHEEL +5 -0
- glitchlings-0.4.5.dist-info/entry_points.txt +2 -0
- glitchlings-0.4.5.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.4.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Centralized loading and fallback management for optional Rust extensions.
|
|
2
|
+
|
|
3
|
+
This module provides a single source of truth for importing Rust-accelerated
|
|
4
|
+
operations, eliminating duplicated try/except blocks across the codebase.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any, Callable
|
|
11
|
+
|
|
12
|
+
log = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Cache of loaded Rust operations to avoid repeated import attempts
|
|
16
|
+
_rust_operation_cache: dict[str, Callable[..., Any] | None] = {}
|
|
17
|
+
_rust_module_available: bool | None = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def is_rust_module_available() -> bool:
    """Report whether ``glitchlings._zoo_rust`` is importable.

    Returns
    -------
    bool
        ``True`` when importing ``glitchlings._zoo_rust`` succeeds, ``False``
        when the import raises ``ImportError``.

    Notes
    -----
    The outcome is stored in the module-level ``_rust_module_available`` flag,
    so the import is attempted only while that flag is still ``None``.
    """
    global _rust_module_available

    if _rust_module_available is None:
        try:
            import glitchlings._zoo_rust  # noqa: F401
        except (ImportError, ModuleNotFoundError):
            _rust_module_available = False
            log.debug("Rust extension module not available; using Python fallbacks")
        else:
            _rust_module_available = True
            log.debug("Rust extension module successfully loaded")

    return _rust_module_available
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_rust_operation(operation_name: str) -> Callable[..., Any] | None:
    """Look up one attribute of ``glitchlings._zoo_rust`` by name, with caching.

    Parameters
    ----------
    operation_name : str
        Attribute name to fetch from ``glitchlings._zoo_rust``.

    Returns
    -------
    Callable | None
        The attribute found on the extension module, or ``None`` when the
        module cannot be imported or has no attribute of that name.

    Examples
    --------
    >>> fatfinger = get_rust_operation("fatfinger")
    >>> if fatfinger is not None:
    ...     result = fatfinger(text, ...)
    ... else:
    ...     result = python_fallback(text, ...)

    Notes
    -----
    - Every outcome, including ``None``, is stored in ``_rust_operation_cache``
      and returned directly on later calls with the same name.
    - Lookup failures are reported through ``log.debug`` only.
    """
    # A cached entry (even a cached ``None``) short-circuits everything else.
    try:
        return _rust_operation_cache[operation_name]
    except KeyError:
        pass

    # Without the extension module there is nothing to look up.
    if not is_rust_module_available():
        _rust_operation_cache[operation_name] = None
        return None

    try:
        from glitchlings import _zoo_rust

        resolved = getattr(_zoo_rust, operation_name, None)
    except (ImportError, ModuleNotFoundError, AttributeError) as exc:
        log.debug(f"Failed to load Rust operation '{operation_name}': {exc}")
        _rust_operation_cache[operation_name] = None
        return None

    _rust_operation_cache[operation_name] = resolved
    if resolved is not None:
        log.debug(f"Rust operation '{operation_name}' loaded successfully")
    else:
        log.debug(f"Rust operation '{operation_name}' not found in extension module")
    return resolved
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def clear_cache() -> None:
    """Forget every cached lookup so the next access re-checks the extension.

    Empties ``_rust_operation_cache`` and resets ``_rust_module_available`` to
    ``None``. Mainly intended for tests in which the availability of the Rust
    module may change while the process is running.
    """
    global _rust_module_available

    # The dict is emptied in place, so existing references to it stay valid.
    _rust_operation_cache.clear()
    _rust_module_available = None
    log.debug("Rust extension cache cleared")
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def preload_operations(*operation_names: str) -> dict[str, Callable[..., Any] | None]:
    """Resolve several Rust operations in one call.

    Parameters
    ----------
    *operation_names : str
        Names to pass, one by one and in order, to ``get_rust_operation``.

    Returns
    -------
    dict[str, Callable | None]
        One entry per distinct name, mapped to whatever ``get_rust_operation``
        returned for it (``None`` when unavailable).

    Examples
    --------
    >>> ops = preload_operations("fatfinger", "reduplicate_words", "delete_random_words")
    >>> fatfinger = ops["fatfinger"]
    """
    loaded: dict[str, Callable[..., Any] | None] = {}
    for requested in operation_names:
        loaded[requested] = get_rust_operation(requested)
    return loaded
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
__all__ = [
|
|
139
|
+
"is_rust_module_available",
|
|
140
|
+
"get_rust_operation",
|
|
141
|
+
"clear_cache",
|
|
142
|
+
"preload_operations",
|
|
143
|
+
]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
from typing import Sequence
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def weighted_sample_without_replacement(
    population: Sequence[int],
    weights: Sequence[float],
    *,
    k: int,
    rng: random.Random,
) -> list[int]:
    """Draw ``k`` distinct entries of ``population``, biased by ``weights``.

    Each draw picks one remaining entry by walking the cumulative weights up
    to a random threshold, removes it, and repeats. When the remaining weights
    sum to zero or less, the draw falls back to ``rng.randrange``.

    Args:
        population: Values to sample from.
        weights: One weight per entry of ``population``.
        k: Number of entries to draw.
        rng: Source of randomness; ``random()`` and ``randrange()`` are used.

    Returns:
        The drawn values in draw order. Empty when ``k`` is zero or the
        population is empty.

    Raises:
        ValueError: If ``k`` is negative, the two sequences differ in length,
            or ``k`` exceeds the size of a non-empty population.
    """
    if k < 0:
        raise ValueError("Sample size cannot be negative")
    if len(population) != len(weights):
        raise ValueError("Population and weight sequences must be the same length")

    remaining_values = list(population)
    remaining_weights = list(weights)
    if not remaining_values or k == 0:
        return []
    if k > len(remaining_values):
        raise ValueError("Sample larger than population or is negative")

    drawn: list[int] = []
    while len(drawn) < k:
        weight_sum = sum(remaining_weights)
        if weight_sum <= 0.0:
            # No usable weights remain: choose uniformly among what is left.
            position = rng.randrange(len(remaining_values))
        else:
            cutoff = rng.random() * weight_sum
            running = 0.0
            for position, weight in enumerate(remaining_weights):
                running += weight
                if running >= cutoff:
                    break
            else:
                # The cutoff was never reached; fall back to the final entry.
                position = len(remaining_values) - 1
        del remaining_weights[position]
        drawn.append(remaining_values.pop(position))

    return drawn
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
__all__ = ["weighted_sample_without_replacement"]
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Sequence
|
|
6
|
+
|
|
7
|
+
_WORD_SPLIT_PATTERN = re.compile(r"(\s+)")
|
|
8
|
+
_TOKEN_EDGES_PATTERN = re.compile(r"^(\W*)(.*?)(\W*)$")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def split_preserving_whitespace(text: str) -> list[str]:
    """Split ``text`` on whitespace runs, keeping the runs as list items.

    The split uses ``_WORD_SPLIT_PATTERN``, whose capturing group makes
    ``re.split`` return the separators too, so joining the result with ``""``
    reproduces ``text``.
    """
    pieces = _WORD_SPLIT_PATTERN.split(text)
    return pieces
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def split_token_edges(token: str) -> tuple[str, str, str]:
    """Break ``token`` into ``(leading, core, trailing)`` pieces.

    The pieces are the three groups of ``_TOKEN_EDGES_PATTERN``. When the
    pattern does not match, the whole token is returned as the core with
    empty edges.
    """
    found = _TOKEN_EDGES_PATTERN.match(token)
    if found is not None:
        return found.group(1, 2, 3)
    return "", token, ""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def token_core_length(token: str) -> int:
    """Return a positive length for ``token`` for use in weighting heuristics.

    The core reported by ``split_token_edges`` is measured when it is
    non-empty, otherwise the token itself. The result is never below 1.
    """
    core = split_token_edges(token)[1]
    size = len(core or token)
    if size <= 0:
        # Nothing measurable so far: try the stripped token, then the raw one.
        trimmed = token.strip()
        size = len(trimmed or token)
    return size if size > 0 else 1
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class WordToken:
    """Immutable record describing one non-whitespace token of a split text."""

    # Position of the token within the token sequence it was collected from.
    index: int
    # Leading segment reported by the edge splitter.
    prefix: str
    # Middle segment reported by the edge splitter; may be empty.
    core: str
    # Trailing segment reported by the edge splitter.
    suffix: str
    # Length recorded for the token by whoever built this record.
    core_length: int

    @property
    def has_core(self) -> bool:
        """Tell whether ``core`` is non-empty."""
        return True if self.core else False
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def collect_word_tokens(
    tokens: Sequence[str],
    *,
    skip_first_word: bool = False,
) -> list[WordToken]:
    """Build a ``WordToken`` for each non-blank entry at an even position.

    Args:
        tokens: Token sequence produced by :func:`split_preserving_whitespace`.
        skip_first_word: When true, scanning starts at position 2 instead of 0
            (used by Rushmore to preserve leading words).

    Returns:
        One ``WordToken`` per visited entry that is neither empty nor
        whitespace-only, in sequence order. Each ``core_length`` is at least 1.
    """
    first_position = 2 if skip_first_word else 0
    words: list[WordToken] = []

    # Only even positions are visited; odd ones are skipped as separators.
    for position in range(first_position, len(tokens), 2):
        piece = tokens[position]
        if piece and not piece.isspace():
            lead, middle, tail = split_token_edges(piece)
            # Measure the core, else the stripped token, else the raw token, else 1.
            measured = len(middle) or len(piece.strip() or piece) or 1
            words.append(
                WordToken(
                    index=position,
                    prefix=lead,
                    core=middle,
                    suffix=tail,
                    core_length=measured,
                )
            )

    return words
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
__all__ = [
|
|
95
|
+
"split_preserving_whitespace",
|
|
96
|
+
"split_token_edges",
|
|
97
|
+
"token_core_length",
|
|
98
|
+
"WordToken",
|
|
99
|
+
"collect_word_tokens",
|
|
100
|
+
]
|
glitchlings/zoo/adjax.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
from typing import Any, cast
|
|
5
|
+
|
|
6
|
+
from ._rate import resolve_rate
|
|
7
|
+
from ._rust_extensions import get_rust_operation
|
|
8
|
+
from ._text_utils import split_preserving_whitespace, split_token_edges
|
|
9
|
+
from .core import AttackWave, Glitchling
|
|
10
|
+
|
|
11
|
+
# Load Rust-accelerated operation if available
|
|
12
|
+
_swap_adjacent_words_rust = get_rust_operation("swap_adjacent_words")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _python_swap_adjacent_words(
    text: str,
    *,
    rate: float,
    rng: random.Random,
) -> str:
    """Exchange the cores of neighbouring words, leaving affixes and spacing alone.

    Words are taken in disjoint pairs (first with second, third with fourth,
    and so on). A pair is swapped with probability ``rate`` after clamping it
    to ``[0, 1]``. ``rng.random()`` is consulted only for pairs whose two
    cores are both non-empty, and only when the clamped rate is below 1.
    """
    pieces = split_preserving_whitespace(text)
    if len(pieces) < 2:
        return text

    # Words are the non-blank entries sitting at even positions.
    word_positions = [
        position
        for position, piece in enumerate(pieces)
        if piece and not piece.isspace() and position % 2 == 0
    ]
    if len(word_positions) < 2:
        return text

    probability = max(0.0, min(rate, 1.0))
    if probability <= 0.0:
        return text

    # Zipping the even and odd slices yields the disjoint (left, right) pairs.
    for first_at, second_at in zip(word_positions[0::2], word_positions[1::2]):
        first_lead, first_core, first_tail = split_token_edges(pieces[first_at])
        second_lead, second_core, second_tail = split_token_edges(pieces[second_at])

        if first_core and second_core:
            if probability >= 1.0 or rng.random() < probability:
                pieces[first_at] = f"{first_lead}{second_core}{first_tail}"
                pieces[second_at] = f"{second_lead}{first_core}{second_tail}"

    return "".join(pieces)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def swap_adjacent_words(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    swap_rate: float | None = None,
) -> str:
    """Swap the cores of adjacent words, keeping spacing and punctuation.

    The rate comes from ``resolve_rate`` (with ``swap_rate`` passed as the
    legacy value and 0.5 as the default) and is clamped to ``[0, 1]``. When
    ``rng`` is omitted, a ``random.Random(seed)`` is created. The Rust
    operation is used when one was loaded, otherwise the Python fallback.
    """
    resolved = resolve_rate(
        rate=rate,
        legacy_value=swap_rate,
        default=0.5,
        legacy_name="swap_rate",
    )
    bounded = max(0.0, min(resolved, 1.0))
    generator = random.Random(seed) if rng is None else rng

    if _swap_adjacent_words_rust is None:
        return _python_swap_adjacent_words(text, rate=bounded, rng=generator)
    return cast(str, _swap_adjacent_words_rust(text, bounded, generator))
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class Adjax(Glitchling):
    """Word-scoped glitchling that exchanges neighbouring words."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        swap_rate: float | None = None,
        seed: int | None = None,
    ) -> None:
        """Configure the glitchling with a swap rate and an optional seed.

        ``swap_rate`` is handed to ``resolve_rate`` as the legacy spelling of
        ``rate``; 0.5 is the default when neither is given.
        """
        # Set before the base initialiser runs.
        # NOTE(review): presumably the base class reads this mapping — confirm.
        self._param_aliases = {"swap_rate": "rate"}
        chosen_rate = resolve_rate(
            rate=rate,
            legacy_value=swap_rate,
            default=0.5,
            legacy_name="swap_rate",
        )
        super().__init__(
            name="Adjax",
            corruption_function=swap_adjacent_words,
            scope=AttackWave.WORD,
            seed=seed,
            rate=chosen_rate,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return the pipeline descriptor, or ``None`` when no rate is stored."""
        configured = self.kwargs.get("rate")
        if configured is not None:
            return {
                "type": "swap_adjacent",
                "swap_rate": float(configured),
            }
        return None
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
adjax = Adjax()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
__all__ = ["Adjax", "adjax", "swap_adjacent_words"]
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Smart-quote glitchling that swaps straight quotes for fancy counterparts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import random
|
|
7
|
+
from functools import cache
|
|
8
|
+
from importlib import resources
|
|
9
|
+
from typing import Any, Sequence, cast
|
|
10
|
+
|
|
11
|
+
from ._rust_extensions import get_rust_operation
|
|
12
|
+
from .core import AttackOrder, AttackWave, Gaggle, Glitchling
|
|
13
|
+
|
|
14
|
+
# Load Rust-accelerated operation if available
|
|
15
|
+
_apostrofae_rust = get_rust_operation("apostrofae")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@cache
def _load_replacement_pairs() -> dict[str, list[tuple[str, str]]]:
    """Read ``apostrofae_pairs.json`` and return its quote replacement table.

    The file is loaded from the ``assets`` sub-package of this package. Each
    key maps to a list of ``(left, right)`` tuples; entries that do not have
    exactly two items are dropped. ``@cache`` keeps the parsed table after
    the first call.
    """
    asset = resources.files(f"{__package__}.assets").joinpath("apostrofae_pairs.json")
    with asset.open("r", encoding="utf-8") as stream:
        raw: dict[str, list[Sequence[str]]] = json.load(stream)

    return {
        glyph: [(entry[0], entry[1]) for entry in entries if len(entry) == 2]
        for glyph, entries in raw.items()
    }
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _find_quote_pairs(text: str) -> list[tuple[int, int, str]]:
|
|
33
|
+
"""Return all balanced pairs of straight quotes in ``text``.
|
|
34
|
+
|
|
35
|
+
The search walks the string once, pairing sequential occurrences of each quote
|
|
36
|
+
glyph. Unmatched openers remain untouched so contractions (e.g. ``it's``)
|
|
37
|
+
survive unmodified.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
stacks: dict[str, int | None] = {'"': None, "'": None, "`": None}
|
|
41
|
+
pairs: list[tuple[int, int, str]] = []
|
|
42
|
+
|
|
43
|
+
for index, ch in enumerate(text):
|
|
44
|
+
if ch not in stacks:
|
|
45
|
+
continue
|
|
46
|
+
start = stacks[ch]
|
|
47
|
+
if start is None:
|
|
48
|
+
stacks[ch] = index
|
|
49
|
+
else:
|
|
50
|
+
pairs.append((start, index, ch))
|
|
51
|
+
stacks[ch] = None
|
|
52
|
+
|
|
53
|
+
return pairs
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _apostrofae_python(text: str, *, rng: random.Random) -> str:
    """Pure-Python fallback: replace paired straight quotes with fancy glyphs.

    For every pair reported by ``_find_quote_pairs``, one ``(left, right)``
    replacement is drawn with ``rng.choice`` from the table entry for that
    glyph. Pairs whose glyph has no replacements are left as they are.
    """
    table = _load_replacement_pairs()
    located = _find_quote_pairs(text)
    if not located:
        return text

    # Work on a list so single positions can be overwritten in place.
    buffer = list(text)
    for open_at, close_at, glyph in located:
        choices = table.get(glyph)
        if choices:
            opening, closing = rng.choice(choices)
            buffer[open_at] = opening
            buffer[close_at] = closing
    return "".join(buffer)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def smart_quotes(
    text: str,
    seed: int | None = None,
    rng: random.Random | None = None,
) -> str:
    """Replace straight quotes, apostrophes, and backticks with fancy pairs.

    Empty input is returned unchanged. When ``rng`` is omitted, a
    ``random.Random(seed)`` is created. The Rust operation is used when one
    was loaded, otherwise the Python fallback.
    """
    if not text:
        return text

    generator = random.Random(seed) if rng is None else rng

    if _apostrofae_rust is None:
        return _apostrofae_python(text, rng=generator)
    return cast(str, _apostrofae_rust(text, generator))
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class Apostrofae(Glitchling):
    """Character-scoped glitchling that swaps straight quotes for Unicode pairs."""

    def __init__(self, *, seed: int | None = None) -> None:
        """Remember the caller's seed and register ``smart_quotes`` as the corruption."""
        # Stored before the base initialiser runs.
        self._master_seed: int | None = seed
        super().__init__(
            name="Apostrofae",
            corruption_function=smart_quotes,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.NORMAL,
            seed=seed,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Return the pipeline descriptor for this glitchling."""
        return {"type": "apostrofae"}

    def reset_rng(self, seed: int | None = None) -> None:  # pragma: no cover - exercised indirectly
        """Reset the RNG, re-deriving the working seed when ``seed`` is given.

        With ``seed=None`` the call is simply forwarded to the base class.
        Otherwise the seed is recorded as the master seed and the base class
        is reset with it. Then, unless ``self.seed`` is ``None`` afterwards,
        a seed derived via ``Gaggle.derive_seed`` replaces ``self.seed``,
        ``self.rng`` and ``self.kwargs["seed"]``.
        """
        if seed is None:
            super().reset_rng(None)
            return

        self._master_seed = seed
        super().reset_rng(seed)
        if self.seed is None:
            return

        derived_seed = int(Gaggle.derive_seed(int(seed), self.name, 0))
        self.seed = derived_seed
        self.rng = random.Random(self.seed)
        self.kwargs["seed"] = self.seed
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
apostrofae = Apostrofae()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
__all__ = ["Apostrofae", "apostrofae", "smart_quotes"]
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"\"": [
|
|
3
|
+
["“", "”"],
|
|
4
|
+
["„", "“"],
|
|
5
|
+
["«", "»"],
|
|
6
|
+
["‹", "›"],
|
|
7
|
+
["『", "』"],
|
|
8
|
+
["「", "」"],
|
|
9
|
+
["﹁", "﹂"],
|
|
10
|
+
["﹃", "﹄"],
|
|
11
|
+
["〝", "〞"],
|
|
12
|
+
["❝", "❞"]
|
|
13
|
+
],
|
|
14
|
+
"'": [
|
|
15
|
+
["‘", "’"],
|
|
16
|
+
["‚", "‘"],
|
|
17
|
+
["‹", "›"],
|
|
18
|
+
["❮", "❯"],
|
|
19
|
+
["❛", "❜"],
|
|
20
|
+
["﹇", "﹈"]
|
|
21
|
+
],
|
|
22
|
+
"`": [
|
|
23
|
+
["‵", "′"],
|
|
24
|
+
["﹁", "﹂"],
|
|
25
|
+
["﹃", "﹄"],
|
|
26
|
+
["⌈", "⌉"],
|
|
27
|
+
["⌊", "⌋"],
|
|
28
|
+
["⎡", "⎤"],
|
|
29
|
+
["⎣", "⎦"],
|
|
30
|
+
["〝", "〞"]
|
|
31
|
+
]
|
|
32
|
+
}
|