glitchlings 0.4.4__cp313-cp313-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +67 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust.cpython-313-x86_64-linux-gnu.so +0 -0
- glitchlings/compat.py +284 -0
- glitchlings/config.py +388 -0
- glitchlings/config.toml +3 -0
- glitchlings/dlc/__init__.py +7 -0
- glitchlings/dlc/_shared.py +153 -0
- glitchlings/dlc/huggingface.py +81 -0
- glitchlings/dlc/prime.py +254 -0
- glitchlings/dlc/pytorch.py +166 -0
- glitchlings/dlc/pytorch_lightning.py +215 -0
- glitchlings/lexicon/__init__.py +192 -0
- glitchlings/lexicon/_cache.py +110 -0
- glitchlings/lexicon/data/default_vector_cache.json +82 -0
- glitchlings/lexicon/metrics.py +162 -0
- glitchlings/lexicon/vector.py +651 -0
- glitchlings/lexicon/wordnet.py +232 -0
- glitchlings/main.py +364 -0
- glitchlings/util/__init__.py +195 -0
- glitchlings/util/adapters.py +27 -0
- glitchlings/zoo/__init__.py +168 -0
- glitchlings/zoo/_ocr_confusions.py +32 -0
- glitchlings/zoo/_rate.py +131 -0
- glitchlings/zoo/_rust_extensions.py +143 -0
- glitchlings/zoo/_sampling.py +54 -0
- glitchlings/zoo/_text_utils.py +100 -0
- glitchlings/zoo/adjax.py +128 -0
- glitchlings/zoo/apostrofae.py +127 -0
- glitchlings/zoo/assets/__init__.py +0 -0
- glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
- glitchlings/zoo/core.py +582 -0
- glitchlings/zoo/jargoyle.py +335 -0
- glitchlings/zoo/mim1c.py +109 -0
- glitchlings/zoo/ocr_confusions.tsv +30 -0
- glitchlings/zoo/redactyl.py +193 -0
- glitchlings/zoo/reduple.py +148 -0
- glitchlings/zoo/rushmore.py +153 -0
- glitchlings/zoo/scannequin.py +171 -0
- glitchlings/zoo/typogre.py +231 -0
- glitchlings/zoo/zeedub.py +185 -0
- glitchlings-0.4.4.dist-info/METADATA +627 -0
- glitchlings-0.4.4.dist-info/RECORD +47 -0
- glitchlings-0.4.4.dist-info/WHEEL +5 -0
- glitchlings-0.4.4.dist-info/entry_points.txt +2 -0
- glitchlings-0.4.4.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.4.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
import random
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
|
|
8
|
+
from ._rate import resolve_rate
|
|
9
|
+
from ._rust_extensions import get_rust_operation
|
|
10
|
+
from .core import AttackOrder, AttackWave, Glitchling
|
|
11
|
+
|
|
12
|
+
# Load the Rust-accelerated implementation when available; the code below treats ``None`` as "not available".
|
|
13
|
+
_inject_zero_widths_rust = get_rust_operation("inject_zero_widths")
|
|
14
|
+
|
|
15
|
+
_DEFAULT_ZERO_WIDTH_CHARACTERS: tuple[str, ...] = (
|
|
16
|
+
"\u200b", # ZERO WIDTH SPACE
|
|
17
|
+
"\u200c", # ZERO WIDTH NON-JOINER
|
|
18
|
+
"\u200d", # ZERO WIDTH JOINER
|
|
19
|
+
"\ufeff", # ZERO WIDTH NO-BREAK SPACE
|
|
20
|
+
"\u2060", # WORD JOINER
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _python_insert_zero_widths(
|
|
25
|
+
text: str,
|
|
26
|
+
*,
|
|
27
|
+
rate: float,
|
|
28
|
+
rng: random.Random,
|
|
29
|
+
characters: Sequence[str],
|
|
30
|
+
) -> str:
|
|
31
|
+
if not text:
|
|
32
|
+
return text
|
|
33
|
+
|
|
34
|
+
palette = [char for char in characters if char]
|
|
35
|
+
if not palette:
|
|
36
|
+
return text
|
|
37
|
+
|
|
38
|
+
positions = [
|
|
39
|
+
index + 1
|
|
40
|
+
for index in range(len(text) - 1)
|
|
41
|
+
if not text[index].isspace() and not text[index + 1].isspace()
|
|
42
|
+
]
|
|
43
|
+
if not positions:
|
|
44
|
+
return text
|
|
45
|
+
|
|
46
|
+
total = len(positions)
|
|
47
|
+
clamped_rate = max(0.0, rate)
|
|
48
|
+
if clamped_rate <= 0.0:
|
|
49
|
+
return text
|
|
50
|
+
|
|
51
|
+
target = clamped_rate * total
|
|
52
|
+
count = math.floor(target)
|
|
53
|
+
remainder = target - count
|
|
54
|
+
if remainder > 0.0 and rng.random() < remainder:
|
|
55
|
+
count += 1
|
|
56
|
+
count = min(total, count)
|
|
57
|
+
|
|
58
|
+
if count <= 0:
|
|
59
|
+
return text
|
|
60
|
+
|
|
61
|
+
chosen = rng.sample(positions, count)
|
|
62
|
+
chosen.sort()
|
|
63
|
+
|
|
64
|
+
chars = list(text)
|
|
65
|
+
for position in reversed(chosen):
|
|
66
|
+
chars.insert(position, rng.choice(palette))
|
|
67
|
+
|
|
68
|
+
return "".join(chars)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def insert_zero_widths(
    text: str,
    rate: float | None = None,
    seed: int | None = None,
    rng: random.Random | None = None,
    *,
    characters: Sequence[str] | None = None,
) -> str:
    """Inject zero-width characters between non-space character pairs.

    Args:
        text: String to corrupt. Returned unchanged when empty.
        rate: Requested insertion rate, passed through ``resolve_rate`` with
            a default of ``0.02``. A resolved rate at or below zero leaves
            ``text`` unchanged.
        seed: Seed for a fresh ``random.Random`` when ``rng`` is not given.
        rng: Random source to use; takes precedence over ``seed``.
        characters: Glyph palette; defaults to
            ``_DEFAULT_ZERO_WIDTH_CHARACTERS``. Empty strings are dropped,
            and an empty palette leaves ``text`` unchanged.

    Returns:
        The text with zero-width glyphs inserted. When the Rust operation is
        loaded its output is returned only if it equals the output of the
        Python implementation; otherwise the Python output is returned.
    """
    effective_rate = resolve_rate(
        rate=rate,
        legacy_value=None,
        default=0.02,
        legacy_name="rate",
    )

    if rng is None:
        rng = random.Random(seed)

    source: Sequence[str] = (
        _DEFAULT_ZERO_WIDTH_CHARACTERS if characters is None else tuple(characters)
    )
    cleaned_palette = tuple(glyph for glyph in source if glyph)
    if not (cleaned_palette and text):
        return text

    clamped_rate = max(0.0, effective_rate)
    if clamped_rate == 0.0:
        return text

    # No Rust operation loaded: the Python implementation is the only path.
    if _inject_zero_widths_rust is None:
        return _python_insert_zero_widths(
            text,
            rate=clamped_rate,
            rng=rng,
            characters=cleaned_palette,
        )

    # Snapshot the RNG (when it supports it) so both implementations can be
    # driven from the same starting state.
    rewindable = hasattr(rng, "getstate") and hasattr(rng, "setstate")
    state = rng.getstate() if rewindable else None

    python_result = _python_insert_zero_widths(
        text,
        rate=clamped_rate,
        rng=rng,
        characters=cleaned_palette,
    )

    python_state = None
    if state is not None:
        # Remember where the Python run left the RNG, then rewind for Rust.
        python_state = rng.getstate()
        rng.setstate(state)

    rust_result = cast(
        str,
        _inject_zero_widths_rust(text, clamped_rate, list(cleaned_palette), rng),
    )
    if rust_result == python_result:
        return rust_result

    # The implementations disagreed: return the Python output and leave the
    # RNG in the state the Python run produced.
    if python_state is not None:
        rng.setstate(python_state)
    return python_result
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class Zeedub(Glitchling):
    """Glitchling that plants zero-width glyphs inside words."""

    def __init__(
        self,
        *,
        rate: float | None = None,
        seed: int | None = None,
        characters: Sequence[str] | None = None,
    ) -> None:
        """Configure the glitchling.

        Args:
            rate: Insertion rate; resolved through ``resolve_rate`` with a
                default of ``0.02``.
            seed: Seed forwarded to the base ``Glitchling``.
            characters: Optional glyph palette, stored as a tuple. ``None``
                is forwarded as-is.
        """
        resolved_rate = resolve_rate(
            rate=rate,
            legacy_value=None,
            default=0.02,
            legacy_name="rate",
        )
        palette = None if characters is None else tuple(characters)
        super().__init__(
            name="Zeedub",
            corruption_function=insert_zero_widths,
            scope=AttackWave.CHARACTER,
            order=AttackOrder.LAST,
            seed=seed,
            rate=resolved_rate,
            characters=palette,
        )

    def pipeline_operation(self) -> dict[str, Any] | None:
        """Describe this glitchling as a pipeline operation mapping.

        Reads ``rate`` and ``characters`` from ``self.kwargs`` (presumably
        populated by ``Glitchling.__init__`` from the keyword arguments
        passed above; verify against the base class).

        Returns:
            A dict with keys ``"type"``, ``"rate"`` and ``"characters"``, or
            ``None`` when no rate is stored or the palette is empty.
        """
        rate = self.kwargs.get("rate")
        if rate is None:
            return None

        configured = self.kwargs.get("characters")
        if configured is None:
            glyphs = list(_DEFAULT_ZERO_WIDTH_CHARACTERS)
        else:
            # Drop falsy entries and coerce the remainder to ``str``.
            glyphs = [str(char) for char in configured if char]

        if not glyphs:
            return None

        return {
            "type": "zwj",
            "rate": float(rate),
            "characters": glyphs,
        }
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# Module-level instance constructed with every argument left at its default.
zeedub = Zeedub()
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
# Names exported by ``from <module> import *``.
__all__ = ["Zeedub", "zeedub", "insert_zero_widths"]
|