glitchlings 1.0.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +101 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_corruption_engine/__init__.py +12 -0
- glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +184 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +819 -0
- glitchlings/attack/core_execution.py +378 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +211 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +338 -0
- glitchlings/attack/tokenizer_metrics.py +373 -0
- glitchlings/auggie.py +285 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +39 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +139 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +21 -0
- glitchlings/dlc/_shared.py +300 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/langchain.py +147 -0
- glitchlings/dlc/nemo.py +283 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +599 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +41 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +508 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +852 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +291 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +301 -0
- glitchlings/zoo/mim1c.py +269 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +99 -0
- glitchlings/zoo/pedant/forms.py +50 -0
- glitchlings/zoo/pedant/stones.py +83 -0
- glitchlings/zoo/redactyl.py +94 -0
- glitchlings/zoo/rng.py +280 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +370 -0
- glitchlings/zoo/transforms.py +331 -0
- glitchlings/zoo/typogre.py +194 -0
- glitchlings/zoo/validation.py +643 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +165 -0
- glitchlings-1.0.0.dist-info/METADATA +404 -0
- glitchlings-1.0.0.dist-info/RECORD +86 -0
- glitchlings-1.0.0.dist-info/WHEEL +5 -0
- glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
- glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
- glitchlings-1.0.0.dist-info/top_level.txt +1 -0
glitchlings/__init__.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from .attack import (
|
|
2
|
+
Attack,
|
|
3
|
+
AttackResult,
|
|
4
|
+
GlitchlingComparisonEntry,
|
|
5
|
+
GlitchlingComparisonResult,
|
|
6
|
+
GridSearch,
|
|
7
|
+
GridSearchResult,
|
|
8
|
+
MetricName,
|
|
9
|
+
SeedSweep,
|
|
10
|
+
SeedSweepResult,
|
|
11
|
+
TokenizerComparison,
|
|
12
|
+
TokenizerComparisonResult,
|
|
13
|
+
compare_glitchlings,
|
|
14
|
+
compare_tokenizers,
|
|
15
|
+
)
|
|
16
|
+
from .auggie import Auggie
|
|
17
|
+
from .conf import AttackConfig, build_gaggle, load_attack_config
|
|
18
|
+
from .util import SAMPLE_TEXT
|
|
19
|
+
from .util.transcripts import TranscriptTarget
|
|
20
|
+
from .zoo import (
|
|
21
|
+
Gaggle,
|
|
22
|
+
Glitchling,
|
|
23
|
+
Hokey,
|
|
24
|
+
Jargoyle,
|
|
25
|
+
Mim1c,
|
|
26
|
+
Pedant,
|
|
27
|
+
Redactyl,
|
|
28
|
+
Rushmore,
|
|
29
|
+
RushmoreMode,
|
|
30
|
+
Scannequin,
|
|
31
|
+
Typogre,
|
|
32
|
+
Wherewolf,
|
|
33
|
+
Zeedub,
|
|
34
|
+
hokey,
|
|
35
|
+
jargoyle,
|
|
36
|
+
mim1c,
|
|
37
|
+
pedant,
|
|
38
|
+
plan_operations,
|
|
39
|
+
redactyl,
|
|
40
|
+
rushmore,
|
|
41
|
+
scannequin,
|
|
42
|
+
summon,
|
|
43
|
+
typogre,
|
|
44
|
+
wherewolf,
|
|
45
|
+
zeedub,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
__version__ = "1.0.0"
|
|
49
|
+
|
|
50
|
+
__all__ = [
|
|
51
|
+
# Attack analysis
|
|
52
|
+
"Attack",
|
|
53
|
+
"AttackResult",
|
|
54
|
+
"SeedSweep",
|
|
55
|
+
"SeedSweepResult",
|
|
56
|
+
"GridSearch",
|
|
57
|
+
"GridSearchResult",
|
|
58
|
+
"TokenizerComparison",
|
|
59
|
+
"TokenizerComparisonResult",
|
|
60
|
+
# Comparison functions
|
|
61
|
+
"compare_glitchlings",
|
|
62
|
+
"compare_tokenizers",
|
|
63
|
+
"GlitchlingComparisonEntry",
|
|
64
|
+
"GlitchlingComparisonResult",
|
|
65
|
+
# Metrics
|
|
66
|
+
"MetricName",
|
|
67
|
+
# Builder
|
|
68
|
+
"Auggie",
|
|
69
|
+
# Glitchlings
|
|
70
|
+
"Typogre",
|
|
71
|
+
"typogre",
|
|
72
|
+
"Mim1c",
|
|
73
|
+
"mim1c",
|
|
74
|
+
"Jargoyle",
|
|
75
|
+
"jargoyle",
|
|
76
|
+
"Wherewolf",
|
|
77
|
+
"wherewolf",
|
|
78
|
+
"Hokey",
|
|
79
|
+
"hokey",
|
|
80
|
+
"Pedant",
|
|
81
|
+
"pedant",
|
|
82
|
+
"Redactyl",
|
|
83
|
+
"redactyl",
|
|
84
|
+
"Rushmore",
|
|
85
|
+
"rushmore",
|
|
86
|
+
"RushmoreMode",
|
|
87
|
+
"Scannequin",
|
|
88
|
+
"scannequin",
|
|
89
|
+
"Zeedub",
|
|
90
|
+
"zeedub",
|
|
91
|
+
"summon",
|
|
92
|
+
"Glitchling",
|
|
93
|
+
"Gaggle",
|
|
94
|
+
"plan_operations",
|
|
95
|
+
# Utilities
|
|
96
|
+
"SAMPLE_TEXT",
|
|
97
|
+
"AttackConfig",
|
|
98
|
+
"build_gaggle",
|
|
99
|
+
"load_attack_config",
|
|
100
|
+
"TranscriptTarget",
|
|
101
|
+
]
|
glitchlings/_corruption_engine/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Compatibility wrapper for the compiled Rust extension."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
from glitchlings.internal.rust import load_rust_module
|
|
8
|
+
|
|
9
|
+
# Resolve the extension module once, at import time, via the project loader.
_module = load_rust_module()

# Register the loaded module under the bare top-level name as well, but only
# when nothing is registered under that name yet (setdefault never overwrites).
sys.modules.setdefault("_corruption_engine", _module)
# Replace this wrapper's own sys.modules entry with the loaded module, so
# looking up this module's name afterwards yields ``_module`` itself.
sys.modules[__name__] = _module
|
|
glitchlings/_corruption_engine.cp313-win_amd64.pyd
ADDED
Binary file
glitchlings/assets/__init__.py
ADDED
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Shared asset helpers for Python and Rust consumers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from functools import cache
|
|
8
|
+
from hashlib import blake2b
|
|
9
|
+
from importlib import resources
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, BinaryIO, Iterable, Iterator, Literal, TextIO, cast
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from importlib.resources.abc import Traversable # Python 3.11+
|
|
15
|
+
except ImportError: # pragma: no cover - Python <3.11
|
|
16
|
+
from importlib_resources.abc import Traversable
|
|
17
|
+
|
|
18
|
+
AssetKind = Literal["copy", "compressed"]
|
|
19
|
+
|
|
20
|
+
_DEFAULT_DIGEST_SIZE = 32
|
|
21
|
+
_PIPELINE_MANIFEST_NAME = "pipeline_assets.json"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class PipelineAsset:
    """Immutable description of one asset staged into the Rust build.

    ``name`` is the asset's name, ``kind`` is one of the ``AssetKind``
    literals (default ``"copy"``), and ``output`` optionally overrides the
    name reported by :attr:`staged_name`.
    """

    name: str
    kind: AssetKind = "copy"
    output: str | None = None

    @property
    def staged_name(self) -> str:
        """Return ``output`` when it is set, otherwise fall back to ``name``."""
        if self.output is None:
            return self.name
        return self.output
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _iter_asset_roots() -> Iterable[Traversable]:
|
|
38
|
+
"""Yield candidate locations for the shared glitchling asset bundle."""
|
|
39
|
+
|
|
40
|
+
package_root: Traversable | None
|
|
41
|
+
try:
|
|
42
|
+
package_root = resources.files(__name__)
|
|
43
|
+
except ModuleNotFoundError: # pragma: no cover - defensive guard for install issues
|
|
44
|
+
package_root = None
|
|
45
|
+
if package_root is not None and package_root.is_dir():
|
|
46
|
+
yield package_root
|
|
47
|
+
|
|
48
|
+
repo_root = Path(__file__).resolve().parents[3] / "assets"
|
|
49
|
+
if repo_root.is_dir():
|
|
50
|
+
yield cast(Traversable, repo_root)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _asset(name: str) -> Traversable:
    """Return ``name`` resolved against the first asset root that contains it.

    Both files and directories count as a match. Raises ``FileNotFoundError``
    listing every path that was tried when no root contains the asset.
    """
    roots = tuple(_iter_asset_roots())
    match = next(
        (
            path
            for path in (root.joinpath(name) for root in roots)
            if path.is_file() or path.is_dir()
        ),
        None,
    )
    if match is not None:
        return match

    attempted = [str(root.joinpath(name)) for root in roots]
    searched = ", ".join(attempted) or "<unavailable>"
    raise FileNotFoundError(f"Asset '{name}' not found in: {searched}")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def read_text(name: str, *, encoding: str = "utf-8") -> str:
    """Read the bundled asset ``name`` and return its text decoded with ``encoding``."""
    resource = _asset(name)
    contents = resource.read_text(encoding=encoding)
    return cast(str, contents)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def open_text(name: str, *, encoding: str = "utf-8") -> TextIO:
    """Open the bundled asset ``name`` in text mode and return the handle.

    The stream is returned still open; closing it is left to the caller.
    """
    stream = _asset(name).open("r", encoding=encoding)
    return cast(TextIO, stream)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def open_binary(name: str) -> BinaryIO:
    """Open the bundled asset ``name`` in binary mode and return the handle.

    The stream is returned still open; closing it is left to the caller.
    """
    stream = _asset(name).open("rb")
    return cast(BinaryIO, stream)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def load_json(name: str, *, encoding: str = "utf-8") -> Any:
    """Parse the bundled asset ``name`` as JSON and return the decoded value.

    The asset is opened through ``open_text`` and closed before returning.
    """
    with open_text(name, encoding=encoding) as stream:
        payload = json.load(stream)
    return payload
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _iter_asset_files(root: Traversable, prefix: str = "") -> Iterator[tuple[str, Traversable]]:
|
|
90
|
+
"""Yield file entries within an asset directory with deterministic ordering."""
|
|
91
|
+
|
|
92
|
+
entries = sorted(root.iterdir(), key=lambda entry: entry.name)
|
|
93
|
+
for entry in entries:
|
|
94
|
+
relative = f"{prefix}{entry.name}"
|
|
95
|
+
if entry.is_dir():
|
|
96
|
+
yield from _iter_asset_files(entry, prefix=f"{relative}/")
|
|
97
|
+
else:
|
|
98
|
+
yield relative, entry
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def hash_asset(name: str) -> str:
    """Return the hex BLAKE2b digest of the bundled asset ``name``.

    For a single file the digest covers the file's bytes. For a directory it
    covers, for each contained file in ``_iter_asset_files`` order, the
    UTF-8 encoded relative path followed by that file's bytes.
    """
    hasher = blake2b(digest_size=_DEFAULT_DIGEST_SIZE)
    target = _asset(name)

    # Normalise both cases to (label, entry) pairs; a lone file has no label.
    if target.is_dir():
        members = _iter_asset_files(target)
    else:
        members = [(None, target)]

    for label, entry in members:
        if label is not None:
            hasher.update(label.encode("utf-8"))
        with entry.open("rb") as stream:
            # Feed the file in 8 KiB pieces until read() returns no bytes.
            while piece := stream.read(8192):
                hasher.update(piece)

    return hasher.hexdigest()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@cache
def load_homophone_groups(name: str = "ekkokin_homophones.json") -> tuple[tuple[str, ...], ...]:
    """Load the bundled homophone sets used by the Wherewolf glitchling.

    The JSON asset ``name`` is read as a list of groups and each group is
    converted to a tuple. Results are cached per ``name``.
    """
    groups: list[list[str]] = load_json(name)
    return tuple(map(tuple, groups))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _parse_pipeline_manifest(raw: Any) -> tuple[PipelineAsset, ...]:
|
|
130
|
+
if not isinstance(raw, dict) or "pipeline_assets" not in raw:
|
|
131
|
+
raise ValueError("pipeline_assets manifest must be a mapping with a 'pipeline_assets' list")
|
|
132
|
+
|
|
133
|
+
entries = raw["pipeline_assets"]
|
|
134
|
+
if not isinstance(entries, list):
|
|
135
|
+
raise ValueError("pipeline_assets manifest must contain a list of entries")
|
|
136
|
+
|
|
137
|
+
specs: list[PipelineAsset] = []
|
|
138
|
+
for entry in entries:
|
|
139
|
+
if not isinstance(entry, dict):
|
|
140
|
+
raise ValueError("pipeline_assets entries must be objects with a name field")
|
|
141
|
+
|
|
142
|
+
name = entry.get("name")
|
|
143
|
+
if not isinstance(name, str) or not name:
|
|
144
|
+
raise ValueError("pipeline_assets entries must supply a non-empty name")
|
|
145
|
+
|
|
146
|
+
kind = cast(AssetKind, entry.get("kind", "copy"))
|
|
147
|
+
if kind not in ("copy", "compressed"):
|
|
148
|
+
raise ValueError(f"unsupported asset kind '{kind}' in pipeline manifest")
|
|
149
|
+
|
|
150
|
+
output = entry.get("output")
|
|
151
|
+
if output is not None and not isinstance(output, str):
|
|
152
|
+
raise ValueError("pipeline_assets output names must be strings when provided")
|
|
153
|
+
|
|
154
|
+
specs.append(PipelineAsset(name=name, kind=kind, output=output))
|
|
155
|
+
|
|
156
|
+
return tuple(specs)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@cache
def _load_pipeline_asset_specs() -> tuple[PipelineAsset, ...]:
    """Load and validate the bundled pipeline manifest; the result is cached."""
    return _parse_pipeline_manifest(load_json(_PIPELINE_MANIFEST_NAME))
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Evaluated when this module is imported: the bundled manifest is loaded and
# validated here, so a missing or malformed manifest surfaces at import.
PIPELINE_ASSET_SPECS = _load_pipeline_asset_specs()
# Immutable set holding the ``name`` of every spec in the manifest.
PIPELINE_ASSETS = frozenset(spec.name for spec in PIPELINE_ASSET_SPECS)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
__all__ = [
|
|
170
|
+
"AssetKind",
|
|
171
|
+
"PipelineAsset",
|
|
172
|
+
"PIPELINE_ASSETS",
|
|
173
|
+
"PIPELINE_ASSET_SPECS",
|
|
174
|
+
"read_text",
|
|
175
|
+
"open_text",
|
|
176
|
+
"open_binary",
|
|
177
|
+
"load_json",
|
|
178
|
+
"hash_asset",
|
|
179
|
+
"load_homophone_groups",
|
|
180
|
+
]
|
|
glitchlings/assets/apostrofae_pairs.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"\"": [
|
|
3
|
+
["“", "”"],
|
|
4
|
+
["„", "“"],
|
|
5
|
+
["«", "»"],
|
|
6
|
+
["‹", "›"],
|
|
7
|
+
["『", "』"],
|
|
8
|
+
["「", "」"],
|
|
9
|
+
["﹁", "﹂"],
|
|
10
|
+
["﹃", "﹄"],
|
|
11
|
+
["〝", "〞"],
|
|
12
|
+
["❝", "❞"]
|
|
13
|
+
],
|
|
14
|
+
"'": [
|
|
15
|
+
["‘", "’"],
|
|
16
|
+
["‚", "‘"],
|
|
17
|
+
["‹", "›"],
|
|
18
|
+
["❮", "❯"],
|
|
19
|
+
["❛", "❜"],
|
|
20
|
+
["﹇", "﹈"]
|
|
21
|
+
],
|
|
22
|
+
"`": [
|
|
23
|
+
["‵", "′"],
|
|
24
|
+
["﹁", "﹂"],
|
|
25
|
+
["﹃", "﹄"],
|
|
26
|
+
["⌈", "⌉"],
|
|
27
|
+
["⌊", "⌋"],
|
|
28
|
+
["⎡", "⎤"],
|
|
29
|
+
["⎣", "⎦"],
|
|
30
|
+
["〝", "〞"]
|
|
31
|
+
]
|
|
32
|
+
}
|