glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +36 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +53 -0
- glitchlings/attack/compose.py +299 -0
- glitchlings/attack/core.py +465 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +104 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +157 -0
- glitchlings/auggie.py +283 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +17 -3
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +37 -65
- glitchlings/dlc/prime.py +55 -114
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +432 -0
- glitchlings/main.py +123 -32
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +29 -176
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +311 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +47 -24
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +301 -167
- glitchlings/zoo/core_execution.py +98 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/ekkokin.py +118 -0
- glitchlings/zoo/hokey.py +137 -0
- glitchlings/zoo/jargoyle.py +179 -274
- glitchlings/zoo/mim1c.py +106 -68
- glitchlings/zoo/pedant/__init__.py +107 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +44 -175
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +359 -116
- glitchlings/zoo/scannequin.py +18 -125
- glitchlings/zoo/transforms.py +386 -0
- glitchlings/zoo/typogre.py +76 -162
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/zeedub.py +33 -86
- glitchlings-0.9.3.dist-info/METADATA +334 -0
- glitchlings-0.9.3.dist-info/RECORD +80 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
- glitchlings/zoo/_ocr_confusions.py +0 -34
- glitchlings/zoo/_rate.py +0 -21
- glitchlings/zoo/reduple.py +0 -169
- glitchlings-0.2.5.dist-info/METADATA +0 -490
- glitchlings-0.2.5.dist-info/RECORD +0 -27
- /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"""Impure configuration loading functions.
|
|
2
|
+
|
|
3
|
+
This module is IMPURE - it performs file IO, environment variable access,
|
|
4
|
+
and maintains global state. Use the schema module for pure validation.
|
|
5
|
+
|
|
6
|
+
Impure operations:
|
|
7
|
+
- File reading (TOML, YAML)
|
|
8
|
+
- Environment variable access
|
|
9
|
+
- Global configuration cache
|
|
10
|
+
- Optional dependency imports (jsonschema, yaml)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import importlib
|
|
16
|
+
import os
|
|
17
|
+
from io import TextIOBase
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import IO, TYPE_CHECKING, Any, Callable, Mapping, Protocol, cast
|
|
20
|
+
|
|
21
|
+
from glitchlings.constants import DEFAULT_ATTACK_SEED, DEFAULT_CONFIG_PATH
|
|
22
|
+
|
|
23
|
+
from ..compat.loaders import jsonschema
|
|
24
|
+
from .schema import (
|
|
25
|
+
normalize_mapping,
|
|
26
|
+
validate_attack_config_schema,
|
|
27
|
+
validate_runtime_config_data,
|
|
28
|
+
)
|
|
29
|
+
from .types import ATTACK_CONFIG_SCHEMA, AttackConfig, RuntimeConfig
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
32
|
+
from ..zoo import Gaggle, Glitchling
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
# TOML/YAML module loading (impure - module imports)
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
try: # Python 3.11+
|
|
40
|
+
import tomllib as _tomllib
|
|
41
|
+
except ModuleNotFoundError: # pragma: no cover - Python < 3.11
|
|
42
|
+
_tomllib = importlib.import_module("tomli")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class _TomllibModule(Protocol):
    """Structural type for the TOML parser module bound to ``_tomllib``.

    Only the single entry point this file relies on is declared.
    """

    def load(self, fp: IO[bytes]) -> Any:
        """Parse TOML from the binary file object ``fp``."""
        ...
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class _YamlModule(Protocol):
    """Structural type for the dynamically imported ``yaml`` module.

    Declares just the members used in this file: the base parse exception
    and the safe loader.
    """

    # Exception class caught when YAML parsing fails.
    YAMLError: type[Exception]

    def safe_load(self, stream: str) -> Any:
        """Parse the YAML document contained in ``stream``."""
        ...
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
tomllib = cast(_TomllibModule, _tomllib)
|
|
56
|
+
yaml = cast(_YamlModule, importlib.import_module("yaml"))
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
# Environment and path resolution (impure - environment access)
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
CONFIG_ENV_VAR = "GLITCHLINGS_CONFIG"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _resolve_config_path() -> Path:
    """Pick the configuration file location.

    Returns:
        The path named by the ``CONFIG_ENV_VAR`` environment variable when
        it is set to a non-empty value, otherwise ``DEFAULT_CONFIG_PATH``.
    """
    env_value = os.environ.get(CONFIG_ENV_VAR)
    # An unset *or* empty variable falls back to the packaged default.
    return Path(env_value) if env_value else DEFAULT_CONFIG_PATH
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# Global configuration state (impure - mutable global)
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
_CONFIG: RuntimeConfig | None = None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def reset_config() -> None:
    """Clear the module-level runtime configuration cache.

    After this call the cached value is ``None`` again.
    """
    global _CONFIG
    _CONFIG = None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def reload_config() -> RuntimeConfig:
    """Discard the cached configuration and load it again.

    Returns:
        The newly loaded ``RuntimeConfig``.
    """
    reset_config()
    refreshed = get_config()
    return refreshed
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_config() -> RuntimeConfig:
    """Return the runtime configuration, loading it on first use.

    Returns:
        The cached ``RuntimeConfig``; it is loaded via
        ``_load_runtime_config`` when the cache is empty.
    """
    global _CONFIG
    current = _CONFIG
    if current is None:
        # Populate the cache only after a successful load.
        current = _load_runtime_config()
        _CONFIG = current
    return current
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ---------------------------------------------------------------------------
|
|
102
|
+
# File IO helpers (impure - file system access)
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _read_text_source(
    source: str | Path | TextIOBase,
    *,
    description: str,
    encoding: str,
    missing_error: Callable[[Path], Exception] | None,
) -> tuple[str, str]:
    """Read text content from a file path or an open text stream.

    Args:
        source: A filesystem path (``str`` or ``Path``) or a text stream.
        description: Human-readable name of the content, used in the
            ``TypeError`` message.
        encoding: Encoding used when reading from a path. Ignored for
            streams, which are read as-is.
        missing_error: Optional factory that turns the missing path into
            the exception to raise instead of ``FileNotFoundError``.

    Returns:
        A ``(text, label)`` tuple. ``label`` is the path as a string, or
        the stream's ``name`` attribute (``"<stream>"`` when absent).

    Raises:
        FileNotFoundError: The path does not exist and ``missing_error``
            is ``None``.
        Exception: Whatever ``missing_error`` builds, chained to the
            original ``FileNotFoundError``.
        TypeError: ``source`` is neither a path nor a text stream.
    """
    if isinstance(source, (str, Path)):
        path = Path(source)
        try:
            text = path.read_text(encoding=encoding)
        except FileNotFoundError as exc:
            if missing_error is None:
                raise
            raise missing_error(path) from exc
        return text, str(path)

    if isinstance(source, TextIOBase):
        # A stream's ``name`` is not guaranteed to be a string (a file opened
        # from a descriptor reports the integer fd), so coerce it to honour
        # the declared ``tuple[str, str]`` return type.
        return source.read(), str(getattr(source, "name", "<stream>"))

    raise TypeError(f"{description} source must be a path or text stream.")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def load_text_config(
    source: str | Path | TextIOBase,
    *,
    loader: Callable[..., Any],
    description: str,
    encoding: str = "utf-8",
    allow_empty: bool = False,
    mapping_error: str = "must contain a top-level mapping.",
    missing_error: Callable[[Path], Exception] | None = None,
    pass_label: bool = False,
) -> tuple[dict[str, Any], str]:
    """Read a text configuration, parse it, and require a top-level mapping.

    Args:
        source: Path or text stream to read from.
        loader: Parser callable. Called as ``loader(text)``, or as
            ``loader(text, label)`` when ``pass_label`` is true.
        description: Human-readable name of the configuration for errors.
        encoding: Encoding used when ``source`` is a path.
        allow_empty: Forwarded to ``normalize_mapping``.
        mapping_error: Forwarded to ``normalize_mapping``.
        missing_error: Forwarded to ``_read_text_source``.
        pass_label: Whether the loader also receives the source label.

    Returns:
        A ``(mapping, label)`` tuple, where ``label`` identifies the source.
    """
    text, label = _read_text_source(
        source,
        description=description,
        encoding=encoding,
        missing_error=missing_error,
    )
    # Some loaders want the label so they can produce contextual errors.
    loader_args = (text, label) if pass_label else (text,)
    parsed = loader(*loader_args)
    normalized = normalize_mapping(
        parsed,
        source=label,
        description=description,
        allow_empty=allow_empty,
        mapping_error=mapping_error,
    )
    return normalized, label
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def load_binary_config(
    path: Path,
    *,
    loader: Callable[[IO[bytes]], Any],
    description: str,
    allow_missing: bool = False,
    allow_empty: bool = False,
    mapping_error: str = "must contain a top-level mapping.",
) -> dict[str, Any]:
    """Load binary configuration data from disk and validate the mapping.

    Args:
        path: File to open in binary mode.
        loader: Parser that receives the open binary handle.
        description: Human-readable name of the configuration for errors.
        allow_missing: When true, a missing file yields an empty dict.
        allow_empty: Forwarded to ``normalize_mapping``.
        mapping_error: Forwarded to ``normalize_mapping``.

    Returns:
        The parsed top-level mapping as a dict, or ``{}`` when the file is
        missing and ``allow_missing`` is true.

    Raises:
        FileNotFoundError: The file is missing and ``allow_missing`` is
            false.
    """
    # Open first and handle absence afterwards: a separate ``exists()`` probe
    # leaves a window in which the file can vanish before it is opened.
    try:
        handle = path.open("rb")
    except (FileNotFoundError, NotADirectoryError) as exc:
        # NotADirectoryError covers a path component that is a regular file,
        # which ``Path.exists()`` also reports as "does not exist".
        if allow_missing:
            return {}
        raise FileNotFoundError(f"{description} '{path}' not found.") from exc

    with handle:
        data = loader(handle)

    return normalize_mapping(
        data,
        source=str(path),
        description=description,
        allow_empty=allow_empty,
        mapping_error=mapping_error,
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# ---------------------------------------------------------------------------
|
|
190
|
+
# Runtime configuration loading (impure - file IO + validation)
|
|
191
|
+
# ---------------------------------------------------------------------------
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _load_runtime_config() -> RuntimeConfig:
    """Read and validate the runtime configuration file.

    The file may be absent only when the resolved path is the packaged
    default; an explicitly configured path must exist.

    Returns:
        A ``RuntimeConfig`` recording the resolved path.
    """
    config_path = _resolve_config_path()
    using_default = config_path == DEFAULT_CONFIG_PATH
    raw = load_binary_config(
        config_path,
        loader=tomllib.load,
        description="Configuration file",
        allow_missing=using_default,
        allow_empty=True,
    )
    # Validation is run for its errors; the parsed values are not stored.
    validate_runtime_config_data(raw, source=str(config_path))

    return RuntimeConfig(path=config_path)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# ---------------------------------------------------------------------------
|
|
210
|
+
# Attack configuration loading (impure - file IO + validation)
|
|
211
|
+
# ---------------------------------------------------------------------------
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _load_yaml(text: str, label: str) -> Any:
    """Parse YAML text with the safe loader.

    Args:
        text: YAML document to parse.
        label: Source identifier included in the error message.

    Returns:
        The parsed YAML data.

    Raises:
        ValueError: The text is not valid YAML; chained to the parser error.
    """
    try:
        parsed = yaml.safe_load(text)
    except yaml.YAMLError as exc:
        raise ValueError(f"Failed to parse attack configuration '{label}': {exc}") from exc
    return parsed
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def load_attack_config(
    source: str | Path | TextIOBase,
    *,
    encoding: str = "utf-8",
) -> AttackConfig:
    """Read a YAML attack configuration and turn it into an ``AttackConfig``.

    Args:
        source: Path or text stream holding the YAML document.
        encoding: Encoding used when ``source`` is a path.

    Returns:
        The parsed ``AttackConfig``.

    Raises:
        ValueError: The file is missing, cannot be parsed, or is invalid.
    """

    def _not_found(path: Path) -> Exception:
        # A missing file is reported as ValueError, like other config errors.
        return ValueError(f"Attack configuration '{path}' was not found.")

    mapping, label = load_text_config(
        source,
        loader=_load_yaml,
        description="Attack configuration",
        encoding=encoding,
        mapping_error="must be a mapping.",
        missing_error=_not_found,
        pass_label=True,
    )
    return parse_attack_config(mapping, source=label)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def parse_attack_config(data: Any, *, source: str = "<config>") -> AttackConfig:
    """Validate already-parsed data and build an ``AttackConfig`` from it.

    Args:
        data: Parsed configuration data.
        source: Label identifying where the data came from, for errors.

    Returns:
        An ``AttackConfig`` holding the instantiated glitchlings and seed.

    Raises:
        ValueError: The data fails structural or JSON-schema validation, or
            a glitchling entry cannot be built.
    """
    mapping = validate_attack_config_schema(data, source=source)

    # Extra JSON-schema pass, run only when the optional module is available.
    validator = jsonschema.get()
    if validator is not None:
        try:
            validator.validate(instance=mapping, schema=ATTACK_CONFIG_SCHEMA)
        except validator.exceptions.ValidationError as exc:  # pragma: no cover - optional dep
            message = exc.message
            raise ValueError(f"Attack configuration '{source}' is invalid: {message}") from exc

    # Entries are numbered from 1 so error messages read naturally.
    roster: list["Glitchling"] = [
        _build_glitchling(entry, source, position)
        for position, entry in enumerate(mapping["glitchlings"], start=1)
    ]

    return AttackConfig(glitchlings=roster, seed=mapping.get("seed"))
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def build_gaggle(config: AttackConfig, *, seed_override: int | None = None) -> "Gaggle":
    """Create a ``Gaggle`` from an attack configuration.

    Seed precedence: ``seed_override``, then ``config.seed``, then
    ``DEFAULT_ATTACK_SEED``.

    Args:
        config: The attack configuration supplying glitchlings and seed.
        seed_override: Seed that takes priority over the configured one.

    Returns:
        A ``Gaggle`` built from ``config.glitchlings`` and the chosen seed.
    """
    # Deferred import: importing the zoo at module load would be circular.
    from ..zoo import Gaggle

    chosen_seed = config.seed if seed_override is None else seed_override
    if chosen_seed is None:
        chosen_seed = DEFAULT_ATTACK_SEED

    return Gaggle(config.glitchlings, seed=chosen_seed)
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _build_glitchling(entry: Any, source: str, index: int) -> "Glitchling":
    """Instantiate one glitchling from a configuration entry.

    Args:
        entry: Either a spec string or a mapping with a ``name`` key. The
            mapping's constructor arguments come from ``parameters`` when
            present, otherwise from every key other than ``name`` and
            ``parameters``.
        source: Label of the configuration, prefixed to error messages.
        index: 1-based position of the entry, used in error messages.

    Returns:
        The constructed glitchling instance.

    Raises:
        ValueError: The entry is malformed, names an unknown glitchling, or
            its arguments are rejected by the constructor.
    """
    from ..zoo import get_glitchling_class, parse_glitchling_spec

    if isinstance(entry, str):
        try:
            return parse_glitchling_spec(entry)
        except ValueError as exc:
            raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc

    if not isinstance(entry, Mapping):
        raise ValueError(f"{source}: glitchling #{index} must be a string or mapping.")

    if "type" in entry:
        raise ValueError(f"{source}: glitchling #{index} uses unsupported 'type'; use 'name'.")

    name_value = entry.get("name")
    if not (isinstance(name_value, str) and name_value.strip()):
        raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")

    explicit = entry.get("parameters")
    if explicit is None:
        # No 'parameters' block: remaining keys are the constructor arguments.
        kwargs = {
            key: value for key, value in entry.items() if key not in {"name", "parameters"}
        }
    elif isinstance(explicit, Mapping):
        kwargs = dict(explicit)
    else:
        raise ValueError(f"{source}: glitchling '{name_value}' parameters must be a mapping.")

    try:
        glitchling_cls = get_glitchling_class(name_value)
    except ValueError as exc:
        raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc

    try:
        return glitchling_cls(**kwargs)
    except TypeError as exc:
        raise ValueError(
            f"{source}: glitchling #{index}: failed to instantiate '{name_value}': {exc}"
        ) from exc
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
__all__ = [
|
|
322
|
+
"CONFIG_ENV_VAR",
|
|
323
|
+
"build_gaggle",
|
|
324
|
+
"get_config",
|
|
325
|
+
"load_attack_config",
|
|
326
|
+
"load_binary_config",
|
|
327
|
+
"load_text_config",
|
|
328
|
+
"parse_attack_config",
|
|
329
|
+
"reload_config",
|
|
330
|
+
"reset_config",
|
|
331
|
+
]
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Pure validation functions for configuration data.
|
|
2
|
+
|
|
3
|
+
This module contains only pure validation functions that operate on already-
|
|
4
|
+
loaded data structures. Functions here do not perform IO - they validate
|
|
5
|
+
in-memory data and return normalized results.
|
|
6
|
+
|
|
7
|
+
Pure guarantees:
|
|
8
|
+
- No file IO
|
|
9
|
+
- No environment variable access
|
|
10
|
+
- No mutable global state
|
|
11
|
+
- Same inputs always produce same outputs
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from typing import Any, Mapping, Sequence
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def normalize_mapping(
    data: Any,
    *,
    source: str,
    description: str,
    allow_empty: bool = False,
    mapping_error: str = "must contain a top-level mapping.",
) -> dict[str, Any]:
    """Check that ``data`` is a mapping and return it as a plain dict.

    Args:
        data: The value to check.
        source: Label naming where the data came from (for error messages).
        description: Human-readable name of the kind of data.
        allow_empty: When true, ``None`` is accepted and becomes ``{}``.
        mapping_error: Message suffix used when ``data`` is not a mapping.

    Returns:
        A new dict with the mapping's items.

    Raises:
        ValueError: ``data`` is ``None`` while ``allow_empty`` is false, or
            ``data`` is not a mapping.
    """
    if isinstance(data, Mapping):
        return dict(data)
    if data is not None:
        raise ValueError(f"{description} '{source}' {mapping_error}")
    if not allow_empty:
        raise ValueError(f"{description} '{source}' is empty.")
    return {}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def validate_runtime_config_data(data: Any, *, source: str) -> Mapping[str, Any]:
    """Check the shape of already-loaded runtime configuration data.

    Only the top-level type is checked; no keys are required and an empty
    or ``None`` payload is accepted.

    Args:
        data: The loaded configuration data.
        source: Label identifying the data source (for error messages).

    Returns:
        The data as a dict (empty when ``data`` is ``None``).

    Raises:
        ValueError: ``data`` is neither ``None`` nor a mapping.
    """
    # Empty configs are accepted for forwards compatibility.
    return normalize_mapping(
        data,
        source=source,
        description="Configuration file",
        allow_empty=True,
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def validate_attack_config_schema(data: Any, *, source: str) -> Mapping[str, Any]:
    """Validate attack configuration data structure.

    This is a pure validation function that checks the structure of
    already-loaded configuration data. It does NOT perform jsonschema
    validation (that requires the optional dependency).

    Args:
        data: The configuration data (typically from YAML parsing).
        source: A label identifying the data source (for error messages).

    Returns:
        The validated mapping.

    Raises:
        ValueError: If the configuration structure is invalid: it is not a
            mapping, has keys other than ``glitchlings`` and ``seed``, lacks
            ``glitchlings``, ``glitchlings`` is not a non-string sequence,
            ``seed`` is not an integer, or an entry is malformed.
    """
    mapping = normalize_mapping(
        data,
        source=source,
        description="Attack configuration",
        mapping_error="must be a mapping.",
    )

    # Parsed keys are not guaranteed to be strings. Stringify before sorting
    # and joining so a stray non-string key is reported as the documented
    # ValueError rather than an incidental TypeError.
    unexpected = sorted(str(key) for key in mapping if key not in {"glitchlings", "seed"})
    if unexpected:
        extras = ", ".join(unexpected)
        raise ValueError(f"Attack configuration '{source}' has unsupported fields: {extras}.")

    if "glitchlings" not in mapping:
        raise ValueError(f"Attack configuration '{source}' must define 'glitchlings'.")

    raw_glitchlings = mapping["glitchlings"]
    # str and bytes are Sequences too, but never a valid roster.
    if not isinstance(raw_glitchlings, Sequence) or isinstance(raw_glitchlings, (str, bytes)):
        raise ValueError(f"'glitchlings' in '{source}' must be a sequence.")

    seed = mapping.get("seed")
    # bool is a subclass of int, so a bare isinstance(seed, int) would let
    # ``seed: true`` through as 1. Reject booleans explicitly.
    if seed is not None and (isinstance(seed, bool) or not isinstance(seed, int)):
        raise ValueError(f"Seed in '{source}' must be an integer if provided.")

    for index, entry in enumerate(raw_glitchlings, start=1):
        _validate_glitchling_entry(entry, source=source, index=index)

    return mapping
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _validate_glitchling_entry(entry: Any, *, source: str, index: int) -> None:
    """Check one glitchling entry of an attack configuration.

    Args:
        entry: A non-blank spec string, or a mapping with a non-blank
            string ``name`` and, optionally, a mapping under ``parameters``.
        source: Label of the configuration, prefixed to error messages.
        index: 1-based position of the entry, used in error messages.

    Raises:
        ValueError: The entry does not have one of the accepted shapes.
    """
    if isinstance(entry, str):
        if entry.strip():
            return
        raise ValueError(f"{source}: glitchling #{index} name cannot be empty.")

    if not isinstance(entry, Mapping):
        raise ValueError(f"{source}: glitchling #{index} must be a string or mapping.")

    if "type" in entry:
        raise ValueError(f"{source}: glitchling #{index} uses unsupported 'type'; use 'name'.")

    name_candidate = entry.get("name")
    has_name = isinstance(name_candidate, str) and bool(name_candidate.strip())
    if not has_name:
        raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")

    parameters = entry.get("parameters")
    if parameters is None or isinstance(parameters, Mapping):
        return
    raise ValueError(f"{source}: glitchling '{name_candidate}' parameters must be a mapping.")
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
__all__ = [
|
|
153
|
+
"normalize_mapping",
|
|
154
|
+
"validate_attack_config_schema",
|
|
155
|
+
"validate_runtime_config_data",
|
|
156
|
+
]
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Pure type definitions for configuration structures.
|
|
2
|
+
|
|
3
|
+
This module contains only dataclass definitions and type constants with no
|
|
4
|
+
side effects. It can be safely imported anywhere without triggering IO or
|
|
5
|
+
module loading.
|
|
6
|
+
|
|
7
|
+
Pure guarantees:
|
|
8
|
+
- No import side effects
|
|
9
|
+
- No file IO
|
|
10
|
+
- No environment variable access
|
|
11
|
+
- No mutable global state
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import TYPE_CHECKING, Any
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
21
|
+
from ..zoo import Glitchling
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(slots=True)
|
|
25
|
+
class RuntimeConfig:
|
|
26
|
+
"""Top-level runtime configuration loaded from ``config.toml``."""
|
|
27
|
+
|
|
28
|
+
path: Path
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(slots=True)
|
|
32
|
+
class AttackConfig:
|
|
33
|
+
"""Structured representation of a glitchling roster loaded from YAML."""
|
|
34
|
+
|
|
35
|
+
glitchlings: list["Glitchling"]
|
|
36
|
+
seed: int | None = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# JSON Schema for attack configuration validation
|
|
40
|
+
# Top level: an object with a required ``glitchlings`` array and an optional
# integer ``seed``; no other top-level keys are permitted.
ATTACK_CONFIG_SCHEMA: dict[str, Any] = {
    "type": "object",
    "required": ["glitchlings"],
    "properties": {
        "glitchlings": {
            "type": "array",
            # At least one roster entry is required.
            "minItems": 1,
            "items": {
                # Each entry is either a non-empty string or an object
                # carrying a non-empty ``name``.
                "anyOf": [
                    {"type": "string", "minLength": 1},
                    {
                        "type": "object",
                        "required": ["name"],
                        "properties": {
                            "name": {"type": "string", "minLength": 1},
                            "parameters": {"type": "object"},
                        },
                        # Entry objects may carry additional keys.
                        "additionalProperties": True,
                    },
                ]
            },
        },
        "seed": {"type": "integer"},
    },
    "additionalProperties": False,
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
__all__ = [
|
|
69
|
+
"ATTACK_CONFIG_SCHEMA",
|
|
70
|
+
"AttackConfig",
|
|
71
|
+
"RuntimeConfig",
|
|
72
|
+
]
|
glitchlings/config.toml
ADDED
glitchlings/constants.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Centralized defaults and shared configuration constants."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
# Global configuration defaults
|
|
8
|
+
# Fallback seed for attacks.
DEFAULT_ATTACK_SEED = 151
# ``config.toml`` located in the same directory as this module.
DEFAULT_CONFIG_PATH = Path(__file__).with_name("config.toml")

# Character-level glitchling default rates
DEFAULT_TYPOGRE_RATE = 0.02
DEFAULT_TYPOGRE_KEYBOARD = "CURATOR_QWERTY"
DEFAULT_MIM1C_RATE = 0.02
DEFAULT_SCANNEQUIN_RATE = 0.02
DEFAULT_ZEEDUB_RATE = 0.02

# Word-level glitchling default rates
DEFAULT_EKKOKIN_RATE = 0.02
DEFAULT_EKKOKIN_WEIGHTING = "flat"
DEFAULT_JARGOYLE_RATE = 0.01
DEFAULT_REDACTYL_RATE = 0.025
DEFAULT_REDACTYL_CHAR = "\u2588"  # █ FULL BLOCK

# Rushmore default rates per mode
RUSHMORE_DEFAULT_RATES = {
    "delete": 0.01,
    "duplicate": 0.01,
    "swap": 0.5,
}

# Mim1c Unicode script class defaults
MIM1C_DEFAULT_CLASSES: tuple[str, ...] = ("LATIN", "GREEK", "CYRILLIC", "COMMON")

# Zeedub zero-width character palette
ZEEDUB_DEFAULT_ZERO_WIDTHS: tuple[str, ...] = (
    "\u200b",  # ZERO WIDTH SPACE
    "\u200c",  # ZERO WIDTH NON-JOINER
    "\u200d",  # ZERO WIDTH JOINER
    "\ufeff",  # BYTE ORDER MARK (zero-width no-break space)
)
|
|
42
|
+
|
|
43
|
+
__all__ = [
|
|
44
|
+
"DEFAULT_ATTACK_SEED",
|
|
45
|
+
"DEFAULT_CONFIG_PATH",
|
|
46
|
+
"DEFAULT_EKKOKIN_RATE",
|
|
47
|
+
"DEFAULT_EKKOKIN_WEIGHTING",
|
|
48
|
+
"DEFAULT_JARGOYLE_RATE",
|
|
49
|
+
"DEFAULT_MIM1C_RATE",
|
|
50
|
+
"DEFAULT_REDACTYL_CHAR",
|
|
51
|
+
"DEFAULT_REDACTYL_RATE",
|
|
52
|
+
"DEFAULT_SCANNEQUIN_RATE",
|
|
53
|
+
"DEFAULT_TYPOGRE_KEYBOARD",
|
|
54
|
+
"DEFAULT_TYPOGRE_RATE",
|
|
55
|
+
"DEFAULT_ZEEDUB_RATE",
|
|
56
|
+
"MIM1C_DEFAULT_CLASSES",
|
|
57
|
+
"RUSHMORE_DEFAULT_RATES",
|
|
58
|
+
"ZEEDUB_DEFAULT_ZERO_WIDTHS",
|
|
59
|
+
]
|
glitchlings/dev/docs.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Developer helpers for refreshing generated documentation assets."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import runpy
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Fourth ancestor of this file's resolved path.
# NOTE(review): presumably this is the repository root in a source checkout;
# from an installed wheel it will point elsewhere - confirm the expected layout.
REPO_ROOT = Path(__file__).resolve().parents[3]
# Directory expected to hold the documentation build scripts run below.
DOCS_DIR = REPO_ROOT / "docs"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _run_script(path: Path) -> None:
    """Execute the Python script at ``path`` with ``runpy``.

    Args:
        path: Location of the script to execute.

    Raises:
        FileNotFoundError: No file exists at ``path``.
    """
    if path.exists():
        # The namespace returned by run_path is intentionally discarded.
        runpy.run_path(str(path))
        return
    raise FileNotFoundError(f"Documentation helper not found: {path}")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def refresh_cli_reference() -> None:
    """Run the script that rebuilds the CLI reference docs page."""
    builder = DOCS_DIR / "build_cli_reference.py"
    _run_script(builder)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def refresh_monster_manual() -> None:
    """Run the script that rebuilds the Monster Manual."""
    builder = DOCS_DIR / "build_monster_manual.py"
    _run_script(builder)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def refresh_gallery() -> None:
    """Run the script that rebuilds the glitchling gallery page."""
    builder = DOCS_DIR / "build_glitchling_gallery.py"
    _run_script(builder)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def refresh_all() -> None:
    """Rebuild the CLI reference, the Monster Manual, and the gallery, in that order."""
    steps = (refresh_cli_reference, refresh_monster_manual, refresh_gallery)
    for step in steps:
        step()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def main() -> None:
    """Script entry point: regenerate every documentation asset."""
    refresh_all()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
if __name__ == "__main__":
|
|
45
|
+
main()
|
glitchlings/dlc/__init__.py
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
|
-
"""Optional DLC integrations for Glitchlings.
|
|
1
|
+
"""Optional DLC integrations for Glitchlings.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
This module provides explicit wrapper classes for integrating glitchlings
|
|
4
|
+
with popular ML frameworks:
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
- :class:`~glitchlings.dlc.huggingface.GlitchedDataset`: Wrap Hugging Face datasets
|
|
7
|
+
- :class:`~glitchlings.dlc.pytorch.GlitchedDataLoader`: Wrap PyTorch data loaders
|
|
8
|
+
- :class:`~glitchlings.dlc.pytorch_lightning.GlitchedLightningDataModule`: Wrap
|
|
9
|
+
Lightning data modules
|
|
10
|
+
- :class:`~glitchlings.dlc.gutenberg.GlitchenbergAPI`: Wrap Project Gutenberg API
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
>>> from glitchlings.dlc.huggingface import GlitchedDataset
|
|
14
|
+
>>> from datasets import Dataset
|
|
15
|
+
>>> dataset = Dataset.from_dict({"text": ["hello", "world"]})
|
|
16
|
+
>>> corrupted = GlitchedDataset(dataset, "typogre", column="text")
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
__all__: list[str] = []
|