glitchlings-0.4.1-cp312-cp312-manylinux_2_28_x86_64.whl → glitchlings-0.4.3-cp312-cp312-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic.
- glitchlings/__init__.py +30 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust.cpython-312-x86_64-linux-gnu.so +0 -0
- glitchlings/compat.py +284 -0
- glitchlings/config.py +164 -34
- glitchlings/config.toml +1 -1
- glitchlings/dlc/__init__.py +3 -1
- glitchlings/dlc/_shared.py +68 -0
- glitchlings/dlc/huggingface.py +26 -41
- glitchlings/dlc/prime.py +64 -101
- glitchlings/dlc/pytorch.py +216 -0
- glitchlings/dlc/pytorch_lightning.py +233 -0
- glitchlings/lexicon/__init__.py +12 -33
- glitchlings/lexicon/_cache.py +21 -22
- glitchlings/lexicon/data/default_vector_cache.json +80 -14
- glitchlings/lexicon/metrics.py +1 -8
- glitchlings/lexicon/vector.py +109 -49
- glitchlings/lexicon/wordnet.py +89 -49
- glitchlings/main.py +30 -24
- glitchlings/util/__init__.py +18 -4
- glitchlings/util/adapters.py +27 -0
- glitchlings/zoo/__init__.py +26 -15
- glitchlings/zoo/_ocr_confusions.py +1 -3
- glitchlings/zoo/_rate.py +1 -4
- glitchlings/zoo/_sampling.py +0 -1
- glitchlings/zoo/_text_utils.py +1 -5
- glitchlings/zoo/adjax.py +2 -4
- glitchlings/zoo/apostrofae.py +128 -0
- glitchlings/zoo/assets/__init__.py +0 -0
- glitchlings/zoo/assets/apostrofae_pairs.json +32 -0
- glitchlings/zoo/core.py +152 -87
- glitchlings/zoo/jargoyle.py +50 -45
- glitchlings/zoo/mim1c.py +11 -10
- glitchlings/zoo/redactyl.py +16 -16
- glitchlings/zoo/reduple.py +5 -3
- glitchlings/zoo/rushmore.py +4 -10
- glitchlings/zoo/scannequin.py +7 -6
- glitchlings/zoo/typogre.py +8 -9
- glitchlings/zoo/zeedub.py +6 -3
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/METADATA +101 -4
- glitchlings-0.4.3.dist-info/RECORD +46 -0
- glitchlings/lexicon/graph.py +0 -290
- glitchlings-0.4.1.dist-info/RECORD +0 -39
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.1.dist-info → glitchlings-0.4.3.dist-info}/top_level.txt +0 -0
glitchlings/__init__.py
CHANGED
@@ -1,29 +1,35 @@
+from .config import AttackConfig, build_gaggle, load_attack_config
+from .util import SAMPLE_TEXT
 from .zoo import (
-    Typogre,
-    typogre,
-    Mim1c,
-    mim1c,
-    Jargoyle,
-    jargoyle,
     Adjax,
-    adjax,
+    Apostrofae,
+    Gaggle,
+    Glitchling,
+    Jargoyle,
+    Mim1c,
     Redactyl,
-    redactyl,
     Reduple,
-    reduple,
     Rushmore,
-    rushmore,
     Scannequin,
-    scannequin,
+    Typogre,
     Zeedub,
-    zeedub,
-    Glitchling,
-    Gaggle,
+    adjax,
+    apostrofae,
+    is_rust_pipeline_enabled,
+    is_rust_pipeline_supported,
+    jargoyle,
+    mim1c,
+    pipeline_feature_flag_enabled,
+    plan_glitchling_specs,
+    plan_glitchlings,
+    redactyl,
+    reduple,
+    rushmore,
+    scannequin,
     summon,
+    typogre,
+    zeedub,
 )
-from .config import AttackConfig, build_gaggle, load_attack_config
-from .util import SAMPLE_TEXT
-
 
 __all__ = [
     "Typogre",
@@ -34,6 +40,8 @@ __all__ = [
     "jargoyle",
     "Adjax",
     "adjax",
+    "Apostrofae",
+    "apostrofae",
     "Redactyl",
     "redactyl",
     "Reduple",
@@ -47,6 +55,11 @@ __all__ = [
     "summon",
     "Glitchling",
     "Gaggle",
+    "plan_glitchlings",
+    "plan_glitchling_specs",
+    "is_rust_pipeline_enabled",
+    "is_rust_pipeline_supported",
+    "pipeline_feature_flag_enabled",
     "SAMPLE_TEXT",
     "AttackConfig",
     "build_gaggle",
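The re-exported surface above can be checked with a plain import; a minimal sketch, assuming the wheel is installed (only the names themselves are confirmed by this diff, not their signatures):

# Hypothetical smoke test: import the newly exported names from the package root.
# Only the names come from __all__ above; nothing about their call signatures is assumed.
from glitchlings import (
    Apostrofae,
    apostrofae,
    plan_glitchlings,
    plan_glitchling_specs,
    is_rust_pipeline_enabled,
    is_rust_pipeline_supported,
    pipeline_feature_flag_enabled,
)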
glitchlings/__main__.py
CHANGED
Binary file
glitchlings/compat.py
ADDED
@@ -0,0 +1,284 @@
"""Compatibility helpers centralising optional dependency imports and extras."""

from __future__ import annotations

import re
from dataclasses import dataclass
from importlib import import_module, metadata
from types import ModuleType
from typing import Any, Callable, Iterable, Protocol, cast


class _MissingSentinel:
    __slots__ = ()


_MISSING = _MissingSentinel()


class _MarkerProtocol(Protocol):
    def evaluate(self, environment: dict[str, str]) -> bool:
        ...


class _RequirementProtocol(Protocol):
    marker: _MarkerProtocol | None
    name: str

    def __init__(self, requirement: str) -> None:
        ...


try:  # pragma: no cover - packaging is bundled with modern Python environments
    from packaging.markers import default_environment as _default_environment
except ModuleNotFoundError:  # pragma: no cover - fallback when packaging missing
    _default_environment = None

try:  # pragma: no cover - packaging is bundled with modern Python environments
    from packaging.requirements import Requirement as _RequirementClass
except ModuleNotFoundError:  # pragma: no cover - fallback when packaging missing
    _RequirementClass = None

default_environment: Callable[[], dict[str, str]] | None
if _default_environment is None:
    default_environment = None
else:
    default_environment = cast(Callable[[], dict[str, str]], _default_environment)

Requirement: type[_RequirementProtocol] | None
if _RequirementClass is None:
    Requirement = None
else:
    Requirement = cast(type[_RequirementProtocol], _RequirementClass)


@dataclass
class OptionalDependency:
    """Lazily import an optional dependency and retain the import error."""

    module_name: str
    _cached: ModuleType | None | _MissingSentinel = _MISSING
    _error: ModuleNotFoundError | None = None

    def _attempt_import(self) -> ModuleType | None:
        try:
            module = import_module(self.module_name)
        except ModuleNotFoundError as exc:
            self._cached = None
            self._error = exc
            return None
        else:
            self._cached = module
            self._error = None
            return module

    def get(self) -> ModuleType | None:
        """Return the imported module or ``None`` when unavailable."""
        cached = self._cached
        if isinstance(cached, _MissingSentinel):
            return self._attempt_import()
        if cached is None:
            return None
        return cached

    def load(self) -> ModuleType:
        """Return the dependency, raising the original import error when absent."""
        module = self.get()
        if module is None:
            error = self._error
            if error is not None:
                raise error
            message = f"{self.module_name} is not installed"
            raise ModuleNotFoundError(message)
        return module

    def require(self, message: str) -> ModuleType:
        """Return the dependency or raise ``ModuleNotFoundError`` with ``message``."""
        try:
            return self.load()
        except ModuleNotFoundError as exc:
            raise ModuleNotFoundError(message) from exc

    def available(self) -> bool:
        """Return ``True`` when the dependency can be imported."""
        return self.get() is not None

    def reset(self) -> None:
        """Forget any cached import result."""
        self._cached = _MISSING
        self._error = None

    def attr(self, attribute: str) -> Any | None:
        """Return ``attribute`` from the dependency when available."""
        module = self.get()
        if module is None:
            return None
        return getattr(module, attribute, None)

    @property
    def error(self) -> ModuleNotFoundError | None:
        """Return the most recent ``ModuleNotFoundError`` (if any)."""
        self.get()
        return self._error


pytorch_lightning = OptionalDependency("pytorch_lightning")
datasets = OptionalDependency("datasets")
verifiers = OptionalDependency("verifiers")
jellyfish = OptionalDependency("jellyfish")
jsonschema = OptionalDependency("jsonschema")
nltk = OptionalDependency("nltk")
torch = OptionalDependency("torch")


def reset_optional_dependencies() -> None:
    """Clear cached optional dependency imports (used by tests)."""
    for dependency in (pytorch_lightning, datasets, verifiers, jellyfish, jsonschema, nltk, torch):
        dependency.reset()


def get_datasets_dataset() -> Any | None:
    """Return Hugging Face ``Dataset`` class when the dependency is installed."""
    return datasets.attr("Dataset")


def require_datasets(message: str = "datasets is not installed") -> ModuleType:
    """Ensure the Hugging Face datasets dependency is present."""
    return datasets.require(message)


def get_pytorch_lightning_datamodule() -> Any | None:
    """Return the PyTorch Lightning ``LightningDataModule`` when available."""
    return pytorch_lightning.attr("LightningDataModule")


def require_pytorch_lightning(message: str = "pytorch_lightning is not installed") -> ModuleType:
    """Ensure the PyTorch Lightning dependency is present."""
    return pytorch_lightning.require(message)


def require_verifiers(message: str = "verifiers is not installed") -> ModuleType:
    """Ensure the verifiers dependency is present."""
    return verifiers.require(message)


def require_jellyfish(message: str = "jellyfish is not installed") -> ModuleType:
    """Ensure the jellyfish dependency is present."""
    return jellyfish.require(message)


def require_torch(message: str = "torch is not installed") -> ModuleType:
    """Ensure the PyTorch dependency is present."""
    return torch.require(message)


def get_torch_dataloader() -> Any | None:
    """Return PyTorch ``DataLoader`` when the dependency is installed."""
    torch_module = torch.get()
    if torch_module is None:
        return None

    utils_module = getattr(torch_module, "utils", None)
    if utils_module is None:
        return None

    data_module = getattr(utils_module, "data", None)
    if data_module is None:
        return None

    return getattr(data_module, "DataLoader", None)


def get_installed_extras(
    extras: Iterable[str] | None = None,
    *,
    distribution: str = "glitchlings",
) -> dict[str, bool]:
    """Return a mapping of optional extras to installation availability."""
    try:
        dist = metadata.distribution(distribution)
    except metadata.PackageNotFoundError:
        return {}

    provided = {extra.lower() for extra in dist.metadata.get_all("Provides-Extra") or []}
    targets = {extra.lower() for extra in extras} if extras is not None else provided
    requirements = dist.requires or []
    mapping: dict[str, set[str]] = {extra: set() for extra in provided}

    for requirement in requirements:
        names = _extras_from_requirement(requirement, provided)
        if not names:
            continue
        req_name = _requirement_name(requirement)
        for extra in names:
            mapping.setdefault(extra, set()).add(req_name)

    status: dict[str, bool] = {}
    for extra in targets:
        deps = mapping.get(extra)
        if not deps:
            status[extra] = False
            continue
        status[extra] = all(_distribution_installed(dep) for dep in deps)
    return status


def _distribution_installed(name: str) -> bool:
    try:
        metadata.distribution(name)
    except metadata.PackageNotFoundError:
        return False
    return True


_EXTRA_PATTERN = re.compile(r'extra\s*==\s*"(?P<extra>[^"]+)"')


def _extras_from_requirement(requirement: str, candidates: set[str]) -> set[str]:
    if Requirement is not None and default_environment is not None:
        req = Requirement(requirement)
        if req.marker is None:
            return set()
        extras: set[str] = set()
        for extra in candidates:
            environment = default_environment()
            environment["extra"] = extra
            if req.marker.evaluate(environment):
                extras.add(extra)
        return extras

    matches = set()
    for match in _EXTRA_PATTERN.finditer(requirement):
        extra = match.group("extra").lower()
        if extra in candidates:
            matches.add(extra)
    return matches


def _requirement_name(requirement: str) -> str:
    if Requirement is not None:
        req = Requirement(requirement)
        return req.name

    candidate = requirement.split(";", 1)[0].strip()
    for delimiter in ("[", "(", " ", "<", ">", "=", "!", "~"):
        index = candidate.find(delimiter)
        if index != -1:
            return candidate[:index]
    return candidate


__all__ = [
    "OptionalDependency",
    "datasets",
    "verifiers",
    "jellyfish",
    "jsonschema",
    "nltk",
    "get_datasets_dataset",
    "require_datasets",
    "require_verifiers",
    "require_jellyfish",
    "get_installed_extras",
    "reset_optional_dependencies",
]
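The new module wraps each optional import in an OptionalDependency handle; a minimal usage sketch based on the API added above (the ad-hoc module name in the middle example is a placeholder):

# Sketch of the OptionalDependency API introduced above.
from glitchlings.compat import OptionalDependency, jellyfish, get_installed_extras

if jellyfish.available():
    module = jellyfish.load()      # the imported module itself
else:
    print(jellyfish.error)         # the ModuleNotFoundError captured on the failed import

# Any other module can be wrapped ad hoc; "some_optional_module" is a placeholder name.
maybe = OptionalDependency("some_optional_module")
print(maybe.available(), maybe.attr("__version__"))

# Map the distribution's declared extras to whether their requirements are installed.
print(get_installed_extras())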
glitchlings/config.py
CHANGED
@@ -2,29 +2,86 @@
 
 from __future__ import annotations
 
+import importlib
 import os
+import warnings
 from dataclasses import dataclass, field
 from io import TextIOBase
 from pathlib import Path
-from typing import Any, Mapping, Sequence,
+from typing import IO, TYPE_CHECKING, Any, Mapping, Protocol, Sequence, cast
+
+from glitchlings.compat import jsonschema
 
 try:  # Python 3.11+
-    import tomllib
+    import tomllib as _tomllib
 except ModuleNotFoundError:  # pragma: no cover - Python < 3.11
-
+    _tomllib = importlib.import_module("tomli")
+
+
+class _TomllibModule(Protocol):
+    def load(self, fp: IO[bytes]) -> Any:
+        ...
+
+
+tomllib = cast(_TomllibModule, _tomllib)
+
+
+class _YamlModule(Protocol):
+    YAMLError: type[Exception]
 
-
+    def safe_load(self, stream: str) -> Any:
+        ...
 
 
+yaml = cast(_YamlModule, importlib.import_module("yaml"))
+
 if TYPE_CHECKING:  # pragma: no cover - typing only
-    from .zoo import Glitchling
+    from .zoo import Gaggle, Glitchling
 
 
 CONFIG_ENV_VAR = "GLITCHLINGS_CONFIG"
 DEFAULT_CONFIG_PATH = Path(__file__).with_name("config.toml")
-DEFAULT_LEXICON_PRIORITY = ["vector", "
+DEFAULT_LEXICON_PRIORITY = ["vector", "wordnet"]
 DEFAULT_ATTACK_SEED = 151
 
+ATTACK_CONFIG_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "required": ["glitchlings"],
+    "properties": {
+        "glitchlings": {
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                "anyOf": [
+                    {"type": "string", "minLength": 1},
+                    {
+                        "type": "object",
+                        "required": ["name"],
+                        "properties": {
+                            "name": {"type": "string", "minLength": 1},
+                            "type": {"type": "string", "minLength": 1},
+                            "parameters": {"type": "object"},
+                        },
+                        "additionalProperties": True,
+                    },
+                    {
+                        "type": "object",
+                        "required": ["type"],
+                        "properties": {
+                            "name": {"type": "string", "minLength": 1},
+                            "type": {"type": "string", "minLength": 1},
+                            "parameters": {"type": "object"},
+                        },
+                        "additionalProperties": True,
+                    },
+                ]
+            },
+        },
+        "seed": {"type": "integer"},
+    },
+    "additionalProperties": False,
+}
+
 
 @dataclass(slots=True)
 class LexiconConfig:
@@ -32,7 +89,6 @@ class LexiconConfig:
 
     priority: list[str] = field(default_factory=lambda: list(DEFAULT_LEXICON_PRIORITY))
     vector_cache: Path | None = None
-    graph_cache: Path | None = None
 
 
 @dataclass(slots=True)
@@ -48,21 +104,18 @@ _CONFIG: RuntimeConfig | None = None
 
 def reset_config() -> None:
     """Forget any cached runtime configuration."""
-
     global _CONFIG
     _CONFIG = None
 
 
 def reload_config() -> RuntimeConfig:
     """Reload the runtime configuration from disk."""
-
     reset_config()
     return get_config()
 
 
 def get_config() -> RuntimeConfig:
     """Return the cached runtime configuration, loading it if necessary."""
-
     global _CONFIG
     if _CONFIG is None:
         _CONFIG = _load_runtime_config()
@@ -72,26 +125,27 @@ def get_config() -> RuntimeConfig:
 def _load_runtime_config() -> RuntimeConfig:
     path = _resolve_config_path()
     data = _read_toml(path)
-
+    mapping = _validate_runtime_config_data(data, source=path)
+
+    lexicon_section = mapping.get("lexicon", {})
 
     priority = lexicon_section.get("priority", DEFAULT_LEXICON_PRIORITY)
     if not isinstance(priority, Sequence) or isinstance(priority, (str, bytes)):
         raise ValueError("lexicon.priority must be a sequence of strings.")
-    normalized_priority = [
+    normalized_priority = []
+    for item in priority:
+        string_value = str(item)
+        if not string_value:
+            raise ValueError("lexicon.priority entries must be non-empty strings.")
+        normalized_priority.append(string_value)
 
     vector_cache = _resolve_optional_path(
         lexicon_section.get("vector_cache"),
         base=path.parent,
     )
-    graph_cache = _resolve_optional_path(
-        lexicon_section.get("graph_cache"),
-        base=path.parent,
-    )
-
     lexicon_config = LexiconConfig(
         priority=normalized_priority,
         vector_cache=vector_cache,
-        graph_cache=graph_cache,
     )
 
     return RuntimeConfig(lexicon=lexicon_config, path=path)
@@ -110,7 +164,40 @@ def _read_toml(path: Path) -> dict[str, Any]:
             return {}
         raise FileNotFoundError(f"Configuration file '{path}' not found.")
     with path.open("rb") as handle:
-
+        loaded = tomllib.load(handle)
+    if isinstance(loaded, Mapping):
+        return dict(loaded)
+    raise ValueError(f"Configuration file '{path}' must contain a top-level mapping.")
+
+
+def _validate_runtime_config_data(data: Any, *, source: Path) -> Mapping[str, Any]:
+    if data is None:
+        return {}
+    if not isinstance(data, Mapping):
+        raise ValueError(f"Configuration file '{source}' must contain a top-level mapping.")
+
+    allowed_sections = {"lexicon"}
+    unexpected_sections = [str(key) for key in data if key not in allowed_sections]
+    if unexpected_sections:
+        extras = ", ".join(sorted(unexpected_sections))
+        raise ValueError(f"Configuration file '{source}' has unsupported sections: {extras}.")
+
+    lexicon_section = data.get("lexicon", {})
+    if not isinstance(lexicon_section, Mapping):
+        raise ValueError("Configuration 'lexicon' section must be a table.")
+
+    allowed_lexicon_keys = {"priority", "vector_cache"}
+    unexpected_keys = [str(key) for key in lexicon_section if key not in allowed_lexicon_keys]
+    if unexpected_keys:
+        extras = ", ".join(sorted(unexpected_keys))
+        raise ValueError(f"Unknown lexicon settings: {extras}.")
+
+    for key in ("vector_cache",):
+        value = lexicon_section.get(key)
+        if value is not None and not isinstance(value, (str, os.PathLike)):
+            raise ValueError(f"lexicon.{key} must be a path or string when provided.")
+
+    return data
 
 
 def _resolve_optional_path(value: Any, *, base: Path) -> Path | None:
@@ -137,7 +224,6 @@ def load_attack_config(
     encoding: str = "utf-8",
 ) -> AttackConfig:
     """Load and parse an attack configuration from YAML."""
-
     if isinstance(source, (str, Path)):
         path = Path(source)
         label = str(path)
@@ -155,36 +241,67 @@ def load_attack_config(
     return parse_attack_config(data, source=label)
 
 
-def
-    """Convert arbitrary YAML data into a validated ``AttackConfig``."""
-
+def _validate_attack_config_schema(data: Any, *, source: str) -> Mapping[str, Any]:
     if data is None:
         raise ValueError(f"Attack configuration '{source}' is empty.")
-
     if not isinstance(data, Mapping):
         raise ValueError(f"Attack configuration '{source}' must be a mapping.")
 
-
-    if
+    unexpected = [key for key in data if key not in {"glitchlings", "seed"}]
+    if unexpected:
+        extras = ", ".join(sorted(unexpected))
+        raise ValueError(f"Attack configuration '{source}' has unsupported fields: {extras}.")
+
+    if "glitchlings" not in data:
        raise ValueError(f"Attack configuration '{source}' must define 'glitchlings'.")
 
+    raw_glitchlings = data["glitchlings"]
     if not isinstance(raw_glitchlings, Sequence) or isinstance(raw_glitchlings, (str, bytes)):
         raise ValueError(f"'glitchlings' in '{source}' must be a sequence.")
 
+    seed = data.get("seed")
+    if seed is not None and not isinstance(seed, int):
+        raise ValueError(f"Seed in '{source}' must be an integer if provided.")
+
+    for index, entry in enumerate(raw_glitchlings, start=1):
+        if isinstance(entry, Mapping):
+            name_candidate = entry.get("name") or entry.get("type")
+            if not isinstance(name_candidate, str) or not name_candidate.strip():
+                raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
+            parameters = entry.get("parameters")
+            if parameters is not None and not isinstance(parameters, Mapping):
+                raise ValueError(
+                    f"{source}: glitchling '{name_candidate}' parameters must be a mapping."
+                )
+
+    schema_module = jsonschema.get()
+    if schema_module is not None:
+        try:
+            schema_module.validate(instance=data, schema=ATTACK_CONFIG_SCHEMA)
+        except schema_module.exceptions.ValidationError as exc:  # pragma: no cover - optional dep
+            message = exc.message
+            raise ValueError(f"Attack configuration '{source}' is invalid: {message}") from exc
+
+    return data
+
+
+def parse_attack_config(data: Any, *, source: str = "<config>") -> AttackConfig:
+    """Convert arbitrary YAML data into a validated ``AttackConfig``."""
+    mapping = _validate_attack_config_schema(data, source=source)
+
+    raw_glitchlings = mapping["glitchlings"]
+
     glitchlings: list["Glitchling"] = []
     for index, entry in enumerate(raw_glitchlings, start=1):
         glitchlings.append(_build_glitchling(entry, source, index))
 
-    seed =
-    if seed is not None and not isinstance(seed, int):
-        raise ValueError(f"Seed in '{source}' must be an integer if provided.")
+    seed = mapping.get("seed")
 
     return AttackConfig(glitchlings=glitchlings, seed=seed)
 
 
-def build_gaggle(config: AttackConfig, *, seed_override: int | None = None):
+def build_gaggle(config: AttackConfig, *, seed_override: int | None = None) -> "Gaggle":
     """Instantiate a ``Gaggle`` according to ``config``."""
-
     from .zoo import Gaggle  # Imported lazily to avoid circular dependencies
 
     seed = seed_override if seed_override is not None else config.seed
@@ -201,7 +318,7 @@ def _load_yaml(text: str, label: str) -> Any:
     raise ValueError(f"Failed to parse attack configuration '{label}': {exc}") from exc
 
 
-def _build_glitchling(entry: Any, source: str, index: int):
+def _build_glitchling(entry: Any, source: str, index: int) -> "Glitchling":
     from .zoo import get_glitchling_class, parse_glitchling_spec
 
     if isinstance(entry, str):
@@ -211,14 +328,27 @@ def _build_glitchling(entry: Any, source: str, index: int):
            raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc
 
     if isinstance(entry, Mapping):
-        name_value = entry.get("name"
+        name_value = entry.get("name")
+        legacy_type = entry.get("type")
+        if name_value is None and legacy_type is not None:
+            warnings.warn(
+                f"{source}: glitchling #{index} uses 'type'; prefer 'name'.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            name_value = legacy_type
+        elif name_value is None:
+            name_value = legacy_type
+
         if not isinstance(name_value, str) or not name_value.strip():
             raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
 
         parameters = entry.get("parameters")
         if parameters is not None:
            if not isinstance(parameters, Mapping):
-                raise ValueError(
+                raise ValueError(
+                    f"{source}: glitchling '{name_value}' parameters must be a mapping."
+                )
            kwargs = dict(parameters)
        else:
            kwargs = {
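The stricter attack-config validation above accepts bare strings or mappings keyed by 'name' (or the legacy 'type'), plus an optional integer 'seed'; a minimal sketch of a config that would pass it (the glitchling names and the string spec form are assumptions, only the keys come from ATTACK_CONFIG_SCHEMA):

# Hypothetical config exercising the new validation path; entry names are illustrative.
from glitchlings.config import build_gaggle, parse_attack_config

data = {
    "glitchlings": [
        "typogre",                              # bare string spec
        {"name": "reduple", "parameters": {}},  # mapping keyed by 'name'
        {"type": "mim1c"},                      # legacy 'type' key, now emits a DeprecationWarning
    ],
    "seed": 151,
}

config = parse_attack_config(data, source="example")
gaggle = build_gaggle(config)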
glitchlings/config.toml
CHANGED