glitchlings 0.4.0__cp310-cp310-manylinux_2_28_x86_64.whl → 0.4.2__cp310-cp310-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +26 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust.cpython-310-x86_64-linux-gnu.so +0 -0
- glitchlings/compat.py +215 -0
- glitchlings/config.py +136 -19
- glitchlings/dlc/_shared.py +68 -0
- glitchlings/dlc/huggingface.py +26 -41
- glitchlings/dlc/prime.py +64 -101
- glitchlings/lexicon/__init__.py +26 -19
- glitchlings/lexicon/_cache.py +104 -0
- glitchlings/lexicon/graph.py +18 -39
- glitchlings/lexicon/metrics.py +1 -8
- glitchlings/lexicon/vector.py +29 -67
- glitchlings/lexicon/wordnet.py +39 -30
- glitchlings/main.py +9 -13
- glitchlings/util/__init__.py +18 -4
- glitchlings/util/adapters.py +27 -0
- glitchlings/zoo/__init__.py +21 -14
- glitchlings/zoo/_ocr_confusions.py +1 -3
- glitchlings/zoo/_rate.py +1 -4
- glitchlings/zoo/_sampling.py +0 -1
- glitchlings/zoo/_text_utils.py +1 -5
- glitchlings/zoo/adjax.py +0 -2
- glitchlings/zoo/core.py +185 -56
- glitchlings/zoo/jargoyle.py +9 -14
- glitchlings/zoo/mim1c.py +11 -10
- glitchlings/zoo/redactyl.py +5 -8
- glitchlings/zoo/reduple.py +3 -1
- glitchlings/zoo/rushmore.py +2 -8
- glitchlings/zoo/scannequin.py +5 -4
- glitchlings/zoo/typogre.py +3 -7
- glitchlings/zoo/zeedub.py +2 -2
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/METADATA +68 -4
- glitchlings-0.4.2.dist-info/RECORD +42 -0
- glitchlings-0.4.0.dist-info/RECORD +0 -38
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/top_level.txt +0 -0
glitchlings/__init__.py
CHANGED
|
@@ -1,29 +1,33 @@
|
|
|
1
|
+
from .config import AttackConfig, build_gaggle, load_attack_config
|
|
2
|
+
from .util import SAMPLE_TEXT
|
|
1
3
|
from .zoo import (
|
|
2
|
-
Typogre,
|
|
3
|
-
typogre,
|
|
4
|
-
Mim1c,
|
|
5
|
-
mim1c,
|
|
6
|
-
Jargoyle,
|
|
7
|
-
jargoyle,
|
|
8
4
|
Adjax,
|
|
9
|
-
|
|
5
|
+
Gaggle,
|
|
6
|
+
Glitchling,
|
|
7
|
+
Jargoyle,
|
|
8
|
+
Mim1c,
|
|
10
9
|
Redactyl,
|
|
11
|
-
redactyl,
|
|
12
10
|
Reduple,
|
|
13
|
-
reduple,
|
|
14
11
|
Rushmore,
|
|
15
|
-
rushmore,
|
|
16
12
|
Scannequin,
|
|
17
|
-
|
|
13
|
+
Typogre,
|
|
18
14
|
Zeedub,
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
adjax,
|
|
16
|
+
is_rust_pipeline_enabled,
|
|
17
|
+
is_rust_pipeline_supported,
|
|
18
|
+
jargoyle,
|
|
19
|
+
mim1c,
|
|
20
|
+
pipeline_feature_flag_enabled,
|
|
21
|
+
plan_glitchling_specs,
|
|
22
|
+
plan_glitchlings,
|
|
23
|
+
redactyl,
|
|
24
|
+
reduple,
|
|
25
|
+
rushmore,
|
|
26
|
+
scannequin,
|
|
22
27
|
summon,
|
|
28
|
+
typogre,
|
|
29
|
+
zeedub,
|
|
23
30
|
)
|
|
24
|
-
from .config import AttackConfig, build_gaggle, load_attack_config
|
|
25
|
-
from .util import SAMPLE_TEXT
|
|
26
|
-
|
|
27
31
|
|
|
28
32
|
__all__ = [
|
|
29
33
|
"Typogre",
|
|
@@ -47,6 +51,11 @@ __all__ = [
|
|
|
47
51
|
"summon",
|
|
48
52
|
"Glitchling",
|
|
49
53
|
"Gaggle",
|
|
54
|
+
"plan_glitchlings",
|
|
55
|
+
"plan_glitchling_specs",
|
|
56
|
+
"is_rust_pipeline_enabled",
|
|
57
|
+
"is_rust_pipeline_supported",
|
|
58
|
+
"pipeline_feature_flag_enabled",
|
|
50
59
|
"SAMPLE_TEXT",
|
|
51
60
|
"AttackConfig",
|
|
52
61
|
"build_gaggle",
|
glitchlings/__main__.py
CHANGED
|
Binary file
|
glitchlings/compat.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""Compatibility helpers centralising optional dependency imports and extras."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from importlib import import_module, metadata
|
|
8
|
+
from types import ModuleType
|
|
9
|
+
from typing import Any, Iterable
|
|
10
|
+
|
|
11
|
+
try: # pragma: no cover - packaging is bundled with modern Python environments
|
|
12
|
+
from packaging.markers import default_environment
|
|
13
|
+
from packaging.requirements import Requirement
|
|
14
|
+
except ModuleNotFoundError: # pragma: no cover - fallback when packaging missing
|
|
15
|
+
Requirement = None # type: ignore[assignment]
|
|
16
|
+
default_environment = None # type: ignore[assignment]
|
|
17
|
+
|
|
18
|
+
# Sentinel meaning "no import has been attempted yet". ``None`` cannot be used
# for this because it already means "import attempted and failed".
_MISSING = object()


@dataclass
class OptionalDependency:
    """Lazy handle on an optional module that remembers a failed import.

    The module named by ``module_name`` is imported on first access. The
    outcome (the module, or ``None`` plus the ``ModuleNotFoundError``) is
    cached on the instance until :meth:`reset` is called.
    """

    module_name: str
    _cached: ModuleType | object = _MISSING
    _error: ModuleNotFoundError | None = None

    def _attempt_import(self) -> ModuleType | None:
        """Import the module and record the outcome on the instance."""
        outcome: ModuleType | None = None
        failure: ModuleNotFoundError | None = None
        try:
            outcome = import_module(self.module_name)
        except ModuleNotFoundError as exc:
            failure = exc
        self._cached = outcome
        self._error = failure
        return outcome

    def get(self) -> ModuleType | None:
        """Return the module, or ``None`` if it could not be imported."""
        cached = self._cached
        if cached is _MISSING:
            return self._attempt_import()
        # Either the imported module or ``None`` from a failed attempt.
        return cached

    def load(self) -> ModuleType:
        """Return the module, re-raising the recorded import error if absent."""
        module = self.get()
        if module is not None:
            return module
        if self._error is not None:
            raise self._error
        message = f"{self.module_name} is not installed"
        raise ModuleNotFoundError(message)

    def require(self, message: str) -> ModuleType:
        """Return the module or raise ``ModuleNotFoundError(message)``.

        The original import error is chained as the cause.
        """
        try:
            module = self.load()
        except ModuleNotFoundError as exc:
            raise ModuleNotFoundError(message) from exc
        return module

    def available(self) -> bool:
        """Report whether the module can be imported."""
        module = self.get()
        return module is not None

    def reset(self) -> None:
        """Discard the cached import outcome so the next access retries."""
        self._cached, self._error = _MISSING, None

    def attr(self, attribute: str) -> Any | None:
        """Return ``attribute`` from the module, or ``None`` if either is missing."""
        module = self.get()
        return None if module is None else getattr(module, attribute, None)

    @property
    def error(self) -> ModuleNotFoundError | None:
        """Return the recorded ``ModuleNotFoundError``, if any.

        Triggers the import first when it has not been attempted yet.
        """
        self.get()
        return self._error
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# One shared lazy handle per optional third-party integration.
datasets = OptionalDependency("datasets")
verifiers = OptionalDependency("verifiers")
jellyfish = OptionalDependency("jellyfish")
jsonschema = OptionalDependency("jsonschema")
nltk = OptionalDependency("nltk")


def reset_optional_dependencies() -> None:
    """Drop the cached import outcome of every optional dependency handle (used by tests)."""
    handles = (datasets, verifiers, jellyfish, jsonschema, nltk)
    for handle in handles:
        handle.reset()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def get_datasets_dataset() -> Any | None:
    """Look up the ``Dataset`` attribute on the lazily imported ``datasets`` module.

    Returns ``None`` when the module cannot be imported or lacks the attribute.
    """
    dataset_cls = datasets.attr("Dataset")
    return dataset_cls
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def require_datasets(message: str = "datasets is not installed") -> ModuleType:
    """Return the ``datasets`` module or raise ``ModuleNotFoundError(message)``."""
    module = datasets.require(message)
    return module
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def require_verifiers(message: str = "verifiers is not installed") -> ModuleType:
    """Return the ``verifiers`` module or raise ``ModuleNotFoundError(message)``."""
    module = verifiers.require(message)
    return module
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def require_jellyfish(message: str = "jellyfish is not installed") -> ModuleType:
    """Return the ``jellyfish`` module or raise ``ModuleNotFoundError(message)``."""
    module = jellyfish.require(message)
    return module
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def get_installed_extras(
    extras: Iterable[str] | None = None,
    *,
    distribution: str = "glitchlings",
) -> dict[str, bool]:
    """Report, per optional extra, whether all of its requirements are installed.

    Args:
        extras: Extra names to check (compared lower-cased). When ``None``,
            every extra declared by the distribution is checked.
        distribution: Name of the installed distribution to inspect.

    Returns:
        A mapping from lower-cased extra name to ``True`` when every
        requirement gated by that extra is installed. An extra with no known
        requirements maps to ``False``. The result is empty when the
        distribution itself is not installed.
    """
    try:
        dist = metadata.distribution(distribution)
    except metadata.PackageNotFoundError:
        return {}

    declared = {name.lower() for name in dist.metadata.get_all("Provides-Extra") or []}
    if extras is None:
        wanted = declared
    else:
        wanted = {name.lower() for name in extras}

    # Group requirement names by the extra(s) whose marker enables them.
    deps_by_extra: dict[str, set[str]] = {name: set() for name in declared}
    for spec in dist.requires or []:
        gated_by = _extras_from_requirement(spec, declared)
        if gated_by:
            package = _requirement_name(spec)
            for name in gated_by:
                deps_by_extra.setdefault(name, set()).add(package)

    report: dict[str, bool] = {}
    for name in wanted:
        packages = deps_by_extra.get(name)
        report[name] = bool(packages) and all(
            _distribution_installed(package) for package in packages
        )
    return report
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _distribution_installed(name: str) -> bool:
    """Return ``True`` when ``importlib.metadata`` can find a distribution called ``name``."""
    try:
        metadata.distribution(name)
    except metadata.PackageNotFoundError:
        found = False
    else:
        found = True
    return found
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Matches ``extra == "name"`` or ``extra == 'name'`` inside a requirement
# marker. The pattern is a raw string, so whitespace must be written ``\s``;
# ``\\s`` would match a literal backslash and the fallback would never fire.
_EXTRA_PATTERN = re.compile(r"""extra\s*==\s*(?P<quote>["'])(?P<extra>[^"']+)(?P=quote)""")


def _extras_from_requirement(requirement: str, candidates: set[str]) -> set[str]:
    """Return the extras from ``candidates`` that enable ``requirement``.

    Args:
        requirement: A requirement string, optionally with an environment
            marker such as ``; extra == "hf"``.
        candidates: Lower-cased extra names to test against the marker.

    Returns:
        The subset of ``candidates`` whose marker selects the requirement.
        Empty when the requirement has no ``extra`` marker.
    """
    if Requirement is not None and default_environment is not None:
        req = Requirement(requirement)
        if req.marker is None:
            return set()
        # The base environment does not depend on the extra, so build it once
        # and overlay only the ``extra`` key per candidate.
        base_environment = default_environment()
        return {
            extra
            for extra in candidates
            if req.marker.evaluate({**base_environment, "extra": extra})
        }

    # ``packaging`` is unavailable: fall back to scanning the marker text.
    matches: set[str] = set()
    for match in _EXTRA_PATTERN.finditer(requirement):
        extra = match.group("extra").lower()
        if extra in candidates:
            matches.add(extra)
    return matches
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _requirement_name(requirement: str) -> str:
    """Return the distribution name at the start of ``requirement``.

    Uses ``packaging.requirements.Requirement`` when it was importable.
    Otherwise the marker (everything after ``;``) is dropped and the name is
    cut at the earliest extras, version, or URL delimiter.

    Args:
        requirement: A requirement string such as ``"pkg[extra]>=1.0; marker"``.

    Returns:
        The bare distribution name without surrounding whitespace.
    """
    if Requirement is not None:
        return Requirement(requirement).name

    candidate = requirement.split(";", 1)[0].strip()
    # Cut at the EARLIEST delimiter, not the first one in list order.
    # Otherwise "pkg (>=1.0)" is cut at "(" and keeps the trailing space.
    delimiters = ("[", "(", " ", "<", ">", "=", "!", "~", "@")
    positions = [index for index in map(candidate.find, delimiters) if index != -1]
    if not positions:
        return candidate
    return candidate[: min(positions)].strip()
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
__all__ = [
|
|
203
|
+
"OptionalDependency",
|
|
204
|
+
"datasets",
|
|
205
|
+
"verifiers",
|
|
206
|
+
"jellyfish",
|
|
207
|
+
"jsonschema",
|
|
208
|
+
"nltk",
|
|
209
|
+
"get_datasets_dataset",
|
|
210
|
+
"require_datasets",
|
|
211
|
+
"require_verifiers",
|
|
212
|
+
"require_jellyfish",
|
|
213
|
+
"get_installed_extras",
|
|
214
|
+
"reset_optional_dependencies",
|
|
215
|
+
]
|
glitchlings/config.py
CHANGED
|
@@ -3,10 +3,11 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
|
+
import warnings
|
|
6
7
|
from dataclasses import dataclass, field
|
|
7
8
|
from io import TextIOBase
|
|
8
9
|
from pathlib import Path
|
|
9
|
-
from typing import Any, Mapping, Sequence
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Mapping, Sequence
|
|
10
11
|
|
|
11
12
|
try: # Python 3.11+
|
|
12
13
|
import tomllib
|
|
@@ -15,6 +16,7 @@ except ModuleNotFoundError: # pragma: no cover - Python < 3.11
|
|
|
15
16
|
|
|
16
17
|
import yaml
|
|
17
18
|
|
|
19
|
+
from .compat import jsonschema
|
|
18
20
|
|
|
19
21
|
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
20
22
|
from .zoo import Glitchling
|
|
@@ -25,6 +27,44 @@ DEFAULT_CONFIG_PATH = Path(__file__).with_name("config.toml")
|
|
|
25
27
|
DEFAULT_LEXICON_PRIORITY = ["vector", "graph", "wordnet"]
|
|
26
28
|
DEFAULT_ATTACK_SEED = 151
|
|
27
29
|
|
|
30
|
+
ATTACK_CONFIG_SCHEMA: dict[str, Any] = {
|
|
31
|
+
"type": "object",
|
|
32
|
+
"required": ["glitchlings"],
|
|
33
|
+
"properties": {
|
|
34
|
+
"glitchlings": {
|
|
35
|
+
"type": "array",
|
|
36
|
+
"minItems": 1,
|
|
37
|
+
"items": {
|
|
38
|
+
"anyOf": [
|
|
39
|
+
{"type": "string", "minLength": 1},
|
|
40
|
+
{
|
|
41
|
+
"type": "object",
|
|
42
|
+
"required": ["name"],
|
|
43
|
+
"properties": {
|
|
44
|
+
"name": {"type": "string", "minLength": 1},
|
|
45
|
+
"type": {"type": "string", "minLength": 1},
|
|
46
|
+
"parameters": {"type": "object"},
|
|
47
|
+
},
|
|
48
|
+
"additionalProperties": True,
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"type": "object",
|
|
52
|
+
"required": ["type"],
|
|
53
|
+
"properties": {
|
|
54
|
+
"name": {"type": "string", "minLength": 1},
|
|
55
|
+
"type": {"type": "string", "minLength": 1},
|
|
56
|
+
"parameters": {"type": "object"},
|
|
57
|
+
},
|
|
58
|
+
"additionalProperties": True,
|
|
59
|
+
},
|
|
60
|
+
]
|
|
61
|
+
},
|
|
62
|
+
},
|
|
63
|
+
"seed": {"type": "integer"},
|
|
64
|
+
},
|
|
65
|
+
"additionalProperties": False,
|
|
66
|
+
}
|
|
67
|
+
|
|
28
68
|
|
|
29
69
|
@dataclass(slots=True)
|
|
30
70
|
class LexiconConfig:
|
|
@@ -48,21 +88,18 @@ _CONFIG: RuntimeConfig | None = None
|
|
|
48
88
|
|
|
49
89
|
def reset_config() -> None:
|
|
50
90
|
"""Forget any cached runtime configuration."""
|
|
51
|
-
|
|
52
91
|
global _CONFIG
|
|
53
92
|
_CONFIG = None
|
|
54
93
|
|
|
55
94
|
|
|
56
95
|
def reload_config() -> RuntimeConfig:
|
|
57
96
|
"""Reload the runtime configuration from disk."""
|
|
58
|
-
|
|
59
97
|
reset_config()
|
|
60
98
|
return get_config()
|
|
61
99
|
|
|
62
100
|
|
|
63
101
|
def get_config() -> RuntimeConfig:
|
|
64
102
|
"""Return the cached runtime configuration, loading it if necessary."""
|
|
65
|
-
|
|
66
103
|
global _CONFIG
|
|
67
104
|
if _CONFIG is None:
|
|
68
105
|
_CONFIG = _load_runtime_config()
|
|
@@ -72,12 +109,19 @@ def get_config() -> RuntimeConfig:
|
|
|
72
109
|
def _load_runtime_config() -> RuntimeConfig:
|
|
73
110
|
path = _resolve_config_path()
|
|
74
111
|
data = _read_toml(path)
|
|
75
|
-
|
|
112
|
+
mapping = _validate_runtime_config_data(data, source=path)
|
|
113
|
+
|
|
114
|
+
lexicon_section = mapping.get("lexicon", {})
|
|
76
115
|
|
|
77
116
|
priority = lexicon_section.get("priority", DEFAULT_LEXICON_PRIORITY)
|
|
78
117
|
if not isinstance(priority, Sequence) or isinstance(priority, (str, bytes)):
|
|
79
118
|
raise ValueError("lexicon.priority must be a sequence of strings.")
|
|
80
|
-
normalized_priority = [
|
|
119
|
+
normalized_priority = []
|
|
120
|
+
for item in priority:
|
|
121
|
+
string_value = str(item)
|
|
122
|
+
if not string_value:
|
|
123
|
+
raise ValueError("lexicon.priority entries must be non-empty strings.")
|
|
124
|
+
normalized_priority.append(string_value)
|
|
81
125
|
|
|
82
126
|
vector_cache = _resolve_optional_path(
|
|
83
127
|
lexicon_section.get("vector_cache"),
|
|
@@ -113,6 +157,36 @@ def _read_toml(path: Path) -> dict[str, Any]:
|
|
|
113
157
|
return tomllib.load(handle)
|
|
114
158
|
|
|
115
159
|
|
|
160
|
+
def _validate_runtime_config_data(data: Any, *, source: Path) -> Mapping[str, Any]:
    """Check the shape of parsed runtime configuration and return it unchanged.

    Args:
        data: The parsed configuration document, or ``None``.
        source: Path of the configuration file, used in error messages.

    Returns:
        ``{}`` when ``data`` is ``None``, otherwise ``data`` itself.

    Raises:
        ValueError: If ``data`` is not a mapping, has sections other than
            ``lexicon``, has a non-mapping ``lexicon`` section, has unknown
            lexicon keys, or has a cache setting that is not a string or path.
    """
    if data is None:
        return {}
    if not isinstance(data, Mapping):
        raise ValueError(f"Configuration file '{source}' must contain a top-level mapping.")

    unknown_sections = sorted(str(key) for key in data if key not in {"lexicon"})
    if unknown_sections:
        extras = ", ".join(unknown_sections)
        raise ValueError(f"Configuration file '{source}' has unsupported sections: {extras}.")

    lexicon = data.get("lexicon", {})
    if not isinstance(lexicon, Mapping):
        raise ValueError("Configuration 'lexicon' section must be a table.")

    known_keys = {"priority", "vector_cache", "graph_cache"}
    unknown_keys = sorted(str(key) for key in lexicon if key not in known_keys)
    if unknown_keys:
        extras = ", ".join(unknown_keys)
        raise ValueError(f"Unknown lexicon settings: {extras}.")

    for key in ("vector_cache", "graph_cache"):
        value = lexicon.get(key)
        if value is None or isinstance(value, (str, os.PathLike)):
            continue
        raise ValueError(f"lexicon.{key} must be a path or string when provided.")

    return data
|
|
188
|
+
|
|
189
|
+
|
|
116
190
|
def _resolve_optional_path(value: Any, *, base: Path) -> Path | None:
|
|
117
191
|
if value in (None, ""):
|
|
118
192
|
return None
|
|
@@ -137,7 +211,6 @@ def load_attack_config(
|
|
|
137
211
|
encoding: str = "utf-8",
|
|
138
212
|
) -> AttackConfig:
|
|
139
213
|
"""Load and parse an attack configuration from YAML."""
|
|
140
|
-
|
|
141
214
|
if isinstance(source, (str, Path)):
|
|
142
215
|
path = Path(source)
|
|
143
216
|
label = str(path)
|
|
@@ -155,36 +228,67 @@ def load_attack_config(
|
|
|
155
228
|
return parse_attack_config(data, source=label)
|
|
156
229
|
|
|
157
230
|
|
|
158
|
-
def
|
|
159
|
-
"""Convert arbitrary YAML data into a validated ``AttackConfig``."""
|
|
160
|
-
|
|
231
|
+
def _validate_attack_config_schema(data: Any, *, source: str) -> Mapping[str, Any]:
    """Validate the structure of a parsed attack configuration.

    Hand-written checks always run. When the optional ``jsonschema``
    dependency is importable, ``data`` is also validated against
    ``ATTACK_CONFIG_SCHEMA``.

    Args:
        data: The parsed configuration document.
        source: Label for the configuration origin, used in error messages.

    Returns:
        ``data`` unchanged once it passes validation.

    Raises:
        ValueError: If the document is empty, is not a mapping, has
            unsupported fields, lacks a sequence under ``glitchlings``, has a
            non-integer ``seed``, or contains a malformed glitchling entry.
    """
    if data is None:
        raise ValueError(f"Attack configuration '{source}' is empty.")
    if not isinstance(data, Mapping):
        raise ValueError(f"Attack configuration '{source}' must be a mapping.")

    # Keys are stringified before sorting and joining. YAML allows non-string
    # keys, which would otherwise raise TypeError instead of the ValueError.
    unexpected = sorted(str(key) for key in data if key not in {"glitchlings", "seed"})
    if unexpected:
        extras = ", ".join(unexpected)
        raise ValueError(f"Attack configuration '{source}' has unsupported fields: {extras}.")

    if "glitchlings" not in data:
        raise ValueError(f"Attack configuration '{source}' must define 'glitchlings'.")

    raw_glitchlings = data["glitchlings"]
    if not isinstance(raw_glitchlings, Sequence) or isinstance(raw_glitchlings, (str, bytes)):
        raise ValueError(f"'glitchlings' in '{source}' must be a sequence.")

    seed = data.get("seed")
    # ``bool`` is a subclass of ``int``. Reject it explicitly so the result
    # matches the schema's "integer" type whether or not jsonschema is installed.
    if seed is not None and (isinstance(seed, bool) or not isinstance(seed, int)):
        raise ValueError(f"Seed in '{source}' must be an integer if provided.")

    for index, entry in enumerate(raw_glitchlings, start=1):
        if isinstance(entry, Mapping):
            # "type" is accepted as a fallback when "name" is absent or empty.
            name_candidate = entry.get("name") or entry.get("type")
            if not isinstance(name_candidate, str) or not name_candidate.strip():
                raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
            parameters = entry.get("parameters")
            if parameters is not None and not isinstance(parameters, Mapping):
                raise ValueError(
                    f"{source}: glitchling '{name_candidate}' parameters must be a mapping."
                )

    schema_module = jsonschema.get()
    if schema_module is not None:
        try:
            schema_module.validate(instance=data, schema=ATTACK_CONFIG_SCHEMA)
        except schema_module.exceptions.ValidationError as exc:  # pragma: no cover - optional dep
            message = exc.message
            raise ValueError(f"Attack configuration '{source}' is invalid: {message}") from exc

    return data
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def parse_attack_config(data: Any, *, source: str = "<config>") -> AttackConfig:
    """Validate parsed YAML data and build an ``AttackConfig`` from it.

    Args:
        data: The parsed configuration document.
        source: Label for the configuration origin, used in error messages.

    Returns:
        An ``AttackConfig`` holding one glitchling per configured entry (in
        order) and the configured ``seed``, or ``None`` when no seed is given.
    """
    validated = _validate_attack_config_schema(data, source=source)

    # Entries are numbered from 1 so messages match a human's view of the file.
    glitchlings: list["Glitchling"] = [
        _build_glitchling(entry, source, position)
        for position, entry in enumerate(validated["glitchlings"], start=1)
    ]

    return AttackConfig(glitchlings=glitchlings, seed=validated.get("seed"))
|
|
183
288
|
|
|
184
289
|
|
|
185
290
|
def build_gaggle(config: AttackConfig, *, seed_override: int | None = None):
|
|
186
291
|
"""Instantiate a ``Gaggle`` according to ``config``."""
|
|
187
|
-
|
|
188
292
|
from .zoo import Gaggle # Imported lazily to avoid circular dependencies
|
|
189
293
|
|
|
190
294
|
seed = seed_override if seed_override is not None else config.seed
|
|
@@ -211,14 +315,27 @@ def _build_glitchling(entry: Any, source: str, index: int):
|
|
|
211
315
|
raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc
|
|
212
316
|
|
|
213
317
|
if isinstance(entry, Mapping):
|
|
214
|
-
name_value = entry.get("name"
|
|
318
|
+
name_value = entry.get("name")
|
|
319
|
+
legacy_type = entry.get("type")
|
|
320
|
+
if name_value is None and legacy_type is not None:
|
|
321
|
+
warnings.warn(
|
|
322
|
+
f"{source}: glitchling #{index} uses 'type'; prefer 'name'.",
|
|
323
|
+
DeprecationWarning,
|
|
324
|
+
stacklevel=2,
|
|
325
|
+
)
|
|
326
|
+
name_value = legacy_type
|
|
327
|
+
elif name_value is None:
|
|
328
|
+
name_value = legacy_type
|
|
329
|
+
|
|
215
330
|
if not isinstance(name_value, str) or not name_value.strip():
|
|
216
331
|
raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
|
|
217
332
|
|
|
218
333
|
parameters = entry.get("parameters")
|
|
219
334
|
if parameters is not None:
|
|
220
335
|
if not isinstance(parameters, Mapping):
|
|
221
|
-
raise ValueError(
|
|
336
|
+
raise ValueError(
|
|
337
|
+
f"{source}: glitchling '{name_value}' parameters must be a mapping."
|
|
338
|
+
)
|
|
222
339
|
kwargs = dict(parameters)
|
|
223
340
|
else:
|
|
224
341
|
kwargs = {
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Shared utilities for DLC integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable, Sequence
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def resolve_environment(
    env: Any,
    *,
    loader: Callable[[str], Any],
    environment_type: type[Any],
) -> Any:
    """Return ``env`` as an instance of ``environment_type``.

    Args:
        env: An environment object, or a string that ``loader`` turns into one.
        loader: Called with ``env`` when ``env`` is a string.
        environment_type: The type the resolved environment must be an instance of.

    Raises:
        TypeError: If the resolved value is not an ``environment_type`` instance.
    """
    resolved = loader(env) if isinstance(env, str) else env
    if isinstance(resolved, environment_type):
        return resolved
    raise TypeError("Invalid environment type")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _first_row(dataset: Any) -> Any:
    """Return the first row of ``dataset``, or ``{}`` when it has none.

    Datasets that support ``len()`` are indexed directly. Length-less
    datasets are previewed through ``take(1)`` when available, otherwise by
    plain iteration, and the previewed row is copied into a ``dict``.
    """
    try:
        dataset_length = len(dataset)
    except TypeError:
        take_fn = getattr(dataset, "take", None)
        rows = take_fn(1) if callable(take_fn) else dataset
        first_row = next(iter(rows), None)
        return dict(first_row) if first_row is not None else {}
    return dataset[0] if dataset_length else {}


def resolve_columns(dataset: Any, columns: Sequence[str] | None) -> list[str]:
    """Identify which dataset columns should be corrupted.

    Args:
        dataset: Object optionally exposing ``column_names``. It must support
            either ``len()`` with indexing, or ``take(1)``/iteration.
        columns: Explicit column names, or ``None`` to infer them.

    Returns:
        ``columns`` as a list when given. Otherwise ``["prompt"]`` or
        ``["question"]`` if such a column exists, else every column whose
        value in the first row is a string.

    Raises:
        ValueError: If an explicit column is not among the known column
            names, or if no column can be inferred.
    """
    raw_names = getattr(dataset, "column_names", ())
    # Some length-less datasets expose ``column_names`` as ``None`` when their
    # schema is not known up front (presumably e.g. Hugging Face iterable
    # datasets; confirm against callers). Treat that as "unknown", not a crash.
    names_known = raw_names is not None
    column_names = list(raw_names) if names_known else []

    if columns is not None:
        if names_known:
            missing = sorted(set(columns) - set(column_names))
            if missing:
                missing_str = ", ".join(missing)
                raise ValueError(f"Columns not found in dataset: {missing_str}")
        return list(columns)

    sample = None
    if not names_known:
        # Without declared names, the first row's keys are the only evidence.
        sample = _first_row(dataset)
        column_names = list(sample)

    for candidate in ("prompt", "question"):
        if candidate in column_names:
            return [candidate]

    if sample is None:
        sample = _first_row(dataset)
    inferred = [name for name in column_names if isinstance(sample.get(name), str)]
    if inferred:
        return inferred

    raise ValueError("Unable to determine which dataset columns to corrupt.")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
__all__ = ["resolve_columns", "resolve_environment"]
|