glitchlings 0.4.0__cp312-cp312-macosx_11_0_universal2.whl → 0.4.2__cp312-cp312-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of glitchlings might be problematic. Click here for more details.

Files changed (39)
  1. glitchlings/__init__.py +26 -17
  2. glitchlings/__main__.py +0 -1
  3. glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
  4. glitchlings/compat.py +215 -0
  5. glitchlings/config.py +136 -19
  6. glitchlings/dlc/_shared.py +68 -0
  7. glitchlings/dlc/huggingface.py +26 -41
  8. glitchlings/dlc/prime.py +64 -101
  9. glitchlings/lexicon/__init__.py +26 -19
  10. glitchlings/lexicon/_cache.py +104 -0
  11. glitchlings/lexicon/graph.py +18 -39
  12. glitchlings/lexicon/metrics.py +1 -8
  13. glitchlings/lexicon/vector.py +29 -67
  14. glitchlings/lexicon/wordnet.py +39 -30
  15. glitchlings/main.py +9 -13
  16. glitchlings/util/__init__.py +18 -4
  17. glitchlings/util/adapters.py +27 -0
  18. glitchlings/zoo/__init__.py +21 -14
  19. glitchlings/zoo/_ocr_confusions.py +1 -3
  20. glitchlings/zoo/_rate.py +1 -4
  21. glitchlings/zoo/_sampling.py +0 -1
  22. glitchlings/zoo/_text_utils.py +1 -5
  23. glitchlings/zoo/adjax.py +0 -2
  24. glitchlings/zoo/core.py +185 -56
  25. glitchlings/zoo/jargoyle.py +9 -14
  26. glitchlings/zoo/mim1c.py +11 -10
  27. glitchlings/zoo/redactyl.py +5 -8
  28. glitchlings/zoo/reduple.py +3 -1
  29. glitchlings/zoo/rushmore.py +2 -8
  30. glitchlings/zoo/scannequin.py +5 -4
  31. glitchlings/zoo/typogre.py +3 -7
  32. glitchlings/zoo/zeedub.py +2 -2
  33. {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/METADATA +68 -4
  34. glitchlings-0.4.2.dist-info/RECORD +42 -0
  35. glitchlings-0.4.0.dist-info/RECORD +0 -38
  36. {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/WHEEL +0 -0
  37. {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/entry_points.txt +0 -0
  38. {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/licenses/LICENSE +0 -0
  39. {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/top_level.txt +0 -0
glitchlings/__init__.py CHANGED
@@ -1,29 +1,33 @@
1
+ from .config import AttackConfig, build_gaggle, load_attack_config
2
+ from .util import SAMPLE_TEXT
1
3
  from .zoo import (
2
- Typogre,
3
- typogre,
4
- Mim1c,
5
- mim1c,
6
- Jargoyle,
7
- jargoyle,
8
4
  Adjax,
9
- adjax,
5
+ Gaggle,
6
+ Glitchling,
7
+ Jargoyle,
8
+ Mim1c,
10
9
  Redactyl,
11
- redactyl,
12
10
  Reduple,
13
- reduple,
14
11
  Rushmore,
15
- rushmore,
16
12
  Scannequin,
17
- scannequin,
13
+ Typogre,
18
14
  Zeedub,
19
- zeedub,
20
- Glitchling,
21
- Gaggle,
15
+ adjax,
16
+ is_rust_pipeline_enabled,
17
+ is_rust_pipeline_supported,
18
+ jargoyle,
19
+ mim1c,
20
+ pipeline_feature_flag_enabled,
21
+ plan_glitchling_specs,
22
+ plan_glitchlings,
23
+ redactyl,
24
+ reduple,
25
+ rushmore,
26
+ scannequin,
22
27
  summon,
28
+ typogre,
29
+ zeedub,
23
30
  )
24
- from .config import AttackConfig, build_gaggle, load_attack_config
25
- from .util import SAMPLE_TEXT
26
-
27
31
 
28
32
  __all__ = [
29
33
  "Typogre",
@@ -47,6 +51,11 @@ __all__ = [
47
51
  "summon",
48
52
  "Glitchling",
49
53
  "Gaggle",
54
+ "plan_glitchlings",
55
+ "plan_glitchling_specs",
56
+ "is_rust_pipeline_enabled",
57
+ "is_rust_pipeline_supported",
58
+ "pipeline_feature_flag_enabled",
50
59
  "SAMPLE_TEXT",
51
60
  "AttackConfig",
52
61
  "build_gaggle",
glitchlings/__main__.py CHANGED
@@ -4,6 +4,5 @@ import sys
4
4
 
5
5
  from .main import main
6
6
 
7
-
8
7
  if __name__ == "__main__":
9
8
  sys.exit(main())
Binary file
glitchlings/compat.py ADDED
@@ -0,0 +1,215 @@
1
+ """Compatibility helpers centralising optional dependency imports and extras."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+ from importlib import import_module, metadata
8
+ from types import ModuleType
9
+ from typing import Any, Iterable
10
+
11
+ try: # pragma: no cover - packaging is bundled with modern Python environments
12
+ from packaging.markers import default_environment
13
+ from packaging.requirements import Requirement
14
+ except ModuleNotFoundError: # pragma: no cover - fallback when packaging missing
15
+ Requirement = None # type: ignore[assignment]
16
+ default_environment = None # type: ignore[assignment]
17
+
18
# Sentinel marking an import that has not been attempted yet.
_MISSING = object()


@dataclass
class OptionalDependency:
    """Lazily import an optional dependency and retain the import error."""

    # Dotted module path handed to ``import_module``.
    module_name: str
    # Cached module, ``None`` after a failed import, or ``_MISSING`` when untried.
    _cached: ModuleType | object = _MISSING
    # The ``ModuleNotFoundError`` from the last failed attempt, if any.
    _error: ModuleNotFoundError | None = None

    def _attempt_import(self) -> ModuleType | None:
        """Import the module once, recording either the module or the failure."""
        try:
            loaded = import_module(self.module_name)
        except ModuleNotFoundError as exc:
            self._cached, self._error = None, exc
            return None
        self._cached, self._error = loaded, None
        return loaded

    def get(self) -> ModuleType | None:
        """Return the imported module or ``None`` when unavailable."""
        if self._cached is _MISSING:
            return self._attempt_import()
        return None if self._cached is None else self._cached

    def load(self) -> ModuleType:
        """Return the dependency, raising the original import error when absent."""
        module = self.get()
        if module is not None:
            return module
        if self._error is not None:
            raise self._error
        raise ModuleNotFoundError(f"{self.module_name} is not installed")

    def require(self, message: str) -> ModuleType:
        """Return the dependency or raise ``ModuleNotFoundError`` with ``message``."""
        try:
            return self.load()
        except ModuleNotFoundError as exc:
            raise ModuleNotFoundError(message) from exc

    def available(self) -> bool:
        """Return ``True`` when the dependency can be imported."""
        return self.get() is not None

    def reset(self) -> None:
        """Forget any cached import result."""
        self._cached = _MISSING
        self._error = None

    def attr(self, attribute: str) -> Any | None:
        """Return ``attribute`` from the dependency when available."""
        module = self.get()
        return None if module is None else getattr(module, attribute, None)

    @property
    def error(self) -> ModuleNotFoundError | None:
        """Return the most recent ``ModuleNotFoundError`` (if any)."""
        self.get()  # Ensure at least one import attempt has happened.
        return self._error
88
+
89
+
90
# Shared lazy handles for the project's optional third-party dependencies.
datasets = OptionalDependency("datasets")
verifiers = OptionalDependency("verifiers")
jellyfish = OptionalDependency("jellyfish")
jsonschema = OptionalDependency("jsonschema")
nltk = OptionalDependency("nltk")

# Single registry so reset logic cannot drift out of sync with the handles.
_ALL_OPTIONAL = (datasets, verifiers, jellyfish, jsonschema, nltk)


def reset_optional_dependencies() -> None:
    """Clear cached optional dependency imports (used by tests)."""
    for handle in _ALL_OPTIONAL:
        handle.reset()
101
+
102
+
103
def get_datasets_dataset() -> Any | None:
    """Return the Hugging Face ``Dataset`` class, or ``None`` if unavailable."""
    # ``attr`` already degrades to ``None`` when the import fails.
    return datasets.attr("Dataset")
106
+
107
+
108
def require_datasets(message: str = "datasets is not installed") -> ModuleType:
    """Return the Hugging Face datasets module, raising ``message`` when absent."""
    return datasets.require(message)
111
+
112
+
113
def require_verifiers(message: str = "verifiers is not installed") -> ModuleType:
    """Return the verifiers module, raising ``message`` when absent."""
    return verifiers.require(message)
116
+
117
+
118
def require_jellyfish(message: str = "jellyfish is not installed") -> ModuleType:
    """Return the jellyfish module, raising ``message`` when absent."""
    return jellyfish.require(message)
121
+
122
+
123
def get_installed_extras(
    extras: Iterable[str] | None = None,
    *,
    distribution: str = "glitchlings",
) -> dict[str, bool]:
    """Return a mapping of optional extras to installation availability.

    ``extras`` restricts the answer to those names (case-insensitive); by
    default every extra declared by ``distribution`` is reported. An empty
    mapping is returned when the distribution metadata cannot be found.
    """
    try:
        dist = metadata.distribution(distribution)
    except metadata.PackageNotFoundError:
        # No installed metadata (e.g. running from a source checkout).
        return {}

    declared = {extra.lower() for extra in dist.metadata.get_all("Provides-Extra") or []}
    wanted = declared if extras is None else {extra.lower() for extra in extras}

    # Map each declared extra to the distributions its marker activates.
    extra_requirements: dict[str, set[str]] = {extra: set() for extra in declared}
    for spec in dist.requires or []:
        relevant = _extras_from_requirement(spec, declared)
        if not relevant:
            continue
        dep_name = _requirement_name(spec)
        for extra in relevant:
            extra_requirements.setdefault(extra, set()).add(dep_name)

    status: dict[str, bool] = {}
    for extra in wanted:
        deps = extra_requirements.get(extra)
        # An extra with no requirements on record counts as not installed.
        status[extra] = bool(deps) and all(_distribution_installed(dep) for dep in deps)
    return status
155
+
156
+
157
def _distribution_installed(name: str) -> bool:
    """Return ``True`` when distribution ``name`` has installed metadata."""
    try:
        metadata.distribution(name)
    except metadata.PackageNotFoundError:
        return False
    else:
        return True
163
+
164
+
165
# Fallback matcher for ``extra == "name"`` markers when ``packaging`` is absent.
# BUG FIX: the previous pattern used ``\\s`` inside a raw string, which matches
# a literal backslash followed by "s" and therefore never matched real markers;
# ``\s`` (whitespace) is what requirement markers actually contain.
_EXTRA_PATTERN = re.compile(r'extra\s*==\s*"(?P<extra>[^"]+)"')


def _extras_from_requirement(requirement: str, candidates: set[str]) -> set[str]:
    """Return which of ``candidates`` activate ``requirement`` via its marker.

    Prefers ``packaging`` marker evaluation when importable; otherwise falls
    back to a regex scan of the raw requirement string.
    """
    if Requirement is not None and default_environment is not None:
        req = Requirement(requirement)
        if req.marker is None:
            # Unconditional requirement: belongs to no extra.
            return set()
        extras: set[str] = set()
        for extra in candidates:
            environment = default_environment()
            environment["extra"] = extra
            if req.marker.evaluate(environment):
                extras.add(extra)
        return extras

    # Regex fallback: collect any quoted extra names present in the marker.
    matches: set[str] = set()
    for match in _EXTRA_PATTERN.finditer(requirement):
        extra = match.group("extra").lower()
        if extra in candidates:
            matches.add(extra)
    return matches
187
+
188
+
189
def _requirement_name(requirement: str) -> str:
    """Extract the distribution name from a PEP 508 requirement string.

    Prefers ``packaging`` parsing when importable. BUG FIX: the manual
    fallback previously cut at the first delimiter in a fixed *check order*
    rather than the earliest delimiter *present*, so ``"foo (>=1.0)"``
    returned ``"foo "`` with a trailing space; it now cuts at the earliest
    delimiter and strips trailing whitespace.
    """
    if Requirement is not None:
        return Requirement(requirement).name

    # Drop any environment marker, then trim at the first structural character.
    candidate = requirement.split(";", 1)[0].strip()
    delimiters = set("[( <>=!~")
    for index, char in enumerate(candidate):
        if char in delimiters:
            return candidate[:index].rstrip()
    return candidate
200
+
201
+
202
+ __all__ = [
203
+ "OptionalDependency",
204
+ "datasets",
205
+ "verifiers",
206
+ "jellyfish",
207
+ "jsonschema",
208
+ "nltk",
209
+ "get_datasets_dataset",
210
+ "require_datasets",
211
+ "require_verifiers",
212
+ "require_jellyfish",
213
+ "get_installed_extras",
214
+ "reset_optional_dependencies",
215
+ ]
glitchlings/config.py CHANGED
@@ -3,10 +3,11 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import os
6
+ import warnings
6
7
  from dataclasses import dataclass, field
7
8
  from io import TextIOBase
8
9
  from pathlib import Path
9
- from typing import Any, Mapping, Sequence, TYPE_CHECKING
10
+ from typing import TYPE_CHECKING, Any, Mapping, Sequence
10
11
 
11
12
  try: # Python 3.11+
12
13
  import tomllib
@@ -15,6 +16,7 @@ except ModuleNotFoundError: # pragma: no cover - Python < 3.11
15
16
 
16
17
  import yaml
17
18
 
19
+ from .compat import jsonschema
18
20
 
19
21
  if TYPE_CHECKING: # pragma: no cover - typing only
20
22
  from .zoo import Glitchling
@@ -25,6 +27,44 @@ DEFAULT_CONFIG_PATH = Path(__file__).with_name("config.toml")
25
27
  DEFAULT_LEXICON_PRIORITY = ["vector", "graph", "wordnet"]
26
28
  DEFAULT_ATTACK_SEED = 151
27
29
 
30
# Properties shared by both mapping forms of a glitchling entry.
_GLITCHLING_OBJECT_PROPERTIES: dict[str, Any] = {
    "name": {"type": "string", "minLength": 1},
    "type": {"type": "string", "minLength": 1},
    "parameters": {"type": "object"},
}

# JSON schema for attack configuration documents: a non-empty list of
# glitchlings (bare names or mappings keyed by "name"/"type") plus an
# optional integer seed.
ATTACK_CONFIG_SCHEMA: dict[str, Any] = {
    "type": "object",
    "required": ["glitchlings"],
    "properties": {
        "glitchlings": {
            "type": "array",
            "minItems": 1,
            "items": {
                "anyOf": [
                    # Bare glitchling name.
                    {"type": "string", "minLength": 1},
                    # Mapping form keyed by "name".
                    {
                        "type": "object",
                        "required": ["name"],
                        "properties": dict(_GLITCHLING_OBJECT_PROPERTIES),
                        "additionalProperties": True,
                    },
                    # Legacy mapping form keyed by "type".
                    {
                        "type": "object",
                        "required": ["type"],
                        "properties": dict(_GLITCHLING_OBJECT_PROPERTIES),
                        "additionalProperties": True,
                    },
                ]
            },
        },
        "seed": {"type": "integer"},
    },
    "additionalProperties": False,
}
67
+
28
68
 
29
69
  @dataclass(slots=True)
30
70
  class LexiconConfig:
@@ -48,21 +88,18 @@ _CONFIG: RuntimeConfig | None = None
48
88
 
49
89
  def reset_config() -> None:
50
90
  """Forget any cached runtime configuration."""
51
-
52
91
  global _CONFIG
53
92
  _CONFIG = None
54
93
 
55
94
 
56
95
  def reload_config() -> RuntimeConfig:
57
96
  """Reload the runtime configuration from disk."""
58
-
59
97
  reset_config()
60
98
  return get_config()
61
99
 
62
100
 
63
101
  def get_config() -> RuntimeConfig:
64
102
  """Return the cached runtime configuration, loading it if necessary."""
65
-
66
103
  global _CONFIG
67
104
  if _CONFIG is None:
68
105
  _CONFIG = _load_runtime_config()
@@ -72,12 +109,19 @@ def get_config() -> RuntimeConfig:
72
109
  def _load_runtime_config() -> RuntimeConfig:
73
110
  path = _resolve_config_path()
74
111
  data = _read_toml(path)
75
- lexicon_section = data.get("lexicon", {})
112
+ mapping = _validate_runtime_config_data(data, source=path)
113
+
114
+ lexicon_section = mapping.get("lexicon", {})
76
115
 
77
116
  priority = lexicon_section.get("priority", DEFAULT_LEXICON_PRIORITY)
78
117
  if not isinstance(priority, Sequence) or isinstance(priority, (str, bytes)):
79
118
  raise ValueError("lexicon.priority must be a sequence of strings.")
80
- normalized_priority = [str(item) for item in priority]
119
+ normalized_priority = []
120
+ for item in priority:
121
+ string_value = str(item)
122
+ if not string_value:
123
+ raise ValueError("lexicon.priority entries must be non-empty strings.")
124
+ normalized_priority.append(string_value)
81
125
 
82
126
  vector_cache = _resolve_optional_path(
83
127
  lexicon_section.get("vector_cache"),
@@ -113,6 +157,36 @@ def _read_toml(path: Path) -> dict[str, Any]:
113
157
  return tomllib.load(handle)
114
158
 
115
159
 
160
+ def _validate_runtime_config_data(data: Any, *, source: Path) -> Mapping[str, Any]:
161
+ if data is None:
162
+ return {}
163
+ if not isinstance(data, Mapping):
164
+ raise ValueError(f"Configuration file '{source}' must contain a top-level mapping.")
165
+
166
+ allowed_sections = {"lexicon"}
167
+ unexpected_sections = [str(key) for key in data if key not in allowed_sections]
168
+ if unexpected_sections:
169
+ extras = ", ".join(sorted(unexpected_sections))
170
+ raise ValueError(f"Configuration file '{source}' has unsupported sections: {extras}.")
171
+
172
+ lexicon_section = data.get("lexicon", {})
173
+ if not isinstance(lexicon_section, Mapping):
174
+ raise ValueError("Configuration 'lexicon' section must be a table.")
175
+
176
+ allowed_lexicon_keys = {"priority", "vector_cache", "graph_cache"}
177
+ unexpected_keys = [str(key) for key in lexicon_section if key not in allowed_lexicon_keys]
178
+ if unexpected_keys:
179
+ extras = ", ".join(sorted(unexpected_keys))
180
+ raise ValueError(f"Unknown lexicon settings: {extras}.")
181
+
182
+ for key in ("vector_cache", "graph_cache"):
183
+ value = lexicon_section.get(key)
184
+ if value is not None and not isinstance(value, (str, os.PathLike)):
185
+ raise ValueError(f"lexicon.{key} must be a path or string when provided.")
186
+
187
+ return data
188
+
189
+
116
190
  def _resolve_optional_path(value: Any, *, base: Path) -> Path | None:
117
191
  if value in (None, ""):
118
192
  return None
@@ -137,7 +211,6 @@ def load_attack_config(
137
211
  encoding: str = "utf-8",
138
212
  ) -> AttackConfig:
139
213
  """Load and parse an attack configuration from YAML."""
140
-
141
214
  if isinstance(source, (str, Path)):
142
215
  path = Path(source)
143
216
  label = str(path)
@@ -155,36 +228,67 @@ def load_attack_config(
155
228
  return parse_attack_config(data, source=label)
156
229
 
157
230
 
158
- def parse_attack_config(data: Any, *, source: str = "<config>") -> AttackConfig:
159
- """Convert arbitrary YAML data into a validated ``AttackConfig``."""
160
-
231
def _validate_attack_config_schema(data: Any, *, source: str) -> Mapping[str, Any]:
    """Validate the shape of an attack-config payload and return it.

    Performs structural checks by hand so errors are useful even without the
    optional ``jsonschema`` dependency; when that dependency is importable the
    payload is additionally validated against ``ATTACK_CONFIG_SCHEMA``.
    """
    if data is None:
        raise ValueError(f"Attack configuration '{source}' is empty.")
    if not isinstance(data, Mapping):
        raise ValueError(f"Attack configuration '{source}' must be a mapping.")

    unknown = [key for key in data if key not in {"glitchlings", "seed"}]
    if unknown:
        extras = ", ".join(sorted(unknown))
        raise ValueError(f"Attack configuration '{source}' has unsupported fields: {extras}.")

    if "glitchlings" not in data:
        raise ValueError(f"Attack configuration '{source}' must define 'glitchlings'.")

    entries = data["glitchlings"]
    if not isinstance(entries, Sequence) or isinstance(entries, (str, bytes)):
        raise ValueError(f"'glitchlings' in '{source}' must be a sequence.")

    seed = data.get("seed")
    if seed is not None and not isinstance(seed, int):
        raise ValueError(f"Seed in '{source}' must be an integer if provided.")

    # Mapping entries need a usable name ("name", or legacy "type") and,
    # when present, a mapping of parameters; string entries are checked by
    # the jsonschema pass / downstream builder.
    for index, entry in enumerate(entries, start=1):
        if not isinstance(entry, Mapping):
            continue
        name_candidate = entry.get("name") or entry.get("type")
        if not isinstance(name_candidate, str) or not name_candidate.strip():
            raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
        parameters = entry.get("parameters")
        if parameters is not None and not isinstance(parameters, Mapping):
            raise ValueError(
                f"{source}: glitchling '{name_candidate}' parameters must be a mapping."
            )

    schema_module = jsonschema.get()
    if schema_module is not None:
        try:
            schema_module.validate(instance=data, schema=ATTACK_CONFIG_SCHEMA)
        except schema_module.exceptions.ValidationError as exc:  # pragma: no cover - optional dep
            message = exc.message
            raise ValueError(f"Attack configuration '{source}' is invalid: {message}") from exc

    return data
273
+
274
+
275
def parse_attack_config(data: Any, *, source: str = "<config>") -> AttackConfig:
    """Convert arbitrary YAML data into a validated ``AttackConfig``."""
    payload = _validate_attack_config_schema(data, source=source)

    # Indices are 1-based so error messages match user-facing ordering.
    built: list["Glitchling"] = [
        _build_glitchling(entry, source, position)
        for position, entry in enumerate(payload["glitchlings"], start=1)
    ]

    return AttackConfig(glitchlings=built, seed=payload.get("seed"))
183
288
 
184
289
 
185
290
  def build_gaggle(config: AttackConfig, *, seed_override: int | None = None):
186
291
  """Instantiate a ``Gaggle`` according to ``config``."""
187
-
188
292
  from .zoo import Gaggle # Imported lazily to avoid circular dependencies
189
293
 
190
294
  seed = seed_override if seed_override is not None else config.seed
@@ -211,14 +315,27 @@ def _build_glitchling(entry: Any, source: str, index: int):
211
315
  raise ValueError(f"{source}: glitchling #{index}: {exc}") from exc
212
316
 
213
317
  if isinstance(entry, Mapping):
214
- name_value = entry.get("name", entry.get("type"))
318
+ name_value = entry.get("name")
319
+ legacy_type = entry.get("type")
320
+ if name_value is None and legacy_type is not None:
321
+ warnings.warn(
322
+ f"{source}: glitchling #{index} uses 'type'; prefer 'name'.",
323
+ DeprecationWarning,
324
+ stacklevel=2,
325
+ )
326
+ name_value = legacy_type
327
+ elif name_value is None:
328
+ name_value = legacy_type
329
+
215
330
  if not isinstance(name_value, str) or not name_value.strip():
216
331
  raise ValueError(f"{source}: glitchling #{index} is missing a 'name'.")
217
332
 
218
333
  parameters = entry.get("parameters")
219
334
  if parameters is not None:
220
335
  if not isinstance(parameters, Mapping):
221
- raise ValueError(f"{source}: glitchling '{name_value}' parameters must be a mapping.")
336
+ raise ValueError(
337
+ f"{source}: glitchling '{name_value}' parameters must be a mapping."
338
+ )
222
339
  kwargs = dict(parameters)
223
340
  else:
224
341
  kwargs = {
@@ -0,0 +1,68 @@
1
+ """Shared utilities for DLC integrations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Callable, Sequence
6
+ from typing import Any
7
+
8
+
9
def resolve_environment(
    env: Any,
    *,
    loader: Callable[[str], Any],
    environment_type: type[Any],
) -> Any:
    """Return a fully-instantiated verifier environment.

    A string ``env`` is resolved through ``loader`` first; the result (or a
    non-string ``env``) must be an ``environment_type`` instance, otherwise
    ``TypeError`` is raised.
    """
    resolved = loader(env) if isinstance(env, str) else env

    if not isinstance(resolved, environment_type):
        raise TypeError("Invalid environment type")

    return resolved
23
+
24
+
25
def _first_row(dataset: Any) -> Any:
    """Return the first row of ``dataset`` (empty mapping when there is none)."""
    try:
        size = len(dataset)
    except TypeError:
        # Streaming/iterable datasets: prefer ``take``; otherwise pull one item.
        take_fn = getattr(dataset, "take", None)
        if callable(take_fn):
            rows = list(take_fn(1))
        else:
            rows = []
            for row in dataset:
                rows.append(row)
                break
        return dict(rows[0]) if rows else {}
    return dataset[0] if size else {}


def resolve_columns(dataset: Any, columns: Sequence[str] | None) -> list[str]:
    """Identify which dataset columns should be corrupted.

    Explicit ``columns`` are validated against the dataset's declared column
    names. Otherwise a conventional text column ("prompt" or "question") is
    preferred; as a last resort, string-valued columns are inferred from the
    first row. Raises ``ValueError`` when nothing usable is found.
    """
    known = set(getattr(dataset, "column_names", ()))

    if columns is not None:
        absent = sorted(set(columns) - known)
        if absent:
            missing_str = ", ".join(absent)
            raise ValueError(f"Columns not found in dataset: {missing_str}")
        return list(columns)

    for preferred in ("prompt", "question"):
        if preferred in known:
            return [preferred]

    sample = _first_row(dataset)
    inferred = [
        name for name in getattr(dataset, "column_names", ()) if isinstance(sample.get(name), str)
    ]
    if inferred:
        return inferred

    raise ValueError("Unable to determine which dataset columns to corrupt.")
66
+
67
+
68
+ __all__ = ["resolve_columns", "resolve_environment"]