glitchlings-0.10.2-cp312-cp312-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of glitchlings might be problematic (details are available on the package's registry page).
- glitchlings/__init__.py +99 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cpython-312-darwin.so +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +147 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +493 -0
- glitchlings/attack/core_execution.py +367 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +218 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +227 -0
- glitchlings/auggie.py +284 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +19 -0
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +490 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +27 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +356 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +678 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +243 -0
- glitchlings/zoo/mim1c.py +148 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +97 -0
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +66 -0
- glitchlings/zoo/transforms.py +346 -0
- glitchlings/zoo/typogre.py +128 -0
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +93 -0
- glitchlings-0.10.2.dist-info/METADATA +337 -0
- glitchlings-0.10.2.dist-info/RECORD +83 -0
- glitchlings-0.10.2.dist-info/WHEEL +5 -0
- glitchlings-0.10.2.dist-info/entry_points.txt +3 -0
- glitchlings-0.10.2.dist-info/licenses/LICENSE +201 -0
- glitchlings-0.10.2.dist-info/top_level.txt +1 -0
glitchlings/auggie.py
ADDED
@@ -0,0 +1,284 @@
"""Laboratory assistant for composing gaggles with behaviour-focused helpers."""

from __future__ import annotations

from collections.abc import Iterable, Sequence
from typing import Collection, Literal

from .zoo.core import Gaggle, Glitchling
from .zoo.hokey import Hokey
from .zoo.jargoyle import (
    DEFAULT_LEXEMES,
    DEFAULT_MODE,
    Jargoyle,
    JargoyleMode,
)
from .zoo.mim1c import Mim1c
from .zoo.pedant import Pedant
from .zoo.pedant.stones import PedantStone
from .zoo.redactyl import FULL_BLOCK, Redactyl
from .zoo.rushmore import Rushmore, RushmoreMode
from .zoo.scannequin import Scannequin
from .zoo.typogre import Typogre
from .zoo.wherewolf import Wherewolf
from .zoo.zeedub import Zeedub


class Auggie(Gaggle):
    """Assistant that incrementally assembles glitchlings into a gaggle."""

    def __init__(
        self,
        glitchlings: Iterable[Glitchling] | None = None,
        *,
        seed: int = 151,
    ) -> None:
        self._blueprint: list[Glitchling] = []
        initial = list(glitchlings or [])
        super().__init__(initial, seed=seed)
        if initial:
            self._blueprint = [glitchling.clone() for glitchling in initial]
            self._rebuild_plan()
        else:
            self._blueprint = []

    def _rebuild_plan(self) -> None:
        self._clones_by_index = []
        for index, glitchling in enumerate(self._blueprint):
            clone = glitchling.clone()
            setattr(clone, "_gaggle_index", index)
            self._clones_by_index.append(clone)
        self.sort_glitchlings()
        self._invalidate_pipeline_cache()

    def _enqueue(self, glitchling: Glitchling) -> "Auggie":
        self._blueprint.append(glitchling)
        self._rebuild_plan()
        return self

    def clone(self, seed: int | None = None) -> "Auggie":
        clone_seed = seed if seed is not None else self.seed
        resolved_seed = 151 if clone_seed is None else int(clone_seed)
        blueprint = [glitch.clone() for glitch in self._blueprint]
        return Auggie(blueprint, seed=resolved_seed)

    def typo(
        self,
        *,
        rate: float | None = None,
        keyboard: str = "CURATOR_QWERTY",
        seed: int | None = None,
    ) -> "Auggie":
        """Add :class:`Typogre` using behaviour-driven nomenclature."""

        return self._enqueue(Typogre(rate=rate, keyboard=keyboard, seed=seed))

    def confusable(
        self,
        *,
        rate: float | None = None,
        classes: list[str] | Literal["all"] | None = None,
        banned_characters: Collection[str] | None = None,
        seed: int | None = None,
    ) -> "Auggie":
        """Add :class:`Mim1c` for homoglyph substitutions."""

        return self._enqueue(
            Mim1c(
                rate=rate,
                classes=classes,
                banned_characters=banned_characters,
                seed=seed,
            )
        )

    def curly_quotes(self, *, seed: int | None = None) -> "Auggie":
        """Add :class:`Pedant` evolved with Curlite to smarten punctuation."""

        return self._enqueue(Pedant(stone=PedantStone.CURLITE, seed=seed))

    def stretch(
        self,
        *,
        rate: float = 0.3,
        extension_min: int = 2,
        extension_max: int = 5,
        word_length_threshold: int = 6,
        base_p: float = 0.45,
        seed: int | None = None,
    ) -> "Auggie":
        """Add :class:`Hokey` for elongated, expressive words."""

        return self._enqueue(
            Hokey(
                rate=rate,
                extension_min=extension_min,
                extension_max=extension_max,
                word_length_threshold=word_length_threshold,
                base_p=base_p,
                seed=seed,
            )
        )

    def homophone(
        self,
        *,
        rate: float | None = None,
        seed: int | None = None,
    ) -> "Auggie":
        """Add :class:`Wherewolf` to swap words for homophones."""

        return self._enqueue(Wherewolf(rate=rate, seed=seed))

    def pedantry(
        self,
        *,
        stone: PedantStone | str = PedantStone.COEURITE,
        seed: int | None = None,
    ) -> "Auggie":
        """Add :class:`Pedant` to evolve text via a chosen stone."""

        return self._enqueue(Pedant(stone=stone, seed=seed))

    def remix(
        self,
        *,
        modes: RushmoreMode | str | Iterable[RushmoreMode | str] | None = None,
        rate: float | None = None,
        delete_rate: float | None = None,
        duplicate_rate: float | None = None,
        swap_rate: float | None = None,
        seed: int | None = None,
        unweighted: bool = False,
        delete_unweighted: bool | None = None,
        duplicate_unweighted: bool | None = None,
    ) -> "Auggie":
        """Add :class:`Rushmore` for deletion, duplication, and swap attacks."""

        return self._enqueue(
            Rushmore(
                modes=modes,
                rate=rate,
                delete_rate=delete_rate,
                duplicate_rate=duplicate_rate,
                swap_rate=swap_rate,
                seed=seed,
                unweighted=unweighted,
                delete_unweighted=delete_unweighted,
                duplicate_unweighted=duplicate_unweighted,
            )
        )

    def redact(
        self,
        *,
        replacement_char: str = FULL_BLOCK,
        rate: float | None = None,
        merge_adjacent: bool = False,
        seed: int | None = 151,
        unweighted: bool = False,
    ) -> "Auggie":
        """Add :class:`Redactyl` to black out words."""

        return self._enqueue(
            Redactyl(
                replacement_char=replacement_char,
                rate=rate,
                merge_adjacent=merge_adjacent,
                seed=seed if seed is not None else 151,
                unweighted=unweighted,
            )
        )

    def recolor(self, *, mode: JargoyleMode = "literal", seed: int | None = None) -> "Auggie":
        """Add :class:`Jargoyle` with ``lexemes="colors"`` to remap colour terms.

        Args:
            mode: "literal" for deterministic first-entry swaps,
                "drift" for random selection from the palette.
            seed: Seed for deterministic randomness.

        Returns:
            Self for method chaining.
        """
        return self._enqueue(Jargoyle(lexemes="colors", mode=mode, rate=1.0, seed=seed))

    def drift(
        self,
        *,
        lexemes: str = DEFAULT_LEXEMES,
        mode: JargoyleMode = DEFAULT_MODE,
        rate: float | None = None,
        seed: int | None = None,
    ) -> "Auggie":
        """Add :class:`Jargoyle` for dictionary-based word drift.

        Swaps words with alternatives from the specified lexeme dictionary.

        Args:
            lexemes: Dictionary to use. One of:
                "colors" (color term swapping),
                "synonyms" (general synonyms),
                "corporate" (business jargon),
                "academic" (scholarly terms).
            mode: "literal" for deterministic first-entry swaps,
                "drift" for random selection.
            rate: Probability of transforming each matching word.
            seed: Seed for deterministic randomness.

        Returns:
            Self for method chaining.
        """
        return self._enqueue(Jargoyle(lexemes=lexemes, mode=mode, rate=rate, seed=seed))

    def ocr(
        self,
        *,
        rate: float | None = None,
        seed: int | None = None,
    ) -> "Auggie":
        """Add :class:`Scannequin` to simulate OCR artefacts."""

        return self._enqueue(Scannequin(rate=rate, seed=seed))

    def zero_width(
        self,
        *,
        rate: float | None = None,
        seed: int | None = None,
        characters: Sequence[str] | None = None,
    ) -> "Auggie":
        """Add :class:`Zeedub` to hide zero-width glyphs inside text."""

        return self._enqueue(Zeedub(rate=rate, seed=seed, characters=characters))

    def synonym(
        self,
        *,
        rate: float | None = None,
        seed: int | None = None,
        lexemes: str = "synonyms",
        mode: JargoyleMode = "drift",
    ) -> "Auggie":
        """Add :class:`Jargoyle` for synonym substitutions.

        Args:
            rate: Probability of transforming each matching word.
            seed: Seed for deterministic randomness.
            lexemes: Dictionary to use (default "synonyms").
            mode: "literal" or "drift" (default "drift").

        Returns:
            Self for method chaining.
        """
        return self._enqueue(
            Jargoyle(
                rate=rate,
                seed=seed,
                lexemes=lexemes,
                mode=mode,
            )
        )


__all__ = ["Auggie"]
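
Auggie is a fluent builder: each helper constructs one glitchling, appends it to the internal blueprint, rebuilds the execution plan, and returns self, so calls chain naturally. A minimal usage sketch, assuming the wheel is installed and importable as glitchlings (how the finished gaggle is ultimately applied to text is defined in zoo/core.py, which is not reproduced here):

from glitchlings.auggie import Auggie

# Compose a gaggle step by step; every helper returns the same Auggie instance.
noise = (
    Auggie(seed=151)
    .typo(rate=0.05)         # keyboard-adjacency typos (Typogre)
    .confusable(rate=0.02)   # homoglyph substitutions (Mim1c)
    .zero_width(rate=0.01)   # hidden zero-width glyphs (Zeedub)
)

# clone() copies the blueprint, optionally under a new seed, so a variant
# can diverge without mutating the original plan.
variant = noise.clone(seed=7)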
glitchlings/compat/__init__.py
ADDED
@@ -0,0 +1,9 @@
"""Compatibility helpers centralising optional dependency imports and extras.

For 1.0, this package no longer re-exports loader utilities or type sentinels.
Import directly from ``glitchlings.compat.loaders`` or ``glitchlings.compat.types``.
"""

from __future__ import annotations

__all__: list[str] = []
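
As the module docstring notes, glitchlings.compat itself re-exports nothing; consumers import from the submodules directly. A short sketch, assuming the wheel is installed:

# Import loader handles and the sentinel straight from the submodules.
from glitchlings.compat.loaders import require_torch, torch
from glitchlings.compat.types import MISSING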
glitchlings/compat/loaders.py
ADDED
@@ -0,0 +1,355 @@
"""Lazy loading infrastructure for optional dependencies.

This module is IMPURE - it performs import attempts and caches results.
Import-time side effects: None (lazy loading only happens on access).
Runtime side effects: Module imports, file IO for metadata queries.

The OptionalDependency class provides lazy loading with:
- Cached import results
- Fallback factories for stub modules
- Error preservation for better diagnostics
- Thread-unsafe caching (by design - single-threaded use expected)
"""

from __future__ import annotations

from dataclasses import dataclass, field
from importlib import import_module, metadata
from types import ModuleType
from typing import Any, Callable, Iterable, NoReturn, cast

from packaging.markers import default_environment
from packaging.requirements import Requirement

from .types import MISSING, _MissingSentinel


def _build_lightning_stub() -> ModuleType:
    """Return a minimal PyTorch Lightning stub when the dependency is absent."""

    module = ModuleType("pytorch_lightning")

    class LightningDataModule:  # pragma: no cover - simple compatibility shim
        """Lightweight stand-in for PyTorch Lightning's ``LightningDataModule``."""

        def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: D401 - parity with real class
            pass

        def prepare_data(self, *args: Any, **kwargs: Any) -> None:  # noqa: D401 - parity with real class
            return None

        def setup(self, *args: Any, **kwargs: Any) -> None:
            return None

        def teardown(self, *args: Any, **kwargs: Any) -> None:
            return None

        def state_dict(self) -> dict[str, Any]:
            return {}

        def load_state_dict(self, state_dict: dict[str, Any]) -> None:
            return None

        def transfer_batch_to_device(self, batch: Any, device: Any, dataloader_idx: int) -> Any:
            return batch

        def on_before_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
            return batch

        def on_after_batch_transfer(self, batch: Any, dataloader_idx: int) -> Any:
            return batch

        def train_dataloader(self, *args: Any, **kwargs: Any) -> Any:
            return []

        def val_dataloader(self, *args: Any, **kwargs: Any) -> Any:
            return []

        def test_dataloader(self, *args: Any, **kwargs: Any) -> Any:
            return []

        def predict_dataloader(self, *args: Any, **kwargs: Any) -> Any:
            return []

    setattr(module, "LightningDataModule", LightningDataModule)
    setattr(module, "__all__", ["LightningDataModule"])
    setattr(
        module,
        "__doc__",
        "Lightweight stub module that exposes a minimal LightningDataModule "
        "when PyTorch Lightning is unavailable.",
    )
    setattr(module, "__version__", "0.0.0-stub")
    return module


@dataclass
class OptionalDependency:
    """Lazily import an optional dependency and retain the import error.

    This class is impure:
    - Performs module imports on first access
    - Caches results in mutable instance state
    - May trigger fallback factory execution
    """

    module_name: str
    fallback_factory: Callable[[], ModuleType] | None = None
    _cached: ModuleType | None | _MissingSentinel = field(default=MISSING)
    _error: ModuleNotFoundError | None = field(default=None)
    _used_fallback: bool = field(default=False)
    _fallback_instance: ModuleType | None = field(default=None)

    def _attempt_import(self) -> ModuleType | None:
        try:
            module = import_module(self.module_name)
        except ModuleNotFoundError as exc:
            if self.fallback_factory is not None:
                if self._fallback_instance is None:
                    self._fallback_instance = self.fallback_factory()
                module = self._fallback_instance
                self._cached = module
                # Preserve the original error so load()/require() can re-raise it
                self._error = exc
                self._used_fallback = True
                return module
            self._cached = None
            self._error = exc
            return None
        else:
            self._cached = module
            self._error = None
            self._used_fallback = False
            return module

    def _raise_missing_error(self) -> NoReturn:
        """Raise ModuleNotFoundError for the missing dependency."""
        error = self._error
        if error is not None:
            raise error
        message = f"{self.module_name} is not installed"
        raise ModuleNotFoundError(message)

    def get(self) -> ModuleType | None:
        """Return the imported module or ``None`` when unavailable."""
        cached = self._cached
        if isinstance(cached, _MissingSentinel):
            return self._attempt_import()
        if cached is None:
            return None
        return cached

    def load(self) -> ModuleType:
        """Return the dependency, raising the original import error when absent."""
        module = self.get()
        if self._used_fallback:
            self._raise_missing_error()
        if module is None:
            self._raise_missing_error()
        return module

    def require(self, message: str) -> ModuleType:
        """Return the dependency or raise ``ModuleNotFoundError`` with ``message``."""
        try:
            return self.load()
        except ModuleNotFoundError as exc:
            raise ModuleNotFoundError(message) from exc

    def available(self) -> bool:
        """Return ``True`` when the dependency can be imported."""
        module = self.get()
        if module is None:
            return False
        if self._used_fallback:
            return False
        return True

    def reset(self) -> None:
        """Forget any cached import result."""
        self._cached = MISSING
        self._error = None
        self._used_fallback = False
        self._fallback_instance = None

    def attr(self, attribute: str) -> Any | None:
        """Return ``attribute`` from the dependency when available."""
        module = self.get()
        if module is None:
            return None
        if self._used_fallback:
            return None
        return getattr(module, attribute, None)

    @property
    def error(self) -> ModuleNotFoundError | None:
        """Return the most recent ``ModuleNotFoundError`` (if any)."""
        self.get()
        return self._error


# ---------------------------------------------------------------------------
# Global dependency instances (mutable singletons)
# ---------------------------------------------------------------------------

pytorch_lightning = OptionalDependency(
    "pytorch_lightning",
    fallback_factory=_build_lightning_stub,
)
datasets = OptionalDependency("datasets")
verifiers = OptionalDependency("verifiers")
jellyfish = OptionalDependency("jellyfish")
jsonschema = OptionalDependency("jsonschema")
torch = OptionalDependency("torch")


def reset_optional_dependencies() -> None:
    """Clear cached optional dependency imports (used by tests)."""
    for dependency in (pytorch_lightning, datasets, verifiers, jellyfish, jsonschema, torch):
        dependency.reset()


# ---------------------------------------------------------------------------
# Convenience accessors
# ---------------------------------------------------------------------------


def get_datasets_dataset() -> Any | None:
    """Return Hugging Face ``Dataset`` class when the dependency is installed."""
    return datasets.attr("Dataset")


def require_datasets(message: str = "datasets is not installed") -> ModuleType:
    """Ensure the Hugging Face datasets dependency is present."""
    return datasets.require(message)


def get_pytorch_lightning_datamodule() -> Any | None:
    """Return the PyTorch Lightning ``LightningDataModule`` when available."""
    return pytorch_lightning.attr("LightningDataModule")


def require_pytorch_lightning(message: str = "pytorch_lightning is not installed") -> ModuleType:
    """Ensure the PyTorch Lightning dependency is present."""
    return pytorch_lightning.require(message)


def require_verifiers(message: str = "verifiers is not installed") -> ModuleType:
    """Ensure the verifiers dependency is present."""
    return verifiers.require(message)


def require_jellyfish(message: str = "jellyfish is not installed") -> ModuleType:
    """Ensure the jellyfish dependency is present."""
    return jellyfish.require(message)


def require_torch(message: str = "torch is not installed") -> ModuleType:
    """Ensure the PyTorch dependency is present."""
    return torch.require(message)


def get_torch_dataloader() -> Any | None:
    """Return PyTorch ``DataLoader`` when the dependency is installed."""
    torch_module = torch.get()
    if torch_module is None:
        return None

    utils_module = getattr(torch_module, "utils", None)
    if utils_module is None:
        return None

    data_module = getattr(utils_module, "data", None)
    if data_module is None:
        return None

    return getattr(data_module, "DataLoader", None)


# ---------------------------------------------------------------------------
# Extras metadata inspection (impure - queries package metadata)
# ---------------------------------------------------------------------------


def get_installed_extras(
    extras: Iterable[str] | None = None,
    *,
    distribution: str = "glitchlings",
) -> dict[str, bool]:
    """Return a mapping of optional extras to installation availability."""
    try:
        dist = metadata.distribution(distribution)
    except metadata.PackageNotFoundError:
        return {}

    provided = {extra.lower() for extra in dist.metadata.get_all("Provides-Extra") or []}
    targets = {extra.lower() for extra in extras} if extras is not None else provided
    requirements = dist.requires or []
    mapping: dict[str, set[str]] = {extra: set() for extra in provided}

    for requirement in requirements:
        names = _extras_from_requirement(requirement, provided)
        if not names:
            continue
        req_name = _requirement_name(requirement)
        for extra in names:
            mapping.setdefault(extra, set()).add(req_name)

    status: dict[str, bool] = {}
    for extra in targets:
        deps = mapping.get(extra)
        if not deps:
            status[extra] = False
            continue
        status[extra] = all(_distribution_installed(dep) for dep in deps)
    return status


def _distribution_installed(name: str) -> bool:
    try:
        metadata.distribution(name)
    except metadata.PackageNotFoundError:
        return False
    return True


def _extras_from_requirement(requirement: str, candidates: set[str]) -> set[str]:
    req = Requirement(requirement)
    if req.marker is None:
        return set()
    extras: set[str] = set()
    for extra in candidates:
        environment = {k: str(v) for k, v in default_environment().items()}
        environment["extra"] = extra
        if req.marker.evaluate(environment):
            extras.add(extra)
    return extras


def _requirement_name(requirement: str) -> str:
    req = Requirement(requirement)
    return cast(str, req.name)


__all__ = [
    # Core class
    "OptionalDependency",
    # Global instances
    "pytorch_lightning",
    "datasets",
    "verifiers",
    "jellyfish",
    "jsonschema",
    "torch",
    # Accessors
    "get_datasets_dataset",
    "require_datasets",
    "get_pytorch_lightning_datamodule",
    "require_pytorch_lightning",
    "require_verifiers",
    "require_jellyfish",
    "require_torch",
    "get_torch_dataloader",
    # Utilities
    "reset_optional_dependencies",
    "get_installed_extras",
]
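
A brief sketch of the lazy-loading behaviour defined above; the module name "numpy" and the extras names in the final comment are illustrative only and are not declared by this package:

from glitchlings.compat.loaders import (
    OptionalDependency,
    get_installed_extras,
    get_torch_dataloader,
)

# Nothing is imported when the handle is created; the first access triggers it.
numpy_dep = OptionalDependency("numpy")

if numpy_dep.available():
    np = numpy_dep.load()     # the real module; no fallback was involved
else:
    print(numpy_dep.error)    # the preserved ModuleNotFoundError

# Module-level singletons behave the same way.
DataLoader = get_torch_dataloader()   # None when torch is not installed

# Map the distribution's declared extras to whether their requirements are installed.
print(get_installed_extras())         # e.g. {"torch": True, "examples": False}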
glitchlings/compat/types.py
ADDED
@@ -0,0 +1,41 @@
"""Pure type definitions for compatibility infrastructure.

This module contains only type definitions and sentinels with no side effects.
It can be safely imported anywhere without triggering module loading or IO.

Pure guarantees:
- No import side effects
- No module loading attempts
- No file IO
- No RNG instantiation
"""

from __future__ import annotations

from typing import Any, Protocol


class _MissingSentinel:
    """Sentinel value indicating no cached import attempt has been made."""

    __slots__ = ()

    def __repr__(self) -> str:
        return "<MISSING>"


class Dataset(Protocol):
    """Protocol mirroring the subset of Hugging Face datasets API we use."""

    def with_transform(self, function: Any) -> "Dataset": ...


MISSING: _MissingSentinel = _MissingSentinel()
"""Singleton sentinel for uninitialized optional dependency cache."""


__all__ = [
    "Dataset",
    "MISSING",
    "_MissingSentinel",
]
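
The MISSING sentinel gives the loader cache three distinguishable states: never attempted, attempted and absent, and imported. A small sketch mirroring the check in OptionalDependency.get:

from glitchlings.compat.types import MISSING, _MissingSentinel

cache: object = MISSING  # no import attempt has been made yet

if isinstance(cache, _MissingSentinel):
    state = "not yet attempted"
elif cache is None:
    state = "attempted and missing"
else:
    state = "imported"

print(state, repr(MISSING))  # -> not yet attempted <MISSING>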