glitchlings 0.4.0__cp311-cp311-macosx_11_0_universal2.whl → 0.4.1__cp311-cp311-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic.
- glitchlings/_zoo_rust.cpython-311-darwin.so +0 -0
- glitchlings/lexicon/__init__.py +18 -0
- glitchlings/lexicon/_cache.py +111 -0
- glitchlings/lexicon/graph.py +16 -29
- glitchlings/lexicon/vector.py +16 -35
- glitchlings/lexicon/wordnet.py +12 -2
- glitchlings/zoo/core.py +103 -13
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/METADATA +2 -2
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/RECORD +13 -12
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/top_level.txt +0 -0
glitchlings/_zoo_rust.cpython-311-darwin.so
Binary file
glitchlings/lexicon/__init__.py
CHANGED
@@ -4,10 +4,12 @@ from __future__ import annotations

 from abc import ABC, abstractmethod
 from hashlib import blake2s
+from pathlib import Path
 import random
 from typing import Callable, Iterable

 from glitchlings.config import get_config
+from ._cache import CacheEntries, CacheSnapshot


 class Lexicon(ABC):
@@ -79,6 +81,21 @@
         return f"{self.__class__.__name__}(seed={self._seed!r})"


+class LexiconBackend(Lexicon):
+    """Extended lexicon interface that supports cache persistence."""
+
+    Cache = CacheEntries
+
+    @classmethod
+    @abstractmethod
+    def load_cache(cls, path: str | Path) -> CacheSnapshot:
+        """Return a validated cache snapshot loaded from ``path``."""
+
+    @abstractmethod
+    def save_cache(self, path: str | Path | None = None) -> Path | None:
+        """Persist the backend cache to ``path`` and return the destination."""
+
+
 from .graph import GraphLexicon
 from .metrics import (
     compare_lexicons,
@@ -176,6 +193,7 @@ def get_default_lexicon(seed: int | None = None) -> Lexicon:

 __all__ = [
     "Lexicon",
+    "LexiconBackend",
     "VectorLexicon",
     "GraphLexicon",
     "WordNetLexicon",
glitchlings/lexicon/_cache.py
ADDED
@@ -0,0 +1,111 @@
+"""Shared cache helpers for lexicon backends."""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from hashlib import blake2s
+from pathlib import Path
+from typing import Mapping, Sequence
+
+
+CacheEntries = dict[str, list[str]]
+
+
+@dataclass(frozen=True)
+class CacheSnapshot:
+    """Materialised cache data and its integrity checksum."""
+
+    entries: CacheEntries
+    checksum: str | None = None
+
+
+def _normalise_entries(payload: Mapping[str, Sequence[str]]) -> CacheEntries:
+    """Convert raw cache payloads into canonical mapping form."""
+
+    entries: CacheEntries = {}
+    for key, values in payload.items():
+        if not isinstance(key, str):
+            raise RuntimeError("Synonym cache keys must be strings.")
+        if not isinstance(values, Sequence):
+            raise RuntimeError("Synonym cache values must be sequences of strings.")
+        entries[key] = [str(value) for value in values]
+    return entries
+
+
+def _canonical_json(entries: Mapping[str, Sequence[str]]) -> str:
+    """Return a deterministic JSON serialisation for ``entries``."""
+
+    serialisable = {key: list(values) for key, values in sorted(entries.items())}
+    return json.dumps(serialisable, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
+
+
+def compute_checksum(entries: Mapping[str, Sequence[str]]) -> str:
+    """Return a BLAKE2s checksum for ``entries``."""
+
+    digest = blake2s(_canonical_json(entries).encode("utf8"), digest_size=16)
+    return digest.hexdigest()
+
+
+def load_cache(path: Path) -> CacheSnapshot:
+    """Load a cache from ``path`` and verify its checksum if present."""
+
+    if not path.exists():
+        return CacheSnapshot(entries={}, checksum=None)
+
+    with path.open("r", encoding="utf8") as handle:
+        payload = json.load(handle)
+
+    checksum: str | None = None
+    entries_payload: Mapping[str, Sequence[str]]
+
+    if isinstance(payload, Mapping) and "__meta__" in payload and "entries" in payload:
+        meta = payload["__meta__"]
+        entries_payload = payload["entries"]  # type: ignore[assignment]
+        if not isinstance(entries_payload, Mapping):
+            raise RuntimeError("Synonym cache entries must be stored as a mapping.")
+        if isinstance(meta, Mapping):
+            raw_checksum = meta.get("checksum")
+            if raw_checksum is not None and not isinstance(raw_checksum, str):
+                raise RuntimeError("Synonym cache checksum must be a string when provided.")
+            checksum = raw_checksum
+        else:
+            raise RuntimeError("Synonym cache metadata must be a mapping.")
+    elif isinstance(payload, Mapping):
+        entries_payload = payload  # legacy format without metadata
+    else:
+        raise RuntimeError("Synonym cache payload must be a mapping of strings to lists.")
+
+    entries = _normalise_entries(entries_payload)
+    if checksum is not None:
+        expected = compute_checksum(entries)
+        if checksum != expected:
+            raise RuntimeError(
+                "Synonym cache checksum mismatch; the cache file appears to be corrupted."
+            )
+
+    return CacheSnapshot(entries=entries, checksum=checksum)
+
+
+def write_cache(path: Path, entries: Mapping[str, Sequence[str]]) -> CacheSnapshot:
+    """Persist ``entries`` to ``path`` with checksum metadata."""
+
+    serialisable = {key: list(values) for key, values in sorted(entries.items())}
+    checksum = compute_checksum(serialisable)
+    payload = {
+        "__meta__": {
+            "checksum": checksum,
+            "entries": len(serialisable),
+        },
+        "entries": serialisable,
+    }
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    with path.open("w", encoding="utf8") as handle:
+        json.dump(payload, handle, ensure_ascii=False, indent=2, sort_keys=True)
+
+    return CacheSnapshot(entries=serialisable, checksum=checksum)
+
+
+__all__ = ["CacheEntries", "CacheSnapshot", "compute_checksum", "load_cache", "write_cache"]
+
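The new helpers give every persisted synonym cache a canonical JSON layout plus a BLAKE2s checksum stored under a `__meta__` key. A minimal round-trip sketch, assuming glitchlings 0.4.1 is installed; the file name is illustrative, and `_cache` is a private module imported here only for demonstration:

from pathlib import Path

from glitchlings.lexicon._cache import load_cache, write_cache

cache_file = Path("synonyms.json")  # illustrative path

# write_cache sorts the entries, embeds a checksum under "__meta__",
# and returns the snapshot it just persisted.
snapshot = write_cache(cache_file, {"quick": ["fast", "speedy"]})
print(snapshot.checksum)

# load_cache re-reads the file, normalises the entries, and raises
# RuntimeError if the stored checksum no longer matches them.
restored = load_cache(cache_file)
assert restored.entries == {"quick": ["fast", "speedy"]}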
glitchlings/lexicon/graph.py
CHANGED
@@ -2,12 +2,12 @@

 from __future__ import annotations

-import json
 import re
 from pathlib import Path
 from typing import Iterable, Mapping, MutableMapping, Sequence

-from . import Lexicon
+from . import LexiconBackend
+from ._cache import CacheSnapshot, load_cache as _load_cache_file, write_cache as _write_cache_file
 from .vector import VectorLexicon


@@ -140,30 +140,7 @@ def _load_numberbatch(path: Path, *, languages: set[str]) -> Mapping[str, list[f
     return embeddings


-
-    if not path.exists():
-        return {}
-    with path.open("r", encoding="utf8") as handle:
-        payload = json.load(handle)
-    if not isinstance(payload, Mapping):
-        raise RuntimeError("Graph lexicon cache must be a mapping of strings to lists.")
-    cache: dict[str, list[str]] = {}
-    for key, values in payload.items():
-        if not isinstance(key, str):
-            raise RuntimeError("Graph lexicon cache keys must be strings.")
-        if not isinstance(values, Sequence):
-            raise RuntimeError("Graph lexicon cache values must be sequences of strings.")
-        cache[key] = [str(value) for value in values]
-    return cache
-
-
-def _write_cache(path: Path, cache: Mapping[str, Sequence[str]]) -> None:
-    serialisable = {key: list(values) for key, values in sorted(cache.items())}
-    with path.open("w", encoding="utf8") as handle:
-        json.dump(serialisable, handle, ensure_ascii=False, indent=2, sort_keys=True)
-
-
-class GraphLexicon(Lexicon):
+class GraphLexicon(LexiconBackend):
     """Lexicon backed by ConceptNet/Numberbatch embeddings."""

     def __init__(
@@ -184,9 +161,12 @@ class GraphLexicon(Lexicon):
         self._max_neighbors = max(1, max_neighbors)
         self._min_similarity = min_similarity
         self._cache: MutableMapping[str, list[str]] = {}
-        self._cache_path = Path(cache_path) if cache_path is not None else None
+        self._cache_path: Path | None = Path(cache_path) if cache_path is not None else None
+        self._cache_checksum: str | None = None
         if self._cache_path is not None:
-
+            snapshot = _load_cache_file(self._cache_path)
+            self._cache.update(snapshot.entries)
+            self._cache_checksum = snapshot.checksum
         if cache is not None:
             for key, values in cache.items():
                 self._cache[str(key)] = [str(value) for value in values]
@@ -278,6 +258,12 @@ class GraphLexicon(Lexicon):
     def export_cache(self) -> dict[str, list[str]]:
         return {key: list(values) for key, values in self._cache.items()}

+    @classmethod
+    def load_cache(cls, path: str | Path) -> CacheSnapshot:
+        """Load and validate a persisted ConceptNet cache file."""
+
+        return _load_cache_file(Path(path))
+
     def save_cache(self, path: str | Path | None = None) -> Path:
         if path is None:
             if self._cache_path is None:
@@ -286,7 +272,8 @@ class GraphLexicon(Lexicon):
         else:
             target = Path(path)
         self._cache_path = target
-
+        snapshot = _write_cache_file(target, self._cache)
+        self._cache_checksum = snapshot.checksum
         self._cache_dirty = False
         return target

glitchlings/lexicon/vector.py
CHANGED
@@ -10,7 +10,8 @@ from pathlib import Path
 import sys
 from typing import Any, Callable, Iterable, Iterator, Mapping, MutableMapping, Sequence

-from . import Lexicon
+from . import LexiconBackend
+from ._cache import CacheSnapshot, load_cache as _load_cache_file, write_cache as _write_cache_file


 def _cosine_similarity(vector_a: Sequence[float], vector_b: Sequence[float]) -> float:
@@ -241,38 +242,7 @@ def _resolve_source(source: Any | None) -> _Adapter | None:
     raise RuntimeError("Unsupported vector source supplied to VectorLexicon.")


-
-    """Load a synonym cache from ``path`` if it exists."""
-
-    if not path.exists():
-        return {}
-
-    with path.open("r", encoding="utf8") as handle:
-        payload = json.load(handle)
-
-    if not isinstance(payload, Mapping):
-        raise RuntimeError("Synonym cache must be a JSON mapping of strings to lists.")
-
-    cache: dict[str, list[str]] = {}
-    for key, values in payload.items():
-        if not isinstance(key, str):
-            raise RuntimeError("Synonym cache keys must be strings.")
-        if not isinstance(values, Sequence):
-            raise RuntimeError("Synonym cache values must be lists of strings.")
-        cache[key] = [str(value) for value in values]
-
-    return cache
-
-
-def _write_cache(path: Path, cache: Mapping[str, Sequence[str]]) -> None:
-    """Write ``cache`` to ``path`` deterministically."""
-
-    serialisable = {key: list(values) for key, values in sorted(cache.items())}
-    with path.open("w", encoding="utf8") as handle:
-        json.dump(serialisable, handle, ensure_ascii=False, indent=2, sort_keys=True)
-
-
-class VectorLexicon(Lexicon):
+class VectorLexicon(LexiconBackend):
     """Lexicon implementation backed by dense word embeddings."""

     def __init__(
@@ -292,9 +262,13 @@ class VectorLexicon(Lexicon):
         self._max_neighbors = max(1, max_neighbors)
         self._min_similarity = min_similarity
         self._cache: MutableMapping[str, list[str]] = {}
+        self._cache_path: Path | None
+        self._cache_checksum: str | None = None
         if cache_path is not None:
             path = Path(cache_path)
-
+            snapshot = _load_cache_file(path)
+            self._cache.update(snapshot.entries)
+            self._cache_checksum = snapshot.checksum
             self._cache_path = path
         else:
             self._cache_path = None
@@ -411,6 +385,12 @@ class VectorLexicon(Lexicon):

         return {key: list(values) for key, values in self._cache.items()}

+    @classmethod
+    def load_cache(cls, path: str | Path) -> CacheSnapshot:
+        """Load and validate a cache file for reuse."""
+
+        return _load_cache_file(Path(path))
+
     def save_cache(self, path: str | Path | None = None) -> Path:
         """Persist the current cache to disk, returning the path used."""

@@ -422,7 +402,8 @@
             target = Path(path)
         self._cache_path = target

-
+        snapshot = _write_cache_file(target, self._cache)
+        self._cache_checksum = snapshot.checksum
         self._cache_dirty = False
         return target

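Because `load_cache` is a classmethod on both backends, a persisted cache can be inspected without constructing the (potentially heavy) lexicon itself. A short sketch, assuming the package is installed; the paths are illustrative, and a missing file simply yields an empty snapshot:

from glitchlings.lexicon import GraphLexicon, VectorLexicon

# Both backends delegate to the shared _cache helpers, so the snapshot has
# the same shape either way: validated entries plus an optional checksum.
snapshot = VectorLexicon.load_cache("vector_cache.json")
print(len(snapshot.entries), snapshot.checksum)

graph_snapshot = GraphLexicon.load_cache("graph_cache.json")
print(len(graph_snapshot.entries), graph_snapshot.checksum)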
glitchlings/lexicon/wordnet.py
CHANGED
@@ -32,7 +32,10 @@ if nltk is not None: # pragma: no cover - guarded by import success
 else:
     _WORDNET_MODULE = None

-from . import Lexicon
+from pathlib import Path
+
+from . import LexiconBackend
+from ._cache import CacheSnapshot

 _WORDNET_HANDLE: WordNetCorpusReader | Any | None = _WORDNET_MODULE
 _wordnet_ready = False
@@ -151,7 +154,7 @@ def _collect_synonyms(word: str, parts_of_speech: tuple[str, ...]) -> list[str]:
     return sorted(synonyms)


-class WordNetLexicon(Lexicon):
+class WordNetLexicon(LexiconBackend):
     """Lexicon that retrieves synonyms from the NLTK WordNet corpus."""

     def get_synonyms(
@@ -175,6 +178,13 @@ class WordNetLexicon(Lexicon):
             return True
         return pos.lower() in _VALID_POS

+    @classmethod
+    def load_cache(cls, path: str | Path) -> CacheSnapshot:
+        raise RuntimeError("WordNetLexicon does not persist or load caches.")
+
+    def save_cache(self, path: str | Path | None = None) -> Path | None:
+        raise RuntimeError("WordNetLexicon does not persist or load caches.")
+
     def __repr__(self) -> str: # pragma: no cover - trivial representation
         return f"WordNetLexicon(seed={self.seed!r})"

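Taken together, the three backends now share a single persistence contract: `GraphLexicon` and `VectorLexicon` implement it on top of the `_cache` helpers, while `WordNetLexicon` opts out by raising. A quick check, assuming the package is installed:

from glitchlings.lexicon import (
    GraphLexicon,
    LexiconBackend,
    VectorLexicon,
    WordNetLexicon,
)

# All three shipped backends derive from the new LexiconBackend ABC.
assert all(
    issubclass(cls, LexiconBackend)
    for cls in (GraphLexicon, VectorLexicon, WordNetLexicon)
)

# WordNet has no on-disk cache, so both hooks raise instead of silently no-opping.
try:
    WordNetLexicon.load_cache("unused.json")
except RuntimeError as error:
    print(error)  # WordNetLexicon does not persist or load caches.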
glitchlings/zoo/core.py
CHANGED
@@ -18,9 +18,13 @@ else:
     _datasets_error = None

 try: # pragma: no cover - optional dependency
-    from glitchlings._zoo_rust import compose_glitchlings as _compose_glitchlings_rust
+    from glitchlings._zoo_rust import (
+        compose_glitchlings as _compose_glitchlings_rust,
+        plan_glitchlings as _plan_glitchlings_rust,
+    )
 except ImportError: # pragma: no cover - compiled extension not present
     _compose_glitchlings_rust = None
+    _plan_glitchlings_rust = None


 log = logging.getLogger(__name__)
@@ -47,6 +51,76 @@ def _pipeline_feature_flag_enabled() -> bool:

     return True

+def _plan_glitchlings_python(
+    specs: list[dict[str, Any]],
+    master_seed: int,
+) -> list[tuple[int, int]]:
+    """Pure-Python fallback for orchestrating glitchlings in deterministic order."""
+
+    master_seed_int = int(master_seed)
+    planned: list[tuple[int, int, int, int, str]] = []
+    for index, spec in enumerate(specs):
+        name = str(spec["name"])
+        scope = int(spec["scope"])
+        order = int(spec["order"])
+        derived_seed = Gaggle.derive_seed(master_seed_int, name, index)
+        planned.append((index, derived_seed, scope, order, name))
+
+    planned.sort(key=lambda entry: (entry[2], entry[3], entry[4], entry[0]))
+    return [(index, seed) for index, seed, *_ in planned]
+
+
+def _plan_glitchlings_with_rust(
+    specs: list[dict[str, Any]],
+    master_seed: int,
+) -> list[tuple[int, int]] | None:
+    """Attempt to obtain the orchestration plan from the compiled Rust module."""
+
+    if _plan_glitchlings_rust is None:
+        return None
+
+    try:
+        plan = _plan_glitchlings_rust(specs, int(master_seed))
+    except Exception: # pragma: no cover - defer to Python fallback on failure
+        log.debug("Rust orchestration planning failed; falling back to Python plan", exc_info=True)
+        return None
+
+    return [(int(index), int(seed)) for index, seed in plan]
+
+
+def _plan_glitchling_specs(
+    specs: list[dict[str, Any]],
+    master_seed: int | None,
+) -> list[tuple[int, int]]:
+    """Resolve orchestration order and seeds from glitchling specifications."""
+
+    if master_seed is None:
+        message = "Gaggle orchestration requires a master seed"
+        raise ValueError(message)
+
+    master_seed_int = int(master_seed)
+    plan = _plan_glitchlings_with_rust(specs, master_seed_int)
+    if plan is not None:
+        return plan
+
+    return _plan_glitchlings_python(specs, master_seed_int)
+
+
+def _plan_glitchling_sequence(
+    glitchlings: list["Glitchling"], master_seed: int | None
+) -> list[tuple[int, int]]:
+    """Derive orchestration plan for concrete glitchling instances."""
+
+    specs = [
+        {
+            "name": glitchling.name,
+            "scope": int(glitchling.level),
+            "order": int(glitchling.order),
+        }
+        for glitchling in glitchlings
+    ]
+    return _plan_glitchling_specs(specs, master_seed)
+
 if TYPE_CHECKING: # pragma: no cover - typing only
     from datasets import Dataset # type: ignore
 elif _DatasetsDataset is not None:
@@ -309,17 +383,17 @@ class Gaggle(Glitchling):
         """

         super().__init__("Gaggle", self.corrupt, AttackWave.DOCUMENT, seed=seed)
+        self._clones_by_index: list[Glitchling] = []
+        for idx, glitchling in enumerate(glitchlings):
+            clone = glitchling.clone()
+            setattr(clone, "_gaggle_index", idx)
+            self._clones_by_index.append(clone)
+
         self.glitchlings: dict[AttackWave, list[Glitchling]] = {
             level: [] for level in AttackWave
         }
         self.apply_order: list[Glitchling] = []
-
-        for idx, g in enumerate(glitchlings):
-            _g = g.clone()
-            derived_seed = Gaggle.derive_seed(seed, _g.name, idx)
-            _g.reset_rng(derived_seed)
-            setattr(_g, "_gaggle_index", idx)
-            self.glitchlings[g.level].append(_g)
+        self._plan: list[tuple[int, int]] = []
         self.sort_glitchlings()

     @staticmethod
@@ -352,11 +426,27 @@ class Gaggle(Glitchling):
     def sort_glitchlings(self) -> None:
         """Sort glitchlings by wave then order to produce application order."""

-        self.
-
-
-
-
+        plan = _plan_glitchling_sequence(self._clones_by_index, self.seed)
+        self._plan = plan
+
+        self.glitchlings = {level: [] for level in AttackWave}
+        for clone in self._clones_by_index:
+            self.glitchlings[clone.level].append(clone)
+
+        missing = set(range(len(self._clones_by_index)))
+        apply_order: list[Glitchling] = []
+        for index, derived_seed in plan:
+            clone = self._clones_by_index[index]
+            clone.reset_rng(int(derived_seed))
+            apply_order.append(clone)
+            missing.discard(index)
+
+        if missing:
+            missing_indices = ", ".join(str(idx) for idx in sorted(missing))
+            message = f"Orchestration plan missing glitchlings at indices: {missing_indices}"
+            raise RuntimeError(message)
+
+        self.apply_order = apply_order

     @staticmethod
     def rust_pipeline_supported() -> bool:
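The fallback planner's ordering rule is simple: each spec keeps a seed derived from the master seed, and entries are applied in ascending (scope, order, name, original index) order. Below is a self-contained sketch of that rule; `derive_seed` is a stand-in, since `Gaggle.derive_seed` itself is not shown in this diff, and the example specs are made up:

from hashlib import blake2s


def derive_seed(master_seed: int, name: str, index: int) -> int:
    # Stand-in for Gaggle.derive_seed; the real derivation scheme is not part of this diff.
    digest = blake2s(f"{master_seed}:{name}:{index}".encode("utf8"), digest_size=8)
    return int.from_bytes(digest.digest(), "big")


def plan(specs: list[dict], master_seed: int) -> list[tuple[int, int]]:
    # Mirrors _plan_glitchlings_python: a stable sort by scope, order, name, then index.
    planned = [
        (index, derive_seed(master_seed, spec["name"], index), spec["scope"], spec["order"], spec["name"])
        for index, spec in enumerate(specs)
    ]
    planned.sort(key=lambda entry: (entry[2], entry[3], entry[4], entry[0]))
    return [(index, seed) for index, seed, *_ in planned]


specs = [
    {"name": "Zeedub", "scope": 1, "order": 2},
    {"name": "Typogre", "scope": 0, "order": 5},
]
print(plan(specs, master_seed=42))  # the lower-scope spec (Typogre) comes first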
{glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: glitchlings
-Version: 0.4.0
+Version: 0.4.1
 Summary: Monsters for your language games.
 Author: osoleve
 License: Apache License
@@ -420,7 +420,7 @@ _How can a computer need reading glasses?_

 ### Zeedub

-
+_Watch your step around here._

 > _**Invisible Ink.**_ Zeedub slips zero-width codepoints between non-space character pairs, forcing models to reason about text whose visible form masks hidden glyphs.
 >
{glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/RECORD
CHANGED
@@ -1,17 +1,18 @@
 glitchlings/__init__.py,sha256=hEmQ1rl3G5uZBDbfJX_W4aIUNSsPAsy_Ai5DgQHasvk,813
 glitchlings/__main__.py,sha256=EOiBgay0x6B9VlSDzSQvMuoq6bHJdSvFSgcAVGGKkd4,121
-glitchlings/_zoo_rust.cpython-311-darwin.so,sha256=
+glitchlings/_zoo_rust.cpython-311-darwin.so,sha256=g3v99fyeGelWex500hqBOCSQ26suGLoU2wiq8yTnhGM,2488368
 glitchlings/config.py,sha256=hwkcMkhEvUzK8FECgG6kbf_4MpMQcopskiSgXzK5B3o,7785
 glitchlings/config.toml,sha256=MWwgbx1-KIRAY3JZmMrCVbZNxFjHgRJXbtNAVuUNcxY,108
 glitchlings/main.py,sha256=Rw9pCgNrGxwzC1rZbbng7cHUP9xlL0WWWTdjW95XiSM,10084
 glitchlings/dlc/__init__.py,sha256=eTLEEWrVWPqniXHqee4W23H1rjElI1PQ_jcqWFe9D3g,141
 glitchlings/dlc/huggingface.py,sha256=I1QWanWVxO02awgSpHDtgQEVF-9AQRLtsta2RCitWhE,2933
 glitchlings/dlc/prime.py,sha256=wpRMNtgka1vNlEzifeCjGMp1q_-QclZn3NxXczGnNpM,9278
-glitchlings/lexicon/__init__.py,sha256
-glitchlings/lexicon/
+glitchlings/lexicon/__init__.py,sha256=e3MbtV3R_UOoZXsckR3gnThwgqCi4HXnfduaqxqYXvw,6229
+glitchlings/lexicon/_cache.py,sha256=KlcHKtOFH1yPxwhr8_HF_qgpALmUuHkGTzNfWnQ2Jb8,3955
+glitchlings/lexicon/graph.py,sha256=YYLrYnmSZ8uf8VvrNLuVF_nIVDH7OoR3RuxJ-9JMA2c,10041
 glitchlings/lexicon/metrics.py,sha256=W8TCemZaCjBOUSX8G7JdgQAbMykXXfRTfodkDSkc3aQ,4599
-glitchlings/lexicon/vector.py,sha256=
-glitchlings/lexicon/wordnet.py,sha256=
+glitchlings/lexicon/vector.py,sha256=oeZQwYxrK25REu4MhUUlMmaStW17Gx6RwrU1v6NooOg,19713
+glitchlings/lexicon/wordnet.py,sha256=Zv0YNHSM-DE2ucVZl_OOutTV1s0-i2xPOrfqYYdZKTU,6034
 glitchlings/lexicon/data/default_vector_cache.json,sha256=7obKHqmR3odbTfgJPWLSRFYFh4J_6uvv_CntCSe_EjI,725
 glitchlings/util/__init__.py,sha256=7KiZ0gKMjocfd34cajneZhTqYb7Hkwi_PpjltPqvkNI,4498
 glitchlings/zoo/__init__.py,sha256=eFYmaWeFDlSqfaiED51HWM-OqiTo_BOz0ASeyhOwOsw,4818
@@ -20,7 +21,7 @@ glitchlings/zoo/_rate.py,sha256=TMyfVFV7pLxSGVswPlOAtBvk25Bjtx5xXTtpb_utgik,527
 glitchlings/zoo/_sampling.py,sha256=VOSWDgYWXIiAuKxn2IckFJhpRgGotQP_KW28db8kTKI,1587
 glitchlings/zoo/_text_utils.py,sha256=nAfFT_VdXMXciCR7eQ5EAmym5wvzL6_Sdn9dvCx2s3Q,2758
 glitchlings/zoo/adjax.py,sha256=N3CzfM7m7mAYgFcQYLQkqK2VYLw_vFvEMBM2aNU--ZA,3530
-glitchlings/zoo/core.py,sha256=
+glitchlings/zoo/core.py,sha256=YymiEc66V4mW_4MbTST2038D7YdZVyRkiUZn886IV4I,17203
 glitchlings/zoo/jargoyle.py,sha256=6-DJxUFz2AjT-iQDFlK2ZG9pVwq2boDtslEzCNyI_04,11481
 glitchlings/zoo/mim1c.py,sha256=yAt1ngR3j2KXLbzc8LhrQlIWRO_KT5dFK1EE8QivMAQ,3429
 glitchlings/zoo/ocr_confusions.tsv,sha256=KhtR7vJDTITpfTSGa-I7RHr6CK7LkGi2KjdhEWipI6o,183
@@ -30,9 +31,9 @@ glitchlings/zoo/rushmore.py,sha256=J1wd4IB7WOAR2TdntkxCMZWseWR0Yii8UQZ7ucfpWCc,4
 glitchlings/zoo/scannequin.py,sha256=Ps8nxysKjkJV408zaL1kjVjy4jliATDBpYcNHLWbNFg,4859
 glitchlings/zoo/typogre.py,sha256=0fYaxOEiTnxiCqmsiSN1r_wl1vC1Ueaiks2e94kks70,6668
 glitchlings/zoo/zeedub.py,sha256=l51swlo556-TXhDk4nayHOm1XgHwWmfUKzQ01YMuCpE,4801
-glitchlings-0.4.
-glitchlings-0.4.
-glitchlings-0.4.
-glitchlings-0.4.
-glitchlings-0.4.
-glitchlings-0.4.
+glitchlings-0.4.1.dist-info/licenses/LICENSE,sha256=YCvGip-LoaRyu6h0nPo71q6eHEkzUpsE11psDJOIRkw,11337
+glitchlings-0.4.1.dist-info/METADATA,sha256=9HdqQt7PazdHMtPP5JpINljl3kvL8HOqTFE3Wwyrm2g,28260
+glitchlings-0.4.1.dist-info/WHEEL,sha256=Tgp8Vc-mmQm0KX-V22BSUoymoX1p0w13bZbX85y8hSs,114
+glitchlings-0.4.1.dist-info/entry_points.txt,sha256=kGOwuAsjFDLtztLisaXtOouq9wFVMOJg5FzaAkg-Hto,54
+glitchlings-0.4.1.dist-info/top_level.txt,sha256=VHFNBrLjtDwPCYXbGKi6o17Eueedi81eNbR3hBOoST0,12
+glitchlings-0.4.1.dist-info/RECORD,,
{glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/WHEEL
File without changes
{glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/entry_points.txt
File without changes
{glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/licenses/LICENSE
File without changes
{glitchlings-0.4.0.dist-info → glitchlings-0.4.1.dist-info}/top_level.txt
File without changes