glitchlings 0.4.0__cp312-cp312-win_amd64.whl → 0.4.2__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of glitchlings might be problematic. Click here for more details.
- glitchlings/__init__.py +26 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
- glitchlings/compat.py +215 -0
- glitchlings/config.py +136 -19
- glitchlings/dlc/_shared.py +68 -0
- glitchlings/dlc/huggingface.py +26 -41
- glitchlings/dlc/prime.py +64 -101
- glitchlings/lexicon/__init__.py +26 -19
- glitchlings/lexicon/_cache.py +104 -0
- glitchlings/lexicon/graph.py +18 -39
- glitchlings/lexicon/metrics.py +1 -8
- glitchlings/lexicon/vector.py +29 -67
- glitchlings/lexicon/wordnet.py +39 -30
- glitchlings/main.py +9 -13
- glitchlings/util/__init__.py +18 -4
- glitchlings/util/adapters.py +27 -0
- glitchlings/zoo/__init__.py +21 -14
- glitchlings/zoo/_ocr_confusions.py +1 -3
- glitchlings/zoo/_rate.py +1 -4
- glitchlings/zoo/_sampling.py +0 -1
- glitchlings/zoo/_text_utils.py +1 -5
- glitchlings/zoo/adjax.py +0 -2
- glitchlings/zoo/core.py +185 -56
- glitchlings/zoo/jargoyle.py +9 -14
- glitchlings/zoo/mim1c.py +11 -10
- glitchlings/zoo/redactyl.py +5 -8
- glitchlings/zoo/reduple.py +3 -1
- glitchlings/zoo/rushmore.py +2 -8
- glitchlings/zoo/scannequin.py +5 -4
- glitchlings/zoo/typogre.py +3 -7
- glitchlings/zoo/zeedub.py +2 -2
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/METADATA +68 -4
- glitchlings-0.4.2.dist-info/RECORD +42 -0
- glitchlings-0.4.0.dist-info/RECORD +0 -38
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/WHEEL +0 -0
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/entry_points.txt +0 -0
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.4.0.dist-info → glitchlings-0.4.2.dist-info}/top_level.txt +0 -0
glitchlings/util/__init__.py
CHANGED
|
@@ -1,12 +1,27 @@
|
|
|
1
1
|
import difflib
|
|
2
2
|
from collections.abc import Iterable
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
__all__ = [
|
|
5
|
+
"SAMPLE_TEXT",
|
|
6
|
+
"string_diffs",
|
|
7
|
+
"KeyNeighborMap",
|
|
8
|
+
"KeyboardLayouts",
|
|
9
|
+
"KeyNeighbors",
|
|
10
|
+
"KEYNEIGHBORS",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
SAMPLE_TEXT = (
|
|
14
|
+
"One morning, when Gregor Samsa woke from troubled dreams, he found himself "
|
|
15
|
+
"transformed in his bed into a horrible vermin. He lay on his armour-like back, and "
|
|
16
|
+
"if he lifted his head a little he could see his brown belly, slightly domed and "
|
|
17
|
+
"divided by arches into stiff sections. The bedding was hardly able to cover it and "
|
|
18
|
+
"seemed ready to slide off any moment. His many legs, pitifully thin compared with "
|
|
19
|
+
"the size of the rest of him, waved about helplessly as he looked."
|
|
20
|
+
)
|
|
5
21
|
|
|
6
22
|
|
|
7
23
|
def string_diffs(a: str, b: str) -> list[list[tuple[str, str, str]]]:
|
|
8
|
-
"""
|
|
9
|
-
Compare two strings using SequenceMatcher and return
|
|
24
|
+
"""Compare two strings using SequenceMatcher and return
|
|
10
25
|
grouped adjacent opcodes (excluding 'equal' tags).
|
|
11
26
|
|
|
12
27
|
Each element is a tuple: (tag, a_text, b_text).
|
|
@@ -39,7 +54,6 @@ KeyboardLayouts = dict[str, KeyNeighborMap]
|
|
|
39
54
|
|
|
40
55
|
def _build_neighbor_map(rows: Iterable[str]) -> KeyNeighborMap:
|
|
41
56
|
"""Derive 8-neighbour adjacency lists from keyboard layout rows."""
|
|
42
|
-
|
|
43
57
|
grid: dict[tuple[int, int], str] = {}
|
|
44
58
|
for y, row in enumerate(rows):
|
|
45
59
|
for x, char in enumerate(row):
|
glitchlings/util/adapters.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Adapter helpers shared across Python and DLC integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
|
|
7
|
+
from ..zoo import Gaggle, Glitchling, summon
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def coerce_gaggle(
|
|
11
|
+
glitchlings: Glitchling | Gaggle | str | Iterable[str | Glitchling],
|
|
12
|
+
*,
|
|
13
|
+
seed: int,
|
|
14
|
+
) -> Gaggle:
|
|
15
|
+
"""Return a :class:`Gaggle` built from any supported glitchling specifier."""
|
|
16
|
+
if isinstance(glitchlings, Gaggle):
|
|
17
|
+
return glitchlings
|
|
18
|
+
|
|
19
|
+
if isinstance(glitchlings, (Glitchling, str)):
|
|
20
|
+
resolved: Iterable[str | Glitchling] = [glitchlings]
|
|
21
|
+
else:
|
|
22
|
+
resolved = glitchlings
|
|
23
|
+
|
|
24
|
+
return summon(list(resolved), seed=seed)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
__all__ = ["coerce_gaggle"]
|
glitchlings/zoo/__init__.py
CHANGED
|
@@ -3,16 +3,25 @@ from __future__ import annotations
|
|
|
3
3
|
import ast
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
from .typogre import Typogre, typogre
|
|
7
|
-
from .mim1c import Mim1c, mim1c
|
|
8
|
-
from .jargoyle import Jargoyle, jargoyle, dependencies_available as _jargoyle_available
|
|
9
6
|
from .adjax import Adjax, adjax
|
|
7
|
+
from .core import (
|
|
8
|
+
Gaggle,
|
|
9
|
+
Glitchling,
|
|
10
|
+
is_rust_pipeline_enabled,
|
|
11
|
+
is_rust_pipeline_supported,
|
|
12
|
+
pipeline_feature_flag_enabled,
|
|
13
|
+
plan_glitchling_specs,
|
|
14
|
+
plan_glitchlings,
|
|
15
|
+
)
|
|
16
|
+
from .jargoyle import Jargoyle, jargoyle
|
|
17
|
+
from .jargoyle import dependencies_available as _jargoyle_available
|
|
18
|
+
from .mim1c import Mim1c, mim1c
|
|
19
|
+
from .redactyl import Redactyl, redactyl
|
|
10
20
|
from .reduple import Reduple, reduple
|
|
11
21
|
from .rushmore import Rushmore, rushmore
|
|
12
|
-
from .redactyl import Redactyl, redactyl
|
|
13
22
|
from .scannequin import Scannequin, scannequin
|
|
23
|
+
from .typogre import Typogre, typogre
|
|
14
24
|
from .zeedub import Zeedub, zeedub
|
|
15
|
-
from .core import Glitchling, Gaggle
|
|
16
25
|
|
|
17
26
|
__all__ = [
|
|
18
27
|
"Typogre",
|
|
@@ -35,6 +44,11 @@ __all__ = [
|
|
|
35
44
|
"zeedub",
|
|
36
45
|
"Glitchling",
|
|
37
46
|
"Gaggle",
|
|
47
|
+
"plan_glitchlings",
|
|
48
|
+
"plan_glitchling_specs",
|
|
49
|
+
"is_rust_pipeline_enabled",
|
|
50
|
+
"is_rust_pipeline_supported",
|
|
51
|
+
"pipeline_feature_flag_enabled",
|
|
38
52
|
"summon",
|
|
39
53
|
"BUILTIN_GLITCHLINGS",
|
|
40
54
|
"DEFAULT_GLITCHLING_NAMES",
|
|
@@ -71,7 +85,6 @@ DEFAULT_GLITCHLING_NAMES: list[str] = list(BUILTIN_GLITCHLINGS.keys())
|
|
|
71
85
|
|
|
72
86
|
def parse_glitchling_spec(specification: str) -> Glitchling:
|
|
73
87
|
"""Return a glitchling instance configured according to ``specification``."""
|
|
74
|
-
|
|
75
88
|
text = specification.strip()
|
|
76
89
|
if not text:
|
|
77
90
|
raise ValueError("Glitchling specification cannot be empty.")
|
|
@@ -98,14 +111,10 @@ def parse_glitchling_spec(specification: str) -> Glitchling:
|
|
|
98
111
|
try:
|
|
99
112
|
call_expr = ast.parse(f"_({arg_source})", mode="eval").body
|
|
100
113
|
except SyntaxError as exc:
|
|
101
|
-
raise ValueError(
|
|
102
|
-
f"Invalid parameter syntax for glitchling '{name}': {exc.msg}"
|
|
103
|
-
) from exc
|
|
114
|
+
raise ValueError(f"Invalid parameter syntax for glitchling '{name}': {exc.msg}") from exc
|
|
104
115
|
|
|
105
116
|
if not isinstance(call_expr, ast.Call) or call_expr.args:
|
|
106
|
-
raise ValueError(
|
|
107
|
-
f"Glitchling '{name}' parameters must be provided as keyword arguments."
|
|
108
|
-
)
|
|
117
|
+
raise ValueError(f"Glitchling '{name}' parameters must be provided as keyword arguments.")
|
|
109
118
|
|
|
110
119
|
kwargs: dict[str, Any] = {}
|
|
111
120
|
for keyword in call_expr.keywords:
|
|
@@ -128,7 +137,6 @@ def parse_glitchling_spec(specification: str) -> Glitchling:
|
|
|
128
137
|
|
|
129
138
|
def get_glitchling_class(name: str) -> type[Glitchling]:
|
|
130
139
|
"""Look up the glitchling class registered under ``name``."""
|
|
131
|
-
|
|
132
140
|
key = name.strip().lower()
|
|
133
141
|
if not key:
|
|
134
142
|
raise ValueError("Glitchling name cannot be empty.")
|
|
@@ -142,7 +150,6 @@ def get_glitchling_class(name: str) -> type[Glitchling]:
|
|
|
142
150
|
|
|
143
151
|
def summon(glitchlings: list[str | Glitchling], seed: int = 151) -> Gaggle:
|
|
144
152
|
"""Summon glitchlings by name (using defaults) or instance (to change parameters)."""
|
|
145
|
-
|
|
146
153
|
summoned: list[Glitchling] = []
|
|
147
154
|
for entry in glitchlings:
|
|
148
155
|
if isinstance(entry, Glitchling):
|
|
glitchlings/zoo/_ocr_confusions.py
CHANGED
|
@@ -26,9 +26,7 @@ def load_confusion_table() -> list[tuple[str, list[str]]]:
|
|
|
26
26
|
|
|
27
27
|
# Sort longer patterns first to avoid overlapping matches, mirroring the
|
|
28
28
|
# behaviour of the Rust `confusion_table` helper.
|
|
29
|
-
indexed_entries.sort(
|
|
30
|
-
key=lambda item: (-len(item[1][0]), item[0])
|
|
31
|
-
)
|
|
29
|
+
indexed_entries.sort(key=lambda item: (-len(item[1][0]), item[0]))
|
|
32
30
|
entries = [entry for _, entry in indexed_entries]
|
|
33
31
|
_CONFUSION_TABLE = entries
|
|
34
32
|
return entries
|
glitchlings/zoo/_rate.py
CHANGED
|
@@ -9,11 +9,8 @@ def resolve_rate(
|
|
|
9
9
|
legacy_name: str,
|
|
10
10
|
) -> float:
|
|
11
11
|
"""Return the effective rate while enforcing mutual exclusivity."""
|
|
12
|
-
|
|
13
12
|
if rate is not None and legacy_value is not None:
|
|
14
|
-
raise ValueError(
|
|
15
|
-
f"Specify either 'rate' or '{legacy_name}', not both."
|
|
16
|
-
)
|
|
13
|
+
raise ValueError(f"Specify either 'rate' or '{legacy_name}', not both.")
|
|
17
14
|
if rate is not None:
|
|
18
15
|
return rate
|
|
19
16
|
if legacy_value is not None:
|
glitchlings/zoo/_sampling.py
CHANGED
glitchlings/zoo/_text_utils.py
CHANGED
|
@@ -10,13 +10,11 @@ _TOKEN_EDGES_PATTERN = re.compile(r"^(\W*)(.*?)(\W*)$")
|
|
|
10
10
|
|
|
11
11
|
def split_preserving_whitespace(text: str) -> list[str]:
|
|
12
12
|
"""Split text while keeping whitespace tokens for stable reconstruction."""
|
|
13
|
-
|
|
14
13
|
return _WORD_SPLIT_PATTERN.split(text)
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
def split_token_edges(token: str) -> tuple[str, str, str]:
|
|
18
17
|
"""Return leading, core, and trailing segments for a token."""
|
|
19
|
-
|
|
20
18
|
match = _TOKEN_EDGES_PATTERN.match(token)
|
|
21
19
|
if match is None:
|
|
22
20
|
return "", token, ""
|
|
@@ -25,7 +23,6 @@ def split_token_edges(token: str) -> tuple[str, str, str]:
|
|
|
25
23
|
|
|
26
24
|
def token_core_length(token: str) -> int:
|
|
27
25
|
"""Return the length of the main word characters for weighting heuristics."""
|
|
28
|
-
|
|
29
26
|
_, core, _ = split_token_edges(token)
|
|
30
27
|
candidate = core if core else token
|
|
31
28
|
length = len(candidate)
|
|
@@ -50,7 +47,6 @@ class WordToken:
|
|
|
50
47
|
@property
|
|
51
48
|
def has_core(self) -> bool:
|
|
52
49
|
"""Return ``True`` when the token contains at least one core character."""
|
|
53
|
-
|
|
54
50
|
return bool(self.core)
|
|
55
51
|
|
|
56
52
|
|
|
@@ -65,8 +61,8 @@ def collect_word_tokens(
|
|
|
65
61
|
tokens: Token sequence produced by :func:`split_preserving_whitespace`.
|
|
66
62
|
skip_first_word: Exclude the first candidate token (used by Rushmore to
|
|
67
63
|
preserve leading words).
|
|
68
|
-
"""
|
|
69
64
|
|
|
65
|
+
"""
|
|
70
66
|
start = 2 if skip_first_word else 0
|
|
71
67
|
collected: list[WordToken] = []
|
|
72
68
|
for index in range(start, len(tokens), 2):
|
glitchlings/zoo/adjax.py
CHANGED
|
@@ -20,7 +20,6 @@ def _python_swap_adjacent_words(
|
|
|
20
20
|
rng: random.Random,
|
|
21
21
|
) -> str:
|
|
22
22
|
"""Swap the cores of adjacent words while keeping affixes and spacing intact."""
|
|
23
|
-
|
|
24
23
|
tokens = split_preserving_whitespace(text)
|
|
25
24
|
if len(tokens) < 2:
|
|
26
25
|
return text
|
|
@@ -72,7 +71,6 @@ def swap_adjacent_words(
|
|
|
72
71
|
swap_rate: float | None = None,
|
|
73
72
|
) -> str:
|
|
74
73
|
"""Swap adjacent word cores while preserving spacing and punctuation."""
|
|
75
|
-
|
|
76
74
|
effective_rate = resolve_rate(
|
|
77
75
|
rate=rate,
|
|
78
76
|
legacy_value=swap_rate,
|
glitchlings/zoo/core.py
CHANGED
|
@@ -4,23 +4,21 @@ import inspect
|
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
6
|
import random
|
|
7
|
+
from collections.abc import Mapping, Sequence
|
|
7
8
|
from enum import IntEnum, auto
|
|
8
9
|
from hashlib import blake2s
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Callable, Protocol
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Protocol, TypedDict, Union
|
|
10
11
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
except ModuleNotFoundError as error: # pragma: no cover - optional dependency
|
|
15
|
-
_DatasetsDataset = None # type: ignore[assignment]
|
|
16
|
-
_datasets_error = error
|
|
17
|
-
else:
|
|
18
|
-
_datasets_error = None
|
|
12
|
+
from ..compat import get_datasets_dataset, require_datasets
|
|
13
|
+
|
|
14
|
+
_DatasetsDataset = get_datasets_dataset()
|
|
19
15
|
|
|
20
16
|
try: # pragma: no cover - optional dependency
|
|
21
17
|
from glitchlings._zoo_rust import compose_glitchlings as _compose_glitchlings_rust
|
|
18
|
+
from glitchlings._zoo_rust import plan_glitchlings as _plan_glitchlings_rust
|
|
22
19
|
except ImportError: # pragma: no cover - compiled extension not present
|
|
23
20
|
_compose_glitchlings_rust = None
|
|
21
|
+
_plan_glitchlings_rust = None
|
|
24
22
|
|
|
25
23
|
|
|
26
24
|
log = logging.getLogger(__name__)
|
|
@@ -31,9 +29,17 @@ _PIPELINE_ENABLE_VALUES = {"1", "true", "yes", "on"}
|
|
|
31
29
|
_PIPELINE_DISABLE_VALUES = {"0", "false", "no", "off"}
|
|
32
30
|
|
|
33
31
|
|
|
34
|
-
|
|
35
|
-
|
|
32
|
+
class PlanSpecification(TypedDict):
|
|
33
|
+
name: str
|
|
34
|
+
scope: int
|
|
35
|
+
order: int
|
|
36
36
|
|
|
37
|
+
|
|
38
|
+
PlanEntry = Union["Glitchling", Mapping[str, Any]]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def pipeline_feature_flag_enabled() -> bool:
|
|
42
|
+
"""Return ``True`` when the environment does not explicitly disable the Rust pipeline."""
|
|
37
43
|
value = os.environ.get(_PIPELINE_FEATURE_FLAG_ENV)
|
|
38
44
|
if value is None:
|
|
39
45
|
return True
|
|
@@ -47,6 +53,138 @@ def _pipeline_feature_flag_enabled() -> bool:
|
|
|
47
53
|
|
|
48
54
|
return True
|
|
49
55
|
|
|
56
|
+
|
|
57
|
+
def _pipeline_feature_flag_enabled() -> bool:
|
|
58
|
+
"""Compatibility shim for legacy callers."""
|
|
59
|
+
return pipeline_feature_flag_enabled()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def is_rust_pipeline_supported() -> bool:
|
|
63
|
+
"""Return ``True`` when the optional Rust extension is importable."""
|
|
64
|
+
return _compose_glitchlings_rust is not None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def is_rust_pipeline_enabled() -> bool:
|
|
68
|
+
"""Return ``True`` when the Rust pipeline is available and not explicitly disabled."""
|
|
69
|
+
return is_rust_pipeline_supported() and pipeline_feature_flag_enabled()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _spec_from_glitchling(glitchling: "Glitchling") -> PlanSpecification:
|
|
73
|
+
"""Create a plan specification mapping from a glitchling instance."""
|
|
74
|
+
return {
|
|
75
|
+
"name": glitchling.name,
|
|
76
|
+
"scope": int(glitchling.level),
|
|
77
|
+
"order": int(glitchling.order),
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _normalize_plan_entry(entry: PlanEntry) -> PlanSpecification:
|
|
82
|
+
"""Convert a plan entry (glitchling or mapping) into a normalized specification."""
|
|
83
|
+
if isinstance(entry, Glitchling):
|
|
84
|
+
return _spec_from_glitchling(entry)
|
|
85
|
+
|
|
86
|
+
if not isinstance(entry, Mapping):
|
|
87
|
+
message = "plan_glitchlings expects Glitchling instances or mapping specifications"
|
|
88
|
+
raise TypeError(message)
|
|
89
|
+
|
|
90
|
+
try:
|
|
91
|
+
name = str(entry["name"])
|
|
92
|
+
scope_value = int(entry["scope"])
|
|
93
|
+
order_value = int(entry["order"])
|
|
94
|
+
except KeyError as exc: # pragma: no cover - defensive guard
|
|
95
|
+
raise ValueError(f"Plan specification missing required field: {exc.args[0]}") from exc
|
|
96
|
+
except (TypeError, ValueError) as exc:
|
|
97
|
+
raise ValueError("Plan specification fields must be coercible to integers") from exc
|
|
98
|
+
|
|
99
|
+
return {"name": name, "scope": scope_value, "order": order_value}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _normalize_plan_entries(entries: Sequence[PlanEntry]) -> list[PlanSpecification]:
|
|
103
|
+
"""Normalize a collection of orchestration plan entries."""
|
|
104
|
+
return [_normalize_plan_entry(entry) for entry in entries]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _plan_glitchlings_python(
|
|
108
|
+
specs: Sequence[Mapping[str, Any]],
|
|
109
|
+
master_seed: int,
|
|
110
|
+
) -> list[tuple[int, int]]:
|
|
111
|
+
"""Pure-Python fallback for orchestrating glitchlings in deterministic order."""
|
|
112
|
+
master_seed_int = int(master_seed)
|
|
113
|
+
planned: list[tuple[int, int, int, int, str]] = []
|
|
114
|
+
for index, spec in enumerate(specs):
|
|
115
|
+
name = str(spec["name"])
|
|
116
|
+
scope = int(spec["scope"])
|
|
117
|
+
order = int(spec["order"])
|
|
118
|
+
derived_seed = Gaggle.derive_seed(master_seed_int, name, index)
|
|
119
|
+
planned.append((index, derived_seed, scope, order, name))
|
|
120
|
+
|
|
121
|
+
planned.sort(key=lambda entry: (entry[2], entry[3], entry[4], entry[0]))
|
|
122
|
+
return [(index, seed) for index, seed, *_ in planned]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _plan_glitchlings_with_rust(
|
|
126
|
+
specs: Sequence[Mapping[str, Any]],
|
|
127
|
+
master_seed: int,
|
|
128
|
+
) -> list[tuple[int, int]] | None:
|
|
129
|
+
"""Attempt to obtain the orchestration plan from the compiled Rust module."""
|
|
130
|
+
if _plan_glitchlings_rust is None:
|
|
131
|
+
return None
|
|
132
|
+
|
|
133
|
+
try:
|
|
134
|
+
plan = _plan_glitchlings_rust(specs, int(master_seed))
|
|
135
|
+
except Exception: # pragma: no cover - defer to Python fallback on failure
|
|
136
|
+
log.debug("Rust orchestration planning failed; falling back to Python plan", exc_info=True)
|
|
137
|
+
return None
|
|
138
|
+
|
|
139
|
+
return [(int(index), int(seed)) for index, seed in plan]
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _resolve_orchestration_plan(
|
|
143
|
+
specs: Sequence[PlanSpecification],
|
|
144
|
+
master_seed: int,
|
|
145
|
+
prefer_rust: bool,
|
|
146
|
+
) -> list[tuple[int, int]]:
|
|
147
|
+
"""Dispatch to the Rust planner when available, otherwise fall back to Python."""
|
|
148
|
+
if prefer_rust:
|
|
149
|
+
plan = _plan_glitchlings_with_rust(list(specs), master_seed)
|
|
150
|
+
if plan is not None:
|
|
151
|
+
return plan
|
|
152
|
+
|
|
153
|
+
return _plan_glitchlings_python(list(specs), master_seed)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def plan_glitchling_specs(
|
|
157
|
+
specs: Sequence[Mapping[str, Any]],
|
|
158
|
+
master_seed: int | None,
|
|
159
|
+
*,
|
|
160
|
+
prefer_rust: bool = True,
|
|
161
|
+
) -> list[tuple[int, int]]:
|
|
162
|
+
"""Resolve orchestration order and seeds from glitchling specifications."""
|
|
163
|
+
if master_seed is None:
|
|
164
|
+
message = "Gaggle orchestration requires a master seed"
|
|
165
|
+
raise ValueError(message)
|
|
166
|
+
|
|
167
|
+
normalized_specs = [_normalize_plan_entry(spec) for spec in specs]
|
|
168
|
+
master_seed_int = int(master_seed)
|
|
169
|
+
return _resolve_orchestration_plan(normalized_specs, master_seed_int, prefer_rust)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def plan_glitchlings(
|
|
173
|
+
entries: Sequence[PlanEntry],
|
|
174
|
+
master_seed: int | None,
|
|
175
|
+
*,
|
|
176
|
+
prefer_rust: bool = True,
|
|
177
|
+
) -> list[tuple[int, int]]:
|
|
178
|
+
"""Normalize glitchling instances or specs and compute an orchestration plan."""
|
|
179
|
+
if master_seed is None:
|
|
180
|
+
message = "Gaggle orchestration requires a master seed"
|
|
181
|
+
raise ValueError(message)
|
|
182
|
+
|
|
183
|
+
normalized_specs = _normalize_plan_entries(entries)
|
|
184
|
+
master_seed_int = int(master_seed)
|
|
185
|
+
return _resolve_orchestration_plan(normalized_specs, master_seed_int, prefer_rust)
|
|
186
|
+
|
|
187
|
+
|
|
50
188
|
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
51
189
|
from datasets import Dataset # type: ignore
|
|
52
190
|
elif _DatasetsDataset is not None:
|
|
@@ -66,7 +204,6 @@ def _is_transcript(
|
|
|
66
204
|
require_all_content: bool = False,
|
|
67
205
|
) -> bool:
|
|
68
206
|
"""Return `True` when `value` appears to be a chat transcript."""
|
|
69
|
-
|
|
70
207
|
if not isinstance(value, list):
|
|
71
208
|
return False
|
|
72
209
|
|
|
@@ -135,8 +272,8 @@ class Glitchling:
|
|
|
135
272
|
order: Relative ordering within the same scope.
|
|
136
273
|
seed: Optional seed for deterministic random behaviour.
|
|
137
274
|
**kwargs: Additional parameters forwarded to the corruption callable.
|
|
138
|
-
"""
|
|
139
275
|
|
|
276
|
+
"""
|
|
140
277
|
# Each Glitchling maintains its own RNG for deterministic yet isolated behavior.
|
|
141
278
|
# If no seed is supplied, we fall back to Python's default entropy.
|
|
142
279
|
self.seed = seed
|
|
@@ -154,7 +291,6 @@ class Glitchling:
|
|
|
154
291
|
|
|
155
292
|
def set_param(self, key: str, value: Any) -> None:
|
|
156
293
|
"""Persist a parameter for use by the corruption callable."""
|
|
157
|
-
|
|
158
294
|
aliases = getattr(self, "_param_aliases", {})
|
|
159
295
|
canonical = aliases.get(key, key)
|
|
160
296
|
|
|
@@ -176,7 +312,6 @@ class Glitchling:
|
|
|
176
312
|
|
|
177
313
|
def pipeline_operation(self) -> dict[str, Any] | None:
|
|
178
314
|
"""Return the Rust pipeline operation descriptor for this glitchling."""
|
|
179
|
-
|
|
180
315
|
factory = self._pipeline_descriptor_factory
|
|
181
316
|
if factory is None:
|
|
182
317
|
return None
|
|
@@ -185,15 +320,11 @@ class Glitchling:
|
|
|
185
320
|
|
|
186
321
|
def _corruption_expects_rng(self) -> bool:
|
|
187
322
|
"""Return `True` when the corruption function accepts an rng keyword."""
|
|
188
|
-
|
|
189
323
|
cached_callable = self._cached_rng_callable
|
|
190
324
|
cached_expectation = self._cached_rng_expectation
|
|
191
325
|
corruption_function = self.corruption_function
|
|
192
326
|
|
|
193
|
-
if (
|
|
194
|
-
cached_callable is corruption_function
|
|
195
|
-
and cached_expectation is not None
|
|
196
|
-
):
|
|
327
|
+
if cached_callable is corruption_function and cached_expectation is not None:
|
|
197
328
|
return cached_expectation
|
|
198
329
|
|
|
199
330
|
expects_rng = False
|
|
@@ -211,7 +342,6 @@ class Glitchling:
|
|
|
211
342
|
|
|
212
343
|
def __corrupt(self, text: str, *args: Any, **kwargs: Any) -> str:
|
|
213
344
|
"""Execute the corruption callable, injecting the RNG when required."""
|
|
214
|
-
|
|
215
345
|
# Pass rng to underlying corruption function if it expects it.
|
|
216
346
|
expects_rng = self._corruption_expects_rng()
|
|
217
347
|
|
|
@@ -223,23 +353,17 @@ class Glitchling:
|
|
|
223
353
|
|
|
224
354
|
def corrupt(self, text: str | list[dict[str, Any]]) -> str | list[dict[str, Any]]:
|
|
225
355
|
"""Apply the corruption function to text or conversational transcripts."""
|
|
226
|
-
|
|
227
356
|
if _is_transcript(text):
|
|
228
357
|
transcript = [dict(turn) for turn in text]
|
|
229
358
|
if transcript:
|
|
230
|
-
transcript[-1]["content"] = self.__corrupt(
|
|
231
|
-
transcript[-1]["content"], **self.kwargs
|
|
232
|
-
)
|
|
359
|
+
transcript[-1]["content"] = self.__corrupt(transcript[-1]["content"], **self.kwargs)
|
|
233
360
|
return transcript
|
|
234
361
|
|
|
235
362
|
return self.__corrupt(text, **self.kwargs)
|
|
236
363
|
|
|
237
364
|
def corrupt_dataset(self, dataset: Dataset, columns: list[str]) -> Dataset:
|
|
238
365
|
"""Apply corruption lazily across dataset columns."""
|
|
239
|
-
|
|
240
|
-
if _DatasetsDataset is None:
|
|
241
|
-
message = "datasets is not installed"
|
|
242
|
-
raise ModuleNotFoundError(message) from _datasets_error
|
|
366
|
+
require_datasets("datasets is not installed")
|
|
243
367
|
|
|
244
368
|
def __corrupt_row(row: dict[str, Any]) -> dict[str, Any]:
|
|
245
369
|
row = dict(row)
|
|
@@ -261,12 +385,10 @@ class Glitchling:
|
|
|
261
385
|
|
|
262
386
|
def __call__(self, text: str, *args: Any, **kwds: Any) -> str | list[dict[str, Any]]:
|
|
263
387
|
"""Allow a glitchling to be invoked directly like a callable."""
|
|
264
|
-
|
|
265
388
|
return self.corrupt(text, *args, **kwds)
|
|
266
389
|
|
|
267
390
|
def reset_rng(self, seed: int | None = None) -> None:
|
|
268
391
|
"""Reset the glitchling's RNG to its initial seed."""
|
|
269
|
-
|
|
270
392
|
if seed is not None:
|
|
271
393
|
self.seed = seed
|
|
272
394
|
if self.seed is not None:
|
|
@@ -274,7 +396,6 @@ class Glitchling:
|
|
|
274
396
|
|
|
275
397
|
def clone(self, seed: int | None = None) -> "Glitchling":
|
|
276
398
|
"""Create a copy of this glitchling, optionally with a new seed."""
|
|
277
|
-
|
|
278
399
|
cls = self.__class__
|
|
279
400
|
filtered_kwargs = {k: v for k, v in self.kwargs.items() if k != "seed"}
|
|
280
401
|
clone_seed = seed if seed is not None else self.seed
|
|
@@ -294,9 +415,6 @@ class Glitchling:
|
|
|
294
415
|
return cls(**filtered_kwargs)
|
|
295
416
|
|
|
296
417
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
418
|
class Gaggle(Glitchling):
|
|
301
419
|
"""A collection of glitchlings executed in a deterministic order."""
|
|
302
420
|
|
|
@@ -306,25 +424,24 @@ class Gaggle(Glitchling):
|
|
|
306
424
|
Args:
|
|
307
425
|
glitchlings: Glitchlings to orchestrate.
|
|
308
426
|
seed: Master seed used to derive per-glitchling seeds.
|
|
309
|
-
"""
|
|
310
427
|
|
|
428
|
+
"""
|
|
311
429
|
super().__init__("Gaggle", self.corrupt, AttackWave.DOCUMENT, seed=seed)
|
|
312
|
-
self.
|
|
313
|
-
|
|
314
|
-
|
|
430
|
+
self._clones_by_index: list[Glitchling] = []
|
|
431
|
+
for idx, glitchling in enumerate(glitchlings):
|
|
432
|
+
clone = glitchling.clone()
|
|
433
|
+
setattr(clone, "_gaggle_index", idx)
|
|
434
|
+
self._clones_by_index.append(clone)
|
|
435
|
+
|
|
436
|
+
self.glitchlings: dict[AttackWave, list[Glitchling]] = {level: [] for level in AttackWave}
|
|
315
437
|
self.apply_order: list[Glitchling] = []
|
|
316
|
-
|
|
317
|
-
for idx, g in enumerate(glitchlings):
|
|
318
|
-
_g = g.clone()
|
|
319
|
-
derived_seed = Gaggle.derive_seed(seed, _g.name, idx)
|
|
320
|
-
_g.reset_rng(derived_seed)
|
|
321
|
-
setattr(_g, "_gaggle_index", idx)
|
|
322
|
-
self.glitchlings[g.level].append(_g)
|
|
438
|
+
self._plan: list[tuple[int, int]] = []
|
|
323
439
|
self.sort_glitchlings()
|
|
324
440
|
|
|
325
441
|
@staticmethod
|
|
326
442
|
def derive_seed(master_seed: int, glitchling_name: str, index: int) -> int:
|
|
327
443
|
"""Derive a deterministic seed for a glitchling based on the master seed."""
|
|
444
|
+
|
|
328
445
|
def _int_to_bytes(value: int) -> bytes:
|
|
329
446
|
if value == 0:
|
|
330
447
|
return b"\x00"
|
|
@@ -351,24 +468,37 @@ class Gaggle(Glitchling):
|
|
|
351
468
|
|
|
352
469
|
def sort_glitchlings(self) -> None:
|
|
353
470
|
"""Sort glitchlings by wave then order to produce application order."""
|
|
471
|
+
plan = plan_glitchlings(self._clones_by_index, self.seed)
|
|
472
|
+
self._plan = plan
|
|
473
|
+
|
|
474
|
+
self.glitchlings = {level: [] for level in AttackWave}
|
|
475
|
+
for clone in self._clones_by_index:
|
|
476
|
+
self.glitchlings[clone.level].append(clone)
|
|
477
|
+
|
|
478
|
+
missing = set(range(len(self._clones_by_index)))
|
|
479
|
+
apply_order: list[Glitchling] = []
|
|
480
|
+
for index, derived_seed in plan:
|
|
481
|
+
clone = self._clones_by_index[index]
|
|
482
|
+
clone.reset_rng(int(derived_seed))
|
|
483
|
+
apply_order.append(clone)
|
|
484
|
+
missing.discard(index)
|
|
354
485
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
486
|
+
if missing:
|
|
487
|
+
missing_indices = ", ".join(str(idx) for idx in sorted(missing))
|
|
488
|
+
message = f"Orchestration plan missing glitchlings at indices: {missing_indices}"
|
|
489
|
+
raise RuntimeError(message)
|
|
490
|
+
|
|
491
|
+
self.apply_order = apply_order
|
|
360
492
|
|
|
361
493
|
@staticmethod
|
|
362
494
|
def rust_pipeline_supported() -> bool:
|
|
363
495
|
"""Return ``True`` when the compiled Rust pipeline is importable."""
|
|
364
|
-
|
|
365
|
-
return _compose_glitchlings_rust is not None
|
|
496
|
+
return is_rust_pipeline_supported()
|
|
366
497
|
|
|
367
498
|
@staticmethod
|
|
368
499
|
def rust_pipeline_enabled() -> bool:
|
|
369
500
|
"""Return ``True`` when the Rust pipeline is available and not explicitly disabled."""
|
|
370
|
-
|
|
371
|
-
return Gaggle.rust_pipeline_supported() and _pipeline_feature_flag_enabled()
|
|
501
|
+
return is_rust_pipeline_enabled()
|
|
372
502
|
|
|
373
503
|
def _pipeline_descriptors(self) -> list[dict[str, Any]] | None:
|
|
374
504
|
if not self.rust_pipeline_enabled():
|
|
@@ -400,7 +530,6 @@ class Gaggle(Glitchling):
|
|
|
400
530
|
|
|
401
531
|
def corrupt(self, text: str) -> str:
|
|
402
532
|
"""Apply each glitchling to the provided text sequentially."""
|
|
403
|
-
|
|
404
533
|
master_seed = self.seed
|
|
405
534
|
descriptors = self._pipeline_descriptors()
|
|
406
535
|
if master_seed is not None and descriptors is not None:
|