glitchlings 0.2.5__cp312-cp312-win_amd64.whl → 0.9.3__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +36 -17
- glitchlings/__main__.py +0 -1
- glitchlings/_zoo_rust/__init__.py +12 -0
- glitchlings/_zoo_rust.cp312-win_amd64.pyd +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +53 -0
- glitchlings/attack/compose.py +299 -0
- glitchlings/attack/core.py +465 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +104 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +157 -0
- glitchlings/auggie.py +283 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +41 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +59 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +17 -3
- glitchlings/dlc/_shared.py +296 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +37 -65
- glitchlings/dlc/prime.py +55 -114
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +432 -0
- glitchlings/main.py +123 -32
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +29 -176
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +311 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +47 -24
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +301 -167
- glitchlings/zoo/core_execution.py +98 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +295 -0
- glitchlings/zoo/ekkokin.py +118 -0
- glitchlings/zoo/hokey.py +137 -0
- glitchlings/zoo/jargoyle.py +179 -274
- glitchlings/zoo/mim1c.py +106 -68
- glitchlings/zoo/pedant/__init__.py +107 -0
- glitchlings/zoo/pedant/core.py +105 -0
- glitchlings/zoo/pedant/forms.py +74 -0
- glitchlings/zoo/pedant/stones.py +74 -0
- glitchlings/zoo/redactyl.py +44 -175
- glitchlings/zoo/rng.py +259 -0
- glitchlings/zoo/rushmore.py +359 -116
- glitchlings/zoo/scannequin.py +18 -125
- glitchlings/zoo/transforms.py +386 -0
- glitchlings/zoo/typogre.py +76 -162
- glitchlings/zoo/validation.py +477 -0
- glitchlings/zoo/zeedub.py +33 -86
- glitchlings-0.9.3.dist-info/METADATA +334 -0
- glitchlings-0.9.3.dist-info/RECORD +80 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/entry_points.txt +1 -0
- glitchlings/zoo/_ocr_confusions.py +0 -34
- glitchlings/zoo/_rate.py +0 -21
- glitchlings/zoo/reduple.py +0 -169
- glitchlings-0.2.5.dist-info/METADATA +0 -490
- glitchlings-0.2.5.dist-info/RECORD +0 -27
- /glitchlings/{zoo → assets}/ocr_confusions.tsv +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/WHEEL +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/licenses/LICENSE +0 -0
- {glitchlings-0.2.5.dist-info → glitchlings-0.9.3.dist-info}/top_level.txt +0 -0
glitchlings/zoo/redactyl.py
CHANGED
|
@@ -1,198 +1,59 @@
|
|
|
1
|
-
import re
|
|
2
1
|
import random
|
|
3
|
-
from typing import
|
|
2
|
+
from typing import cast
|
|
4
3
|
|
|
5
|
-
from .
|
|
6
|
-
from .
|
|
4
|
+
from glitchlings.constants import DEFAULT_REDACTYL_CHAR, DEFAULT_REDACTYL_RATE
|
|
5
|
+
from glitchlings.internal.rust_ffi import redact_words_rust, resolve_seed
|
|
7
6
|
|
|
8
|
-
|
|
7
|
+
from .core import AttackWave, Glitchling, PipelineOperationPayload
|
|
9
8
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
from glitchlings._zoo_rust import redact_words as _redact_words_rust
|
|
13
|
-
except ImportError: # pragma: no cover - compiled extension not present
|
|
14
|
-
_redact_words_rust = None
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def _weighted_sample_without_replacement(
|
|
18
|
-
population: list[int],
|
|
19
|
-
weights: list[float],
|
|
20
|
-
*,
|
|
21
|
-
k: int,
|
|
22
|
-
rng: random.Random,
|
|
23
|
-
) -> list[int]:
|
|
24
|
-
"""Select `k` unique indices according to the given weights."""
|
|
25
|
-
|
|
26
|
-
selections: list[int] = []
|
|
27
|
-
items = list(zip(population, weights))
|
|
28
|
-
if k <= 0 or not items:
|
|
29
|
-
return selections
|
|
30
|
-
if k > len(items):
|
|
31
|
-
raise ValueError("Sample larger than population or is negative")
|
|
32
|
-
|
|
33
|
-
for _ in range(k):
|
|
34
|
-
total_weight = sum(weight for _, weight in items)
|
|
35
|
-
if total_weight <= 0:
|
|
36
|
-
chosen_index = rng.randrange(len(items))
|
|
37
|
-
else:
|
|
38
|
-
threshold = rng.random() * total_weight
|
|
39
|
-
cumulative = 0.0
|
|
40
|
-
chosen_index = len(items) - 1
|
|
41
|
-
for idx, (_, weight) in enumerate(items):
|
|
42
|
-
cumulative += weight
|
|
43
|
-
if cumulative >= threshold:
|
|
44
|
-
chosen_index = idx
|
|
45
|
-
break
|
|
46
|
-
value, _ = items.pop(chosen_index)
|
|
47
|
-
selections.append(value)
|
|
48
|
-
|
|
49
|
-
return selections
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def _python_redact_words(
|
|
53
|
-
text: str,
|
|
54
|
-
*,
|
|
55
|
-
replacement_char: str,
|
|
56
|
-
rate: float,
|
|
57
|
-
merge_adjacent: bool,
|
|
58
|
-
rng: random.Random,
|
|
59
|
-
unweighted: bool = False,
|
|
60
|
-
) -> str:
|
|
61
|
-
"""Redact random words by replacing their characters.
|
|
62
|
-
|
|
63
|
-
Parameters
|
|
64
|
-
- text: Input text.
|
|
65
|
-
- replacement_char: The character to use for redaction (default FULL_BLOCK).
|
|
66
|
-
- rate: Max proportion of words to redact (default 0.05).
|
|
67
|
-
- merge_adjacent: If True, merges adjacent redactions across intervening non-word chars.
|
|
68
|
-
- rng: RNG used for sampling decisions.
|
|
69
|
-
- unweighted: When True, sample words uniformly instead of by length.
|
|
70
|
-
"""
|
|
71
|
-
# Preserve exact spacing and punctuation by using regex
|
|
72
|
-
tokens = re.split(r"(\s+)", text)
|
|
73
|
-
word_indices = [i for i, token in enumerate(tokens) if i % 2 == 0 and token.strip()]
|
|
74
|
-
if not word_indices:
|
|
75
|
-
raise ValueError(
|
|
76
|
-
"Cannot redact words because the input text contains no redactable words."
|
|
77
|
-
)
|
|
78
|
-
weights: list[float] = []
|
|
79
|
-
for index in word_indices:
|
|
80
|
-
word = tokens[index]
|
|
81
|
-
match = re.match(r"^(\W*)(.*?)(\W*)$", word)
|
|
82
|
-
core = match.group(2) if match else word
|
|
83
|
-
core_length = len(core) if core else len(word)
|
|
84
|
-
if core_length <= 0:
|
|
85
|
-
core_length = len(word.strip()) or len(word)
|
|
86
|
-
if core_length <= 0:
|
|
87
|
-
core_length = 1
|
|
88
|
-
weights.append(1.0 if unweighted else float(core_length))
|
|
89
|
-
num_to_redact = max(1, int(len(word_indices) * rate))
|
|
90
|
-
if num_to_redact > len(word_indices):
|
|
91
|
-
raise ValueError("Sample larger than population or is negative")
|
|
92
|
-
indices_to_redact = _weighted_sample_without_replacement(
|
|
93
|
-
word_indices,
|
|
94
|
-
weights,
|
|
95
|
-
k=num_to_redact,
|
|
96
|
-
rng=rng,
|
|
97
|
-
)
|
|
98
|
-
indices_to_redact.sort()
|
|
99
|
-
|
|
100
|
-
for i in indices_to_redact:
|
|
101
|
-
if i >= len(tokens):
|
|
102
|
-
break
|
|
103
|
-
|
|
104
|
-
word = tokens[i]
|
|
105
|
-
if not word or word.isspace(): # Skip empty or whitespace
|
|
106
|
-
continue
|
|
107
|
-
|
|
108
|
-
# Check if word has trailing punctuation
|
|
109
|
-
match = re.match(r"^(\W*)(.*?)(\W*)$", word)
|
|
110
|
-
if match:
|
|
111
|
-
prefix, core, suffix = match.groups()
|
|
112
|
-
tokens[i] = f"{prefix}{replacement_char * len(core)}{suffix}"
|
|
113
|
-
else:
|
|
114
|
-
tokens[i] = f"{replacement_char * len(word)}"
|
|
115
|
-
|
|
116
|
-
text = "".join(tokens)
|
|
117
|
-
|
|
118
|
-
if merge_adjacent:
|
|
119
|
-
text = re.sub(
|
|
120
|
-
rf"{replacement_char}\W+{replacement_char}",
|
|
121
|
-
lambda m: replacement_char * (len(m.group(0)) - 1),
|
|
122
|
-
text,
|
|
123
|
-
)
|
|
124
|
-
|
|
125
|
-
return text
|
|
9
|
+
# Backwards compatibility alias
|
|
10
|
+
FULL_BLOCK = DEFAULT_REDACTYL_CHAR
|
|
126
11
|
|
|
127
12
|
|
|
128
13
|
def redact_words(
|
|
129
14
|
text: str,
|
|
130
|
-
replacement_char: str =
|
|
15
|
+
replacement_char: str | None = DEFAULT_REDACTYL_CHAR,
|
|
131
16
|
rate: float | None = None,
|
|
132
|
-
merge_adjacent: bool = False,
|
|
17
|
+
merge_adjacent: bool | None = False,
|
|
133
18
|
seed: int = 151,
|
|
134
19
|
rng: random.Random | None = None,
|
|
135
20
|
*,
|
|
136
|
-
redaction_rate: float | None = None,
|
|
137
21
|
unweighted: bool = False,
|
|
138
22
|
) -> str:
|
|
139
23
|
"""Redact random words by replacing their characters."""
|
|
24
|
+
effective_rate = DEFAULT_REDACTYL_RATE if rate is None else rate
|
|
140
25
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
legacy_value=redaction_rate,
|
|
144
|
-
default=0.025,
|
|
145
|
-
legacy_name="redaction_rate",
|
|
146
|
-
)
|
|
147
|
-
|
|
148
|
-
if rng is None:
|
|
149
|
-
rng = random.Random(seed)
|
|
26
|
+
replacement = DEFAULT_REDACTYL_CHAR if replacement_char is None else str(replacement_char)
|
|
27
|
+
merge = False if merge_adjacent is None else bool(merge_adjacent)
|
|
150
28
|
|
|
151
|
-
clamped_rate = max(0.0, effective_rate)
|
|
29
|
+
clamped_rate = max(0.0, min(effective_rate, 1.0))
|
|
152
30
|
unweighted_flag = bool(unweighted)
|
|
153
31
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
if use_rust:
|
|
157
|
-
return _redact_words_rust(
|
|
158
|
-
text,
|
|
159
|
-
replacement_char,
|
|
160
|
-
clamped_rate,
|
|
161
|
-
merge_adjacent,
|
|
162
|
-
unweighted_flag,
|
|
163
|
-
rng,
|
|
164
|
-
)
|
|
165
|
-
|
|
166
|
-
return _python_redact_words(
|
|
32
|
+
return redact_words_rust(
|
|
167
33
|
text,
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
34
|
+
replacement,
|
|
35
|
+
clamped_rate,
|
|
36
|
+
merge,
|
|
37
|
+
unweighted_flag,
|
|
38
|
+
resolve_seed(seed, rng),
|
|
173
39
|
)
|
|
174
40
|
|
|
175
41
|
|
|
176
42
|
class Redactyl(Glitchling):
|
|
177
43
|
"""Glitchling that redacts words with block characters."""
|
|
178
44
|
|
|
45
|
+
flavor = "Some things are better left ████████."
|
|
46
|
+
|
|
179
47
|
def __init__(
|
|
180
48
|
self,
|
|
181
49
|
*,
|
|
182
|
-
replacement_char: str =
|
|
50
|
+
replacement_char: str = DEFAULT_REDACTYL_CHAR,
|
|
183
51
|
rate: float | None = None,
|
|
184
|
-
redaction_rate: float | None = None,
|
|
185
52
|
merge_adjacent: bool = False,
|
|
186
53
|
seed: int = 151,
|
|
187
54
|
unweighted: bool = False,
|
|
188
55
|
) -> None:
|
|
189
|
-
|
|
190
|
-
effective_rate = resolve_rate(
|
|
191
|
-
rate=rate,
|
|
192
|
-
legacy_value=redaction_rate,
|
|
193
|
-
default=0.025,
|
|
194
|
-
legacy_name="redaction_rate",
|
|
195
|
-
)
|
|
56
|
+
effective_rate = DEFAULT_REDACTYL_RATE if rate is None else rate
|
|
196
57
|
super().__init__(
|
|
197
58
|
name="Redactyl",
|
|
198
59
|
corruption_function=redact_words,
|
|
@@ -204,23 +65,31 @@ class Redactyl(Glitchling):
|
|
|
204
65
|
unweighted=unweighted,
|
|
205
66
|
)
|
|
206
67
|
|
|
207
|
-
def pipeline_operation(self) ->
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
68
|
+
def pipeline_operation(self) -> PipelineOperationPayload:
|
|
69
|
+
replacement_char_value = self.kwargs.get("replacement_char", DEFAULT_REDACTYL_CHAR)
|
|
70
|
+
rate_value = self.kwargs.get("rate", DEFAULT_REDACTYL_RATE)
|
|
71
|
+
merge_value = self.kwargs.get("merge_adjacent", False)
|
|
72
|
+
|
|
73
|
+
replacement_char = str(
|
|
74
|
+
DEFAULT_REDACTYL_CHAR if replacement_char_value is None else replacement_char_value
|
|
75
|
+
)
|
|
76
|
+
rate = float(DEFAULT_REDACTYL_RATE if rate_value is None else rate_value)
|
|
77
|
+
merge_adjacent = bool(merge_value)
|
|
213
78
|
unweighted = bool(self.kwargs.get("unweighted", False))
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
79
|
+
|
|
80
|
+
return cast(
|
|
81
|
+
PipelineOperationPayload,
|
|
82
|
+
{
|
|
83
|
+
"type": "redact",
|
|
84
|
+
"replacement_char": replacement_char,
|
|
85
|
+
"rate": rate,
|
|
86
|
+
"merge_adjacent": merge_adjacent,
|
|
87
|
+
"unweighted": unweighted,
|
|
88
|
+
},
|
|
89
|
+
)
|
|
221
90
|
|
|
222
91
|
|
|
223
92
|
redactyl = Redactyl()
|
|
224
93
|
|
|
225
94
|
|
|
226
|
-
__all__ = ["Redactyl", "redactyl"]
|
|
95
|
+
__all__ = ["Redactyl", "redactyl", "redact_words"]
|
glitchlings/zoo/rng.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""RNG boundary layer for seed resolution.
|
|
2
|
+
|
|
3
|
+
This module provides the interface between RNG state and concrete random values.
|
|
4
|
+
All randomness in the glitchlings library flows through these functions.
|
|
5
|
+
|
|
6
|
+
Design Philosophy
|
|
7
|
+
-----------------
|
|
8
|
+
RNG management is an *impure* operation - it involves stateful objects
|
|
9
|
+
(random.Random) and non-deterministic behavior when no seed is provided.
|
|
10
|
+
This module provides the boundary layer that converts RNG state into
|
|
11
|
+
concrete values that can be passed to pure functions.
|
|
12
|
+
|
|
13
|
+
The pattern is:
|
|
14
|
+
1. User provides `seed: int | None` and/or `rng: random.Random | None`
|
|
15
|
+
2. Boundary layer resolves to a concrete `int` via `resolve_seed()`
|
|
16
|
+
3. Pure/Rust functions receive the concrete seed value
|
|
17
|
+
|
|
18
|
+
This separation means:
|
|
19
|
+
- Pure transformation code never touches RNG objects
|
|
20
|
+
- Tests can provide explicit seed values for reproducibility
|
|
21
|
+
- RNG state management is isolated to the boundary
|
|
22
|
+
|
|
23
|
+
See AGENTS.md "Functional Purity Architecture" for full details.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import random
|
|
29
|
+
from hashlib import blake2s
|
|
30
|
+
from typing import Protocol, runtime_checkable
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Constants
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
# Seeds are handed to Rust as u64, so every resolved seed is kept to 64 bits.
SEED_BIT_WIDTH = 64
# All-ones mask of SEED_BIT_WIDTH bits (0xFFFFFFFFFFFFFFFF).
SEED_MASK = 2**SEED_BIT_WIDTH - 1
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Protocols
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@runtime_checkable
class RandomBitsSource(Protocol):
    """Structural type for any object exposing a ``getrandbits`` method."""

    def getrandbits(self, k: int) -> int:
        """Return a non-negative integer built from ``k`` random bits."""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
# Core Boundary Functions
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def resolve_seed(
    seed: int | None,
    rng: random.Random | None,
) -> int:
    """Collapse an optional seed / optional RNG pair into one concrete seed.

    Intended to be called once at the boundary layer; the returned integer
    is what downstream pure/Rust functions should receive.

    Args:
        seed: Explicit seed. When given it wins over ``rng`` and is masked
            to ``SEED_BIT_WIDTH`` bits.
        rng: Generator to draw ``SEED_BIT_WIDTH`` bits from when ``seed``
            is None.

    Returns:
        A non-negative integer that fits in ``SEED_BIT_WIDTH`` bits.

    Note:
        With neither argument supplied the bits come from the module-level
        ``random`` state, so the result is not reproducible.

    Examples:
        >>> resolve_seed(42, None)  # explicit seed
        42
        >>> rng = random.Random(123)
        >>> resolve_seed(None, rng)  # sample from RNG
        14522756016584210807
    """
    if seed is None:
        # Fall back to the shared module-level generator only when the
        # caller supplied no RNG of their own.
        bit_source = random if rng is None else rng
        return bit_source.getrandbits(SEED_BIT_WIDTH)
    return int(seed) & SEED_MASK
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def resolve_seed_deterministic(
|
|
96
|
+
seed: int | None,
|
|
97
|
+
rng: random.Random | None,
|
|
98
|
+
) -> int:
|
|
99
|
+
"""Resolve a seed, requiring explicit seed or RNG.
|
|
100
|
+
|
|
101
|
+
Like resolve_seed(), but raises ValueError if both seed and rng are None.
|
|
102
|
+
Use this when non-deterministic behavior would be a bug.
|
|
103
|
+
|
|
104
|
+
Args:
|
|
105
|
+
seed: Explicit seed value.
|
|
106
|
+
rng: Random generator to sample from.
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
A 64-bit unsigned integer.
|
|
110
|
+
|
|
111
|
+
Raises:
|
|
112
|
+
ValueError: If both seed and rng are None.
|
|
113
|
+
"""
|
|
114
|
+
if seed is not None:
|
|
115
|
+
return int(seed) & SEED_MASK
|
|
116
|
+
if rng is not None:
|
|
117
|
+
return rng.getrandbits(SEED_BIT_WIDTH)
|
|
118
|
+
raise ValueError("Either seed or rng must be provided for deterministic behavior")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
# Seed Derivation (Deterministic)
|
|
123
|
+
# ---------------------------------------------------------------------------
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def derive_seed(base_seed: int, *components: int | str) -> int:
|
|
127
|
+
"""Derive a new seed from a base seed and components.
|
|
128
|
+
|
|
129
|
+
This is a pure function for hierarchical seed derivation.
|
|
130
|
+
Used by Gaggle to give each glitchling a unique but reproducible seed.
|
|
131
|
+
|
|
132
|
+
Uses blake2s for stable hashing across interpreter runs (unlike Python's
|
|
133
|
+
built-in hash() which is salted per-process). This ensures identical
|
|
134
|
+
inputs always produce identical seeds regardless of PYTHONHASHSEED.
|
|
135
|
+
|
|
136
|
+
Args:
|
|
137
|
+
base_seed: The parent seed.
|
|
138
|
+
*components: Additional components to mix in (integers or strings).
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
A derived 64-bit seed.
|
|
142
|
+
|
|
143
|
+
Examples:
|
|
144
|
+
>>> derive_seed(12345, 0) # first child
|
|
145
|
+
13704458811836263874
|
|
146
|
+
>>> derive_seed(12345, 1) # second child
|
|
147
|
+
7874335407589182396
|
|
148
|
+
>>> derive_seed(12345, "typogre") # named child
|
|
149
|
+
561509252352425601
|
|
150
|
+
"""
|
|
151
|
+
# Use blake2s for stable, deterministic hashing across runs
|
|
152
|
+
hasher = blake2s(digest_size=8)
|
|
153
|
+
|
|
154
|
+
# Helper to convert int to bytes (handles arbitrary size)
|
|
155
|
+
def _int_to_bytes(value: int) -> bytes:
|
|
156
|
+
if value == 0:
|
|
157
|
+
return b"\x00"
|
|
158
|
+
abs_value = abs(value)
|
|
159
|
+
length = (abs_value.bit_length() + 7) // 8
|
|
160
|
+
if value < 0:
|
|
161
|
+
while True:
|
|
162
|
+
try:
|
|
163
|
+
return value.to_bytes(length, "big", signed=True)
|
|
164
|
+
except OverflowError:
|
|
165
|
+
length += 1
|
|
166
|
+
return abs_value.to_bytes(length, "big", signed=False)
|
|
167
|
+
|
|
168
|
+
hasher.update(_int_to_bytes(base_seed))
|
|
169
|
+
for component in components:
|
|
170
|
+
hasher.update(b"\x00") # separator
|
|
171
|
+
if isinstance(component, str):
|
|
172
|
+
hasher.update(component.encode("utf-8"))
|
|
173
|
+
else:
|
|
174
|
+
hasher.update(_int_to_bytes(component))
|
|
175
|
+
|
|
176
|
+
return int.from_bytes(hasher.digest(), "big")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
# Random Value Generation (Impure)
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def create_rng(seed: int) -> random.Random:
    """Build an independent ``random.Random`` seeded with ``seed``.

    Meant for spawning child RNG states (e.g. for parallel operations);
    passing a concrete seed value around is preferred where that suffices.

    Args:
        seed: Seed for the new generator.

    Returns:
        A freshly constructed ``random.Random`` instance.
    """
    generator = random.Random(seed)
    return generator
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def sample_random_float(rng: random.Random) -> float:
    """Draw the next float from ``rng``.

    Args:
        rng: Generator to advance.

    Returns:
        The value of ``rng.random()``, i.e. a float in [0.0, 1.0).
    """
    drawn = rng.random()
    return drawn
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def sample_random_int(rng: random.Random, *, low: int, high: int) -> int:
    """Draw an integer from the closed interval [low, high].

    Args:
        rng: Generator to advance.
        low: Smallest value that may be returned.
        high: Largest value that may be returned.

    Returns:
        The value of ``rng.randint(low, high)``.
    """
    drawn = rng.randint(low, high)
    return drawn
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def sample_random_index(rng: random.Random, length: int) -> int:
    """Pick a valid index into a sequence of ``length`` items.

    Args:
        rng: Generator to advance.
        length: Number of items in the sequence being indexed.

    Returns:
        The value of ``rng.randrange(length)``, i.e. an index in [0, length).

    Raises:
        ValueError: If ``length`` is zero or negative.
    """
    if length > 0:
        return rng.randrange(length)
    raise ValueError("Cannot sample index from empty sequence")
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
__all__ = [
|
|
244
|
+
# Constants
|
|
245
|
+
"SEED_BIT_WIDTH",
|
|
246
|
+
"SEED_MASK",
|
|
247
|
+
# Protocols
|
|
248
|
+
"RandomBitsSource",
|
|
249
|
+
# Boundary functions
|
|
250
|
+
"resolve_seed",
|
|
251
|
+
"resolve_seed_deterministic",
|
|
252
|
+
# Derivation
|
|
253
|
+
"derive_seed",
|
|
254
|
+
# RNG operations (impure)
|
|
255
|
+
"create_rng",
|
|
256
|
+
"sample_random_float",
|
|
257
|
+
"sample_random_int",
|
|
258
|
+
"sample_random_index",
|
|
259
|
+
]
|