glitchlings 1.0.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glitchlings/__init__.py +101 -0
- glitchlings/__main__.py +8 -0
- glitchlings/_corruption_engine/__init__.py +12 -0
- glitchlings/_corruption_engine.cp313-win_amd64.pyd +0 -0
- glitchlings/assets/__init__.py +180 -0
- glitchlings/assets/apostrofae_pairs.json +32 -0
- glitchlings/assets/ekkokin_homophones.json +2014 -0
- glitchlings/assets/hokey_assets.json +193 -0
- glitchlings/assets/lexemes/academic.json +1049 -0
- glitchlings/assets/lexemes/colors.json +1333 -0
- glitchlings/assets/lexemes/corporate.json +716 -0
- glitchlings/assets/lexemes/cyberpunk.json +22 -0
- glitchlings/assets/lexemes/lovecraftian.json +23 -0
- glitchlings/assets/lexemes/synonyms.json +3354 -0
- glitchlings/assets/mim1c_homoglyphs.json.gz.b64 +1064 -0
- glitchlings/assets/ocr_confusions.tsv +30 -0
- glitchlings/assets/pipeline_assets.json +29 -0
- glitchlings/attack/__init__.py +184 -0
- glitchlings/attack/analysis.py +1321 -0
- glitchlings/attack/core.py +819 -0
- glitchlings/attack/core_execution.py +378 -0
- glitchlings/attack/core_planning.py +612 -0
- glitchlings/attack/encode.py +114 -0
- glitchlings/attack/metrics.py +211 -0
- glitchlings/attack/metrics_dispatch.py +70 -0
- glitchlings/attack/tokenization.py +338 -0
- glitchlings/attack/tokenizer_metrics.py +373 -0
- glitchlings/auggie.py +285 -0
- glitchlings/compat/__init__.py +9 -0
- glitchlings/compat/loaders.py +355 -0
- glitchlings/compat/types.py +41 -0
- glitchlings/conf/__init__.py +39 -0
- glitchlings/conf/loaders.py +331 -0
- glitchlings/conf/schema.py +156 -0
- glitchlings/conf/types.py +72 -0
- glitchlings/config.toml +2 -0
- glitchlings/constants.py +139 -0
- glitchlings/dev/__init__.py +3 -0
- glitchlings/dev/docs.py +45 -0
- glitchlings/dlc/__init__.py +21 -0
- glitchlings/dlc/_shared.py +300 -0
- glitchlings/dlc/gutenberg.py +400 -0
- glitchlings/dlc/huggingface.py +68 -0
- glitchlings/dlc/langchain.py +147 -0
- glitchlings/dlc/nemo.py +283 -0
- glitchlings/dlc/prime.py +215 -0
- glitchlings/dlc/pytorch.py +98 -0
- glitchlings/dlc/pytorch_lightning.py +173 -0
- glitchlings/internal/__init__.py +16 -0
- glitchlings/internal/rust.py +159 -0
- glitchlings/internal/rust_ffi.py +599 -0
- glitchlings/main.py +426 -0
- glitchlings/protocols.py +91 -0
- glitchlings/runtime_config.py +24 -0
- glitchlings/util/__init__.py +41 -0
- glitchlings/util/adapters.py +65 -0
- glitchlings/util/keyboards.py +508 -0
- glitchlings/util/transcripts.py +108 -0
- glitchlings/zoo/__init__.py +161 -0
- glitchlings/zoo/assets/__init__.py +29 -0
- glitchlings/zoo/core.py +852 -0
- glitchlings/zoo/core_execution.py +154 -0
- glitchlings/zoo/core_planning.py +451 -0
- glitchlings/zoo/corrupt_dispatch.py +291 -0
- glitchlings/zoo/hokey.py +139 -0
- glitchlings/zoo/jargoyle.py +301 -0
- glitchlings/zoo/mim1c.py +269 -0
- glitchlings/zoo/pedant/__init__.py +109 -0
- glitchlings/zoo/pedant/core.py +99 -0
- glitchlings/zoo/pedant/forms.py +50 -0
- glitchlings/zoo/pedant/stones.py +83 -0
- glitchlings/zoo/redactyl.py +94 -0
- glitchlings/zoo/rng.py +280 -0
- glitchlings/zoo/rushmore.py +416 -0
- glitchlings/zoo/scannequin.py +370 -0
- glitchlings/zoo/transforms.py +331 -0
- glitchlings/zoo/typogre.py +194 -0
- glitchlings/zoo/validation.py +643 -0
- glitchlings/zoo/wherewolf.py +120 -0
- glitchlings/zoo/zeedub.py +165 -0
- glitchlings-1.0.0.dist-info/METADATA +404 -0
- glitchlings-1.0.0.dist-info/RECORD +86 -0
- glitchlings-1.0.0.dist-info/WHEEL +5 -0
- glitchlings-1.0.0.dist-info/entry_points.txt +3 -0
- glitchlings-1.0.0.dist-info/licenses/LICENSE +201 -0
- glitchlings-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Core classes for the pedant evolution chain backed by the Rust pipeline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Dict, Type
|
|
6
|
+
|
|
7
|
+
from glitchlings.internal.rust_ffi import apply_grammar_rule_rust
|
|
8
|
+
|
|
9
|
+
from ..core import Gaggle
|
|
10
|
+
from .stones import PedantStone
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def apply_pedant(
    text: str,
    *,
    stone: PedantStone,
    seed: int,
) -> str:
    """Run ``text`` through the Rust grammar rule selected by ``stone``.

    Args:
        text: Input text to transform.
        stone: Stone whose ``label`` is forwarded to the Rust extension.
        seed: Seed for the transformation; coerced with ``int()`` first.

    Returns:
        Whatever ``apply_grammar_rule_rust`` returns for these arguments.
    """
    # Resolve both keyword values up front, in the order the call would
    # evaluate them, so the FFI call itself stays a plain one-liner.
    stone_label = stone.label
    numeric_seed = int(seed)
    return apply_grammar_rule_rust(text, stone=stone_label, seed=numeric_seed)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class PedantEvolution:
    """Evolved pedant whose ``move`` is carried out by the Rust extension.

    Subclasses normally pin ``stone`` as a class attribute; a ``stone``
    passed to the constructor is used in preference to that attribute.
    """

    stone: PedantStone

    def __init__(
        self,
        seed: int,
        *,
        stone: PedantStone | None = None,
    ) -> None:
        """Store the integer seed and the stone this form is bound to.

        Raises:
            ValueError: If no stone is passed and none is set on the class.
        """
        # Fall back to the class-level ``stone`` when no (truthy) stone was
        # supplied explicitly.
        chosen = stone if stone else getattr(self, "stone", None)
        if chosen is None:  # pragma: no cover - defensive guard
            raise ValueError("PedantEvolution requires a PedantStone")
        self.seed = int(seed)
        self.stone = chosen

    def move(self, text: str) -> str:
        """Return ``text`` transformed by this form's stone and seed."""
        return apply_pedant(text, stone=self.stone, seed=self.seed)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class PedantBase:
    """Unevolved pedant: leaves text alone until a stone evolves it."""

    name: str = "Pedant"
    type: str = "Normal"
    flavor: str = "A novice grammarian waiting to evolve."

    def __init__(self, seed: int, *, root_seed: int | None = None) -> None:
        """Record ``seed`` and the root seed used when deriving evolutions.

        ``root_seed`` defaults to ``seed`` when it is not given.
        """
        self.seed = int(seed)
        origin = seed if root_seed is None else root_seed
        self.root_seed = int(origin)

    def evolve(self, stone: PedantStone | str) -> PedantEvolution:
        """Instantiate the evolution registered for ``stone``.

        ``stone`` is normalised with ``PedantStone.from_value``. The new
        form's seed is derived from ``root_seed``, the stone label and ``0``.

        Raises:
            KeyError: If ``EVOLUTIONS`` has no entry for the stone.
        """
        resolved = PedantStone.from_value(stone)
        evolution_cls = EVOLUTIONS.get(resolved)
        if evolution_cls is None:  # pragma: no cover - sanity guard
            raise KeyError(f"Unknown stone: {stone}")
        child_seed = Gaggle.derive_seed(self.root_seed, resolved.label, 0)
        return evolution_cls(seed=int(child_seed))

    def move(self, text: str) -> str:
        """Return ``text`` unchanged; the base pedant applies no rule."""
        return text

    def __repr__(self) -> str:  # pragma: no cover - debugging helper
        cls_name = self.__class__.__name__
        return f"<{cls_name} seed={self.seed} type={self.type}>"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Registry mapping each stone to the evolution class it unlocks. It starts
# out empty so the name always exists, even when the import below fails.
EVOLUTIONS: Dict[PedantStone, Type[PedantEvolution]] = {}


try:  # pragma: no cover - import resolution occurs at runtime
    from .forms import Aetheria, Andi, Apostrofae, Commama, Infinitoad
except ImportError:  # pragma: no cover - partial imports during type checking
    # NOTE(review): presumably this also triggers when ``forms`` is imported
    # first and is still partially initialised; the registry would then stay
    # empty - confirm the package always imports this module first.
    pass
else:
    # Rebind the registry now that every form class is importable.
    EVOLUTIONS = {
        PedantStone.HYPERCORRECTITE: Andi,
        PedantStone.UNSPLITTIUM: Infinitoad,
        PedantStone.COEURITE: Aetheria,
        PedantStone.CURLITE: Apostrofae,
        PedantStone.OXFORDIUM: Commama,
    }


__all__ = ["PedantBase", "PedantEvolution", "EVOLUTIONS", "apply_pedant"]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Pedant evolution forms delegating to the Rust-backed core."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .core import PedantEvolution
|
|
6
|
+
from .stones import PedantStone
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Andi(PedantEvolution):
    """Pedant form bound to the ``HYPERCORRECTITE`` stone."""

    stone = PedantStone.HYPERCORRECTITE

    # Descriptive metadata for this form.
    name = "Andi"
    type = "Ghost"
    flavor = "Learned that 'me' is wrong and now overcorrects everywhere."
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Infinitoad(PedantEvolution):
    """Pedant form bound to the ``UNSPLITTIUM`` stone."""

    stone = PedantStone.UNSPLITTIUM

    # Descriptive metadata for this form.
    name = "Infinitoad"
    type = "Steel"
    flavor = "To never split what was never whole."
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Aetheria(PedantEvolution):
    """Pedant form bound to the ``COEURITE`` stone."""

    stone = PedantStone.COEURITE

    # Descriptive metadata for this form.
    name = "Aetheria"
    type = "Psychic"
    flavor = "Resurrects archaic ligatures and diacritics."
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Apostrofae(PedantEvolution):
    """Pedant form bound to the ``CURLITE`` stone."""

    stone = PedantStone.CURLITE

    # Descriptive metadata for this form.
    name = "Apostrofae"
    type = "Fairy"
    flavor = "Curves quotes into typeset perfection."
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Commama(PedantEvolution):
    """Pedant form bound to the ``OXFORDIUM`` stone."""

    stone = PedantStone.OXFORDIUM

    # Descriptive metadata for this form.
    name = "Commama"
    type = "Steel"
    flavor = "Oxonian hero of the list."
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Public names exported by this module: one class per evolution form.
__all__ = ["Andi", "Infinitoad", "Aetheria", "Apostrofae", "Commama"]
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Evolution stones recognised by the pedant."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import Enum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class Stone:
    """Immutable metadata record describing one evolution stone."""

    # Display name of the stone.
    name: str
    # Elemental type label attached to the stone.
    type: str
    # Human-readable description of what the stone does.
    effect: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PedantStone(Enum):
    """Closed set of evolution stones; each member's value is a :class:`Stone`."""

    HYPERCORRECTITE = Stone(
        name="Hypercorrectite",
        type="Ghost",
        effect="Induces prestigious-sounding pronoun errors in coordinate structures.",
    )
    UNSPLITTIUM = Stone(
        name="Unsplittium",
        type="Steel",
        effect="Unsplits infinitives that were never meant to be joined.",
    )
    COEURITE = Stone(
        name="Coeurite",
        type="Psychic",
        effect="Restores archaic ligatures to modern words.",
    )
    CURLITE = Stone(
        name="Curlite",
        type="Fairy",
        effect="Coaches punctuation to embrace typographic curls.",
    )
    OXFORDIUM = Stone(
        name="Oxfordium",
        type="Steel",
        effect="Polishes serial comma usage.",
    )

    @property
    def descriptor(self) -> Stone:
        """The :class:`Stone` record stored as this member's value."""

        return self.value

    @property
    def label(self) -> str:
        """The stone's display name, i.e. ``descriptor.name``."""

        return self.value.name

    def __str__(self) -> str:  # pragma: no cover - convenience for reprs/CLI echo
        return self.label

    @classmethod
    def from_value(cls, value: object) -> "PedantStone":
        """Coerce ``value`` into a :class:`PedantStone` member.

        Accepts an existing member (returned as-is), a :class:`Stone`
        descriptor (matched by equality against member values), or any
        other object, whose ``str()`` is looked up among the display names.

        Raises:
            ValueError: If no member corresponds to ``value``.
        """

        if isinstance(value, cls):
            return value

        if not isinstance(value, Stone):
            # Everything that is not a descriptor is resolved by name.
            try:
                return _STONE_BY_NAME[str(value)]
            except KeyError as exc:
                raise ValueError(f"Unknown pedant stone: {value!r}") from exc

        # Descriptor path: find the first member carrying an equal Stone.
        found = next((member for member in cls if member.value == value), None)
        if found is None:
            msg = f"Unknown pedant stone descriptor: {value!r}"
            raise ValueError(msg)
        return found
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Display name -> enum member; used by ``PedantStone.from_value`` for the
# string lookup path.
_STONE_BY_NAME: dict[str, PedantStone] = {member.label: member for member in PedantStone}


# Display name -> descriptor record, for callers that only need the metadata.
STONES: dict[str, Stone] = {member.value.name: member.value for member in PedantStone}


__all__ = ["Stone", "PedantStone", "STONES"]
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from typing import Any, cast
|
|
3
|
+
|
|
4
|
+
from glitchlings.constants import DEFAULT_REDACTYL_CHAR, DEFAULT_REDACTYL_RATE
|
|
5
|
+
from glitchlings.internal.rust_ffi import redact_words_rust, resolve_seed
|
|
6
|
+
|
|
7
|
+
from .core import AttackWave, Glitchling, PipelineOperationPayload
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def redact_words(
    text: str,
    replacement_char: str | None = DEFAULT_REDACTYL_CHAR,
    rate: float | None = None,
    merge_adjacent: bool | None = False,
    seed: int = 151,
    rng: random.Random | None = None,
    *,
    unweighted: bool = False,
) -> str:
    """Redact words in ``text`` by delegating to the Rust implementation.

    Args:
        text: Input text.
        replacement_char: Character used for redaction; ``None`` selects
            ``DEFAULT_REDACTYL_CHAR``. Other values are passed through ``str()``.
        rate: Redaction rate; ``None`` selects ``DEFAULT_REDACTYL_RATE``.
            The value is clamped into ``[0.0, 1.0]`` before use.
        merge_adjacent: Coerced to ``bool`` (``None`` becomes ``False``) and
            forwarded to the Rust function.
        seed: Passed to ``resolve_seed`` together with ``rng``.
        rng: Passed to ``resolve_seed`` together with ``seed``.
        unweighted: Coerced to ``bool`` and forwarded to the Rust function.

    Returns:
        The result of ``redact_words_rust``.
    """
    if rate is None:
        chosen_rate = DEFAULT_REDACTYL_RATE
    else:
        chosen_rate = rate

    if replacement_char is None:
        fill = DEFAULT_REDACTYL_CHAR
    else:
        fill = str(replacement_char)

    # ``bool(None)`` is already False, so one coercion covers the None case.
    merge_flag = bool(merge_adjacent)

    # Clamp into [0.0, 1.0]: cap at the top first, then floor at the bottom.
    capped = min(chosen_rate, 1.0)
    bounded_rate = max(0.0, capped)
    weight_free = bool(unweighted)

    resolved = resolve_seed(seed, rng)
    return redact_words_rust(text, fill, bounded_rate, merge_flag, weight_free, resolved)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Redactyl(Glitchling):
    """Word-scoped glitchling whose corruption function is ``redact_words``."""

    flavor = "Some things are better left ████████."

    def __init__(
        self,
        *,
        replacement_char: str = DEFAULT_REDACTYL_CHAR,
        rate: float | None = None,
        merge_adjacent: bool = False,
        seed: int = 151,
        unweighted: bool = False,
        **kwargs: Any,
    ) -> None:
        """Configure the glitchling and forward all settings to the base class.

        A ``rate`` of ``None`` is replaced by ``DEFAULT_REDACTYL_RATE`` before
        it is handed to ``Glitchling.__init__``.
        """
        if rate is None:
            rate = DEFAULT_REDACTYL_RATE
        super().__init__(
            name="Redactyl",
            corruption_function=redact_words,
            scope=AttackWave.WORD,
            seed=seed,
            replacement_char=replacement_char,
            rate=rate,
            merge_adjacent=merge_adjacent,
            unweighted=unweighted,
            **kwargs,
        )

    def pipeline_operation(self) -> PipelineOperationPayload:
        """Build the ``"redact"`` operation payload from ``self.kwargs``.

        Missing or ``None`` settings fall back to the module defaults, and
        each value is coerced to the type used in the payload.
        """
        # NOTE(review): presumably ``self.kwargs`` is filled by the Glitchling
        # base class from the constructor keywords - confirm in zoo/core.py.
        settings = self.kwargs

        char_setting = settings.get("replacement_char", DEFAULT_REDACTYL_CHAR)
        if char_setting is None:
            char_setting = DEFAULT_REDACTYL_CHAR

        rate_setting = settings.get("rate", DEFAULT_REDACTYL_RATE)
        if rate_setting is None:
            rate_setting = DEFAULT_REDACTYL_RATE

        payload = {
            "type": "redact",
            "replacement_char": str(char_setting),
            "rate": float(rate_setting),
            "merge_adjacent": bool(settings.get("merge_adjacent", False)),
            "unweighted": bool(settings.get("unweighted", False)),
        }
        return cast(PipelineOperationPayload, payload)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Ready-made instance built with the default settings.
redactyl = Redactyl()


__all__ = ["Redactyl", "redactyl", "redact_words"]
|
glitchlings/zoo/rng.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
"""RNG boundary layer for seed resolution.
|
|
2
|
+
|
|
3
|
+
This module provides the interface between RNG state and concrete random values.
|
|
4
|
+
All randomness in the glitchlings library flows through these functions.
|
|
5
|
+
|
|
6
|
+
Design Philosophy
|
|
7
|
+
-----------------
|
|
8
|
+
RNG management is an *impure* operation - it involves stateful objects
|
|
9
|
+
(random.Random) and non-deterministic behavior when no seed is provided.
|
|
10
|
+
This module provides the boundary layer that converts RNG state into
|
|
11
|
+
concrete values that can be passed to pure functions.
|
|
12
|
+
|
|
13
|
+
The pattern is:
|
|
14
|
+
1. User provides `seed: int | None` and/or `rng: random.Random | None`
|
|
15
|
+
2. Boundary layer resolves to a concrete `int` via `resolve_seed()`
|
|
16
|
+
3. Pure/Rust functions receive the concrete seed value
|
|
17
|
+
|
|
18
|
+
This separation means:
|
|
19
|
+
- Pure transformation code never touches RNG objects
|
|
20
|
+
- Tests can provide explicit seed values for reproducibility
|
|
21
|
+
- RNG state management is isolated to the boundary
|
|
22
|
+
|
|
23
|
+
See AGENTS.md "Functional Purity Architecture" for full details.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import random
|
|
29
|
+
from typing import Protocol, runtime_checkable
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# Constants
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
# Seeds are 64 bits wide (for compatibility with Rust u64).
SEED_BIT_WIDTH = 64
SEED_MASK = 2**SEED_BIT_WIDTH - 1  # 0xFFFFFFFFFFFFFFFF

# FNV-1a constants for 64-bit hashing (fast, simple string hashing).
_FNV_OFFSET_BASIS = 0xCBF29CE484222325
_FNV_PRIME = 0x100000001B3

# SplitMix64 constants (standard PRNG seed mixer).
_SPLITMIX_GAMMA = 0x9E3779B97F4A7C15
_SPLITMIX_MIX1 = 0xBF58476D1CE4E5B9
_SPLITMIX_MIX2 = 0x94D049BB133111EB
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
# Protocols
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@runtime_checkable
class RandomBitsSource(Protocol):
    """Structural type for anything exposing ``getrandbits(k)``.

    The protocol is runtime-checkable, so ``isinstance`` can be used to test
    whether an object provides the method.
    """

    def getrandbits(self, k: int) -> int:
        """Return a non-negative integer with k random bits."""
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Core Boundary Functions
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def resolve_seed(
    seed: int | None,
    rng: random.Random | None,
) -> int:
    """Turn an optional seed and optional RNG into one concrete integer seed.

    Intended to be called once at the boundary layer; the resulting int is
    then handed to downstream pure/Rust functions.

    Args:
        seed: Explicit seed value. When given it wins over ``rng`` and is
            masked to 64 bits.
        rng: Random generator sampled for 64 bits when ``seed`` is None.

    Returns:
        A 64-bit unsigned integer suitable for Rust FFI.

    Note:
        With both arguments None the module-level ``random`` state is
        sampled. That is non-deterministic and should only happen at
        top-level CLI usage.

    Examples:
        >>> resolve_seed(42, None)  # explicit seed
        42
        >>> rng = random.Random(123)
        >>> resolve_seed(None, rng)  # sample from RNG
        14522756016584210807
    """
    if seed is None:
        # No explicit seed: draw from the supplied generator, or from the
        # module-level one as a last resort.
        if rng is None:
            return random.getrandbits(SEED_BIT_WIDTH)
        return rng.getrandbits(SEED_BIT_WIDTH)
    return int(seed) & SEED_MASK
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def resolve_seed_deterministic(
    seed: int | None,
    rng: random.Random | None,
) -> int:
    """Resolve a concrete seed, refusing to fall back to global randomness.

    Follows the same precedence as ``resolve_seed`` (explicit seed first,
    then ``rng``) but raises instead of sampling module-level random state.
    Use it where non-deterministic behavior would be a bug.

    Args:
        seed: Explicit seed value; masked to 64 bits when given.
        rng: Random generator sampled for 64 bits when ``seed`` is None.

    Returns:
        A 64-bit unsigned integer.

    Raises:
        ValueError: If both seed and rng are None.
    """
    if seed is None and rng is None:
        raise ValueError("Either seed or rng must be provided for deterministic behavior")
    if seed is None:
        return rng.getrandbits(SEED_BIT_WIDTH)
    return int(seed) & SEED_MASK
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Seed Derivation (Deterministic)
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _fnv1a_hash(data: bytes) -> int:
    """Return the 64-bit FNV-1a hash of ``data``.

    A simple, fast, non-cryptographic hash; seed derivation does not need
    cryptographic properties.
    """
    digest = _FNV_OFFSET_BASIS
    for octet in data:
        # FNV-1a step: XOR the byte in, multiply by the prime, keep 64 bits.
        digest = ((digest ^ octet) * _FNV_PRIME) & SEED_MASK
    return digest
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _splitmix64(state: int) -> int:
    """Mix ``state`` with one SplitMix64 step and return the 64-bit result.

    Every intermediate value is masked to 64 bits so the arithmetic matches
    wrapping unsigned 64-bit behavior.
    """
    z = (state + _SPLITMIX_GAMMA) & SEED_MASK
    z ^= z >> 30
    z = (z * _SPLITMIX_MIX1) & SEED_MASK
    z ^= z >> 27
    z = (z * _SPLITMIX_MIX2) & SEED_MASK
    z ^= z >> 31
    return z & SEED_MASK
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def derive_seed(base_seed: int, *components: int | str) -> int:
    """Derive a child seed from ``base_seed`` and a path of components.

    Pure function for hierarchical seed derivation. Each component is folded
    into the running state with XOR (strings via their FNV-1a hash of the
    UTF-8 bytes, integers via their masked absolute value) and the state is
    then mixed with SplitMix64. The result is stable across interpreter runs.

    Args:
        base_seed: The parent seed.
        *components: Additional components to mix in (integers or strings).

    Returns:
        A derived 64-bit seed.

    Examples:
        >>> derive_seed(12345, 0)  # first child
        2454886589211414944
        >>> derive_seed(12345, 1)  # second child
        18133564086679993456
        >>> derive_seed(12345, "typogre")  # named child
        1187037253482581891
    """
    state = base_seed & SEED_MASK

    for part in components:
        # Reduce the component to a 64-bit contribution first ...
        if isinstance(part, str):
            contribution = _fnv1a_hash(part.encode("utf-8"))
        else:
            contribution = abs(part) & SEED_MASK
        # ... then XOR it in and mix, once per component.
        state = _splitmix64(state ^ contribution)

    return state
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# ---------------------------------------------------------------------------
|
|
201
|
+
# Random Value Generation (Impure)
|
|
202
|
+
# ---------------------------------------------------------------------------
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def create_rng(seed: int) -> random.Random:
    """Build a fresh ``random.Random`` seeded with ``seed``.

    Useful for creating child RNG states for parallel operations; prefer
    passing concrete seed values to functions when possible.

    Args:
        seed: The seed for the new RNG.

    Returns:
        A new random.Random instance.
    """
    generator = random.Random(seed)
    return generator
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def sample_random_float(rng: random.Random) -> float:
    """Draw the next float from ``rng``.

    Args:
        rng: The random generator.

    Returns:
        Float in range [0.0, 1.0).
    """
    drawn = rng.random()
    return drawn
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def sample_random_int(rng: random.Random, *, low: int, high: int) -> int:
    """Draw an integer from ``rng`` between ``low`` and ``high``, both included.

    Args:
        rng: The random generator.
        low: Minimum value (inclusive).
        high: Maximum value (inclusive).

    Returns:
        Random integer in range [low, high].
    """
    drawn = rng.randint(low, high)
    return drawn
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def sample_random_index(rng: random.Random, length: int) -> int:
    """Pick a valid index into a sequence of ``length`` items.

    Args:
        rng: The random generator.
        length: The sequence length.

    Returns:
        Random index in range [0, length).

    Raises:
        ValueError: If length <= 0.
    """
    if length > 0:
        return rng.randrange(length)
    raise ValueError("Cannot sample index from empty sequence")
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# Public API of this module, grouped by role.
__all__ = [
    # Constants
    "SEED_BIT_WIDTH",
    "SEED_MASK",
    # Protocols
    "RandomBitsSource",
    # Boundary functions
    "resolve_seed",
    "resolve_seed_deterministic",
    # Derivation
    "derive_seed",
    # RNG operations (impure)
    "create_rng",
    "sample_random_float",
    "sample_random_int",
    "sample_random_index",
]
|