seedloop 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
seedloop/__init__.py ADDED
@@ -0,0 +1,41 @@
1
+ """seedloop — deterministic simulation testing for Python asyncio.
2
+
3
+ Write a scenario against a :class:`World`, then ``check`` it across many seeds; a failing seed is
4
+ the reproduction — ``replay`` it to debug. The deterministic core (loop, virtual clock, seeded
5
+ entropy), the simulated network with fault injection (loss, duplication, partitions), the invariant
6
+ API, and the non-determinism auditor are in place; a worked Raft demo ships in ``seedloop.demos``.
7
+ """
8
+
9
+ from seedloop._audit import audit_mode
10
+ from seedloop._entropy import ensure_hash_seed
11
+ from seedloop._net import Address, Endpoint, Message, Transport
12
+ from seedloop._run import CheckResult, Scenario, check, replay
13
+ from seedloop._world import Node, World
14
+ from seedloop.errors import (
15
+ BoundaryError,
16
+ DeadlockError,
17
+ EntropyLeakError,
18
+ InvariantError,
19
+ SeedloopError,
20
+ )
21
+
22
+ __all__ = [
23
+ "Address",
24
+ "BoundaryError",
25
+ "CheckResult",
26
+ "DeadlockError",
27
+ "Endpoint",
28
+ "EntropyLeakError",
29
+ "InvariantError",
30
+ "Message",
31
+ "Node",
32
+ "Scenario",
33
+ "SeedloopError",
34
+ "Transport",
35
+ "World",
36
+ "audit_mode",
37
+ "check",
38
+ "ensure_hash_seed",
39
+ "replay",
40
+ ]
41
+ __version__ = "0.3.0"
seedloop/_audit.py ADDED
@@ -0,0 +1,107 @@
1
+ """The non-determinism auditor: runtime tripwires for uncontrolled entropy (ADR-0008).
2
+
3
+ A run is a pure function of its seed only if every entropy source it touches is the World's seeded
4
+ one. The loop already rejects the I/O boundary (``run_in_executor``, real sockets, DNS) in every
5
+ mode. This adds an opt-in *audit mode* that closes the Python-level entropy sources the loop does
6
+ not see: real wall-clock time, the unseeded global ``random``, ``os.urandom``/``secrets``, and a
7
+ bare ``threading.Thread``. In audit mode each raises instead of running, so a leak is a loud,
8
+ reproducible failure on the seed that hit it — the boundary enforced, not just stated (scope.md).
9
+
10
+ The tripwires patch only module-level entry points, never ``random.Random`` itself, so the World's
11
+ seeded ``rng`` keeps working; they are pure raises that touch no entropy and leave a clean run's
12
+ timeline unchanged; and they are restored on exit even on error.
13
+
14
+ Like any monkeypatch (and the CSPRNG shim), a tripwire catches a call that looks the name up at call
15
+ time — ``time.monotonic()``, ``random.random()`` — but not a reference bound *before* audit started
16
+ (``from time import monotonic`` then ``monotonic()``). The common attribute-call form is caught; the
17
+ same C-level caveat as ``scope.md`` applies below Python.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import os
23
+ import random
24
+ import threading
25
+ import time
26
+ from collections.abc import Callable, Iterator
27
+ from contextlib import contextmanager
28
+ from typing import Any
29
+
30
+ from seedloop.errors import BoundaryError, EntropyLeakError
31
+
32
+ # Real-time entry points (the loop owns virtual time via loop.time(), so any direct call is a leak).
33
+ _REAL_TIME = ("time", "monotonic", "perf_counter", "time_ns", "monotonic_ns", "perf_counter_ns")
34
+
35
+ # Every entropy-drawing module-level `random` function — the *complete* set on the global unseeded
36
+ # instance, not a subset, so a leak through any (e.g. expovariate for latency jitter) is caught.
37
+ # These are module functions; `random.Random` instances such as the seeded rng are untouched.
38
+ _RANDOM_FUNCS = (
39
+ "random",
40
+ "uniform",
41
+ "triangular",
42
+ "randint",
43
+ "randrange",
44
+ "choice",
45
+ "choices",
46
+ "shuffle",
47
+ "sample",
48
+ "getrandbits",
49
+ "randbytes",
50
+ "betavariate",
51
+ "expovariate",
52
+ "gammavariate",
53
+ "gauss",
54
+ "lognormvariate",
55
+ "normalvariate",
56
+ "vonmisesvariate",
57
+ "paretovariate",
58
+ "weibullvariate",
59
+ "binomialvariate", # 3.12+
60
+ )
61
+
62
+ # Each tripwire is (module, attribute, display name). hasattr-guarded so a name absent on a given
63
+ # interpreter is skipped rather than crashing the patcher. os.urandom and the random._urandom alias
64
+ # that secrets draws through are intercepted too.
65
+ _ENTROPY_SURFACES: list[tuple[Any, str, str]] = [
66
+ *((time, name, f"time.{name}") for name in _REAL_TIME if hasattr(time, name)),
67
+ (os, "urandom", "os.urandom"),
68
+ (random, "_urandom", "secrets/os.urandom"),
69
+ *((random, name, f"random.{name}") for name in _RANDOM_FUNCS if hasattr(random, name)),
70
+ ]
71
+
72
+
73
def _entropy_tripwire(source: str) -> Callable[..., Any]:
    """Build a stand-in callable that raises ``EntropyLeakError(source)`` however it is called.

    ``source`` is the display name of the patched entry point; it is passed unchanged to the
    exception so the failure names the leak.
    """

    def tripwire(*_ignored: Any, **_ignored_kw: Any) -> Any:
        # Accept any call shape: the tripwire replaces functions with differing signatures.
        raise EntropyLeakError(source)

    return tripwire
78
+
79
+
80
def _thread_tripwire(*_ignored: Any, **_ignored_kw: Any) -> Any:
    """Stand-in for ``threading.Thread.start`` in audit mode: always raises ``BoundaryError``.

    Takes arbitrary arguments because it is installed as a method and so receives the thread
    instance as its first positional argument.
    """
    message = (
        "threading.Thread (a real thread) cannot be made deterministic and is out of scope in a "
        "simulated run (see docs/scope.md)"
    )
    raise BoundaryError(message)
85
+
86
+
87
@contextmanager
def audit_mode() -> Iterator[None]:
    """Replace uncontrolled entropy entry points with tripwires for the life of the context.

    While the context is active, every ``(module, attribute)`` listed in ``_ENTROPY_SURFACES`` is
    swapped for a callable that raises ``EntropyLeakError``, and ``threading.Thread.start`` is
    swapped for one that raises ``BoundaryError``. Only module-level attributes are patched, so a
    ``random.Random`` instance (such as a seeded rng) keeps working. The originals are put back
    when the context exits, whether normally or through an exception.
    """
    # Capture every original first, so the restore list is complete before anything is patched.
    originals = [
        (module, attr, getattr(module, attr)) for module, attr, _label in _ENTROPY_SURFACES
    ]
    original_start = threading.Thread.start

    for module, attr, label in _ENTROPY_SURFACES:
        setattr(module, attr, _entropy_tripwire(label))
    threading.Thread.start = _thread_tripwire  # type: ignore[method-assign]

    try:
        yield
    finally:
        # Undo in the same order the snapshot was taken: entropy surfaces, then the thread hook.
        for module, attr, value in originals:
            setattr(module, attr, value)
        threading.Thread.start = original_start  # type: ignore[method-assign]
seedloop/_entropy.py ADDED
@@ -0,0 +1,96 @@
1
+ """Seeded entropy: per-component sub-streams, a CSPRNG shim, and a hash-seed launcher.
2
+
3
+ A run is a pure function of its seed, so every source of randomness must derive from it. The root
4
+ seed is split into independent named sub-streams (ADR-0009) so adding a draw in one component does
5
+ not perturb another's sequence. The CSPRNG shim routes ``os.urandom``/``secrets`` to a seeded
6
+ source for the duration of a run, and the launcher pins ``PYTHONHASHSEED`` before the interpreter
7
+ starts so set/dict iteration order is fixed (ADR-0010).
8
+
9
+ Verified against the interpreter during design: shimming ``os.urandom`` alone does *not* control
10
+ ``secrets``/``random``, because ``random`` binds ``from os import urandom as _urandom`` at import —
11
+ so the shim patches ``random._urandom`` too; and two child processes launched with the same
12
+ ``PYTHONHASHSEED`` hash identically while a different value differs.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import hashlib
18
+ import os
19
+ import random
20
+ import sys
21
+ from collections.abc import Callable, Iterator
22
+ from contextlib import contextmanager
23
+
24
+ _REEXEC_GUARD = "_SEEDLOOP_HASHSEED_REEXEC"
25
+
26
+
27
def substream(root_seed: int, label: str) -> random.Random:
    """Return a reproducible ``random.Random`` that depends only on ``(root_seed, label)``.

    The pair is rendered as the text ``"<root_seed>:<label>"`` and hashed with ``blake2b`` to a
    256-bit integer, which seeds the generator. The builtin ``hash()`` is deliberately avoided
    because it is randomized per process. Formatting the seed as text means any ``int`` is
    accepted, including negative values and values wider than 64 bits.
    """
    canonical = f"{root_seed}:{label}".encode()
    hasher = hashlib.blake2b(canonical, digest_size=32)
    # The hex digest read as base 16 is the same integer as the big-endian byte interpretation.
    return random.Random(int(hasher.hexdigest(), 16))
37
+
38
+
39
@contextmanager
def csprng_shim(stream: random.Random) -> Iterator[None]:
    """Serve ``os.urandom`` and ``secrets`` from ``stream`` while the context is active.

    Two names are patched with the same seeded replacement: ``os.urandom`` itself and the private
    ``random._urandom`` alias, which is the reference ``secrets`` draws through. Both originals
    are reinstated on exit, including when the body raises. The shim is intended to wrap a single
    run; runs do not overlap in one process.
    """
    replacement = _seeded_urandom(stream)
    # Read both originals before patching either, so a failed lookup leaves nothing patched.
    originals = (os.urandom, random._urandom)  # type: ignore[attr-defined]
    os.urandom = replacement
    random._urandom = replacement  # type: ignore[attr-defined]
    try:
        yield
    finally:
        os.urandom, random._urandom = originals  # type: ignore[attr-defined]
57
+
58
+
59
def _seeded_urandom(stream: random.Random) -> Callable[[int], bytes]:
    """Return an ``os.urandom``-shaped function whose bytes are drawn from ``stream``."""

    def seeded_urandom(n: int) -> bytes:
        # A zero-length request returns immediately and consumes nothing from the stream.
        if not n:
            return b""
        bits = stream.getrandbits(n * 8)
        return bits.to_bytes(n, "big")

    return seeded_urandom
64
+
65
+
66
def hash_seed_for(root_seed: int) -> int:
    """Derive the ``PYTHONHASHSEED`` value (0..4294967295) that a run with ``root_seed`` pins.

    The value is the 4-byte ``blake2b`` digest of the text ``"<root_seed>:hashseed"``, read as an
    unsigned big-endian integer, so it always fits the 32-bit range.
    """
    hasher = hashlib.blake2b(f"{root_seed}:hashseed".encode(), digest_size=4)
    # Base-16 parsing of the hex digest equals the big-endian integer value of the raw bytes.
    return int(hasher.hexdigest(), 16)
70
+
71
+
72
def ensure_hash_seed(root_seed: int) -> None:
    """Ensure the interpreter runs with the run's pinned ``PYTHONHASHSEED``.

    ``PYTHONHASHSEED`` is read once at interpreter start, so it cannot be set from inside a run;
    this re-runs the interpreter with the pinned value when needed.

    If the environment already carries the pinned value (either ``PYTHONHASHSEED`` itself or the
    re-exec guard variable set by a previous call), the function returns and the caller proceeds
    in-process. Otherwise it launches a pinned child running the same command and does not
    return: on POSIX by replacing the process (``execve``), on Windows (no true ``exec``) by
    spawning a child and exiting with its return code. The guard variable prevents infinite
    re-exec recursion.

    Buffered ``stdout``/``stderr`` output is flushed before the child is launched, because
    ``execve`` discards Python-level buffers and a spawned child would otherwise write ahead of
    the parent's pending output.
    """
    target = str(hash_seed_for(root_seed))
    if os.environ.get(_REEXEC_GUARD) == target or os.environ.get("PYTHONHASHSEED") == target:
        return  # already pinned (our child, or started correctly); proceed in-process
    # NOTE(review): with -E/-I the interpreter ignores PYTHONHASHSEED while the guard variable is
    # still visible in os.environ, so the check above could report "pinned" when it is not —
    # confirm whether that launch mode needs to be supported.
    child_env = dict(os.environ, PYTHONHASHSEED=target, **{_REEXEC_GUARD: target})
    # sys.orig_argv is the full original command (including -c / -m and their payload), so the
    # child re-runs exactly what the parent ran; reconstructing from sys.argv would drop -c code.
    argv = [sys.executable, *sys.orig_argv[1:]]
    # exec replaces the process image without flushing open file objects, so anything the parent
    # has buffered would be lost; flush first. Best effort: a closed or broken stream must not
    # prevent the re-exec.
    for stream in (sys.stdout, sys.stderr):
        if stream is None:
            continue
        try:
            stream.flush()
        except (OSError, ValueError):
            pass
    if os.name == "posix":
        os.execve(sys.executable, argv, child_env)
    else:
        # Windows has no in-place exec; spawn a pinned child and propagate its exit code.
        import subprocess

        completed = subprocess.run(argv, env=child_env)
        sys.exit(completed.returncode)
seedloop/_loop.py ADDED
@@ -0,0 +1,165 @@
1
+ """The deterministic event loop.
2
+
3
+ ``asyncio``'s loop is already single-threaded and its scheduling is deterministic by
4
+ construction; the one nondeterministic seam is the I/O poll (``selector.select()``). seedloop
5
+ subclasses :class:`asyncio.BaseEventLoop` and overrides only ``_run_once`` to remove that poll
6
+ (ADR-0013): the ready queue is drained in faithful ``call_soon`` FIFO order (ADR-0012), and the
7
+ real-I/O surface is rejected rather than run (``docs/scope.md``).
8
+
9
+ Time is virtual: ``loop.time()`` starts at 0 and never advances by waiting. When every task is
10
+ blocked, the loop jumps the clock to the next scheduled timer (the autojump of ADR-0005), so a
11
+ ten-second ``sleep`` resolves instantly. Timers live in a heap keyed ``(when, seq)``, so equal
12
+ deadlines fire in scheduling order — a deterministic tie-break CPython's ``TimerHandle`` (ordered by
13
+ deadline alone) lacks. ``BaseEventLoop`` (unlike ``BaseSelectorEventLoop``) creates no selector and
14
+ no self-pipe, so no real socket exists in the loop.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import asyncio
20
+ import heapq
21
+ from collections.abc import Callable
22
+ from typing import Any, NoReturn
23
+
24
+ from seedloop.errors import BoundaryError, DeadlockError
25
+
26
+
27
class DeterministicLoop(asyncio.BaseEventLoop):
    """An ``asyncio`` event loop that performs no real I/O and keeps a virtual clock.

    ``_run_once`` is replaced so that no selector is ever polled: ready callbacks run in FIFO
    order, and when nothing is ready the clock jumps straight to the earliest pending timer.
    Operations that would touch real threads, sockets, pipes, DNS, subprocesses, or signals raise
    ``BoundaryError`` instead of running.
    """

    def __init__(self) -> None:
        super().__init__()
        # Virtual monotonic clock. Only the autojump in _run_once ever moves it.
        self._sl_time = 0.0
        # Min-heap of (when, seq, handle). seq grows by one per scheduled timer, so two timers
        # with the same deadline pop in the order they were scheduled.
        self._sl_timers: list[tuple[float, int, asyncio.TimerHandle]] = []
        self._sl_timer_seq = 0
        # Optional callable invoked at the end of every step; it may raise to fail the run.
        # None (the default) leaves a run untouched.
        self._sl_after_step: Callable[[], None] | None = None

    def time(self) -> float:
        """Return the current virtual time."""
        return self._sl_time

    def call_at(  # type: ignore[override]
        self, when: float, callback: Any, *args: Any, context: Any = None
    ) -> asyncio.TimerHandle:
        """Schedule ``callback(*args)`` at virtual time ``when`` and return its timer handle."""
        self._check_closed()  # type: ignore[attr-defined]  # BaseEventLoop guard, not in the stubs
        handle = asyncio.TimerHandle(when, callback, args, self, context)
        entry = (when, self._sl_timer_seq, handle)
        heapq.heappush(self._sl_timers, entry)
        self._sl_timer_seq += 1
        return handle

    def call_later(  # type: ignore[override]
        self, delay: float, callback: Any, *args: Any, context: Any = None
    ) -> asyncio.TimerHandle:
        """Schedule ``callback(*args)`` ``delay`` virtual seconds after the current time."""
        deadline = self._sl_time + delay
        return self.call_at(deadline, callback, *args, context=context)

    def _timer_handle_cancelled(self, handle: asyncio.TimerHandle) -> None:
        # Nothing to track: a cancelled timer stays in the heap and is skipped when it is popped.
        pass

    def _run_once(self) -> None:
        """Run one step: autojump if idle, promote due timers, then run the ready batch.

        Raises ``DeadlockError`` when nothing is ready, no live timer remains, and the loop has
        not been asked to stop.
        """
        queue: Any = self._ready  # type: ignore[attr-defined]  # BaseEventLoop's ready deque
        if not queue:
            self._purge_cancelled_timers()
            if self._sl_timers:
                # Autojump: move the clock to the earliest deadline, never backwards.
                next_deadline = self._sl_timers[0][0]
                if next_deadline > self._sl_time:
                    self._sl_time = next_deadline
            elif not self._stopping:  # type: ignore[attr-defined]  # BaseEventLoop stop flag
                raise DeadlockError(
                    "the run is quiescent: every task is blocked and no timer is scheduled to "
                    "wake one"
                )
        self._fire_due_timers()
        # Only the handles present now are run; anything a callback schedules during the batch
        # waits for the next step.
        remaining = len(queue)
        while remaining:
            remaining -= 1
            handle = queue.popleft()
            if handle.cancelled():
                continue
            handle._run()
        hook = self._sl_after_step
        if hook is not None:
            hook()  # may raise to fail the run

    def _fire_due_timers(self) -> None:
        """Move every timer whose deadline is at or before the clock onto the ready queue."""
        timers = self._sl_timers
        now = self._sl_time
        while timers and timers[0][0] <= now:
            _when, _seq, handle = heapq.heappop(timers)
            if not handle.cancelled():
                self._ready.append(handle)  # type: ignore[attr-defined]

    def _purge_cancelled_timers(self) -> None:
        """Pop cancelled timers off the heap head so the first entry is a live deadline."""
        timers = self._sl_timers
        while timers and timers[0][2].cancelled():
            heapq.heappop(timers)

    # --- boundary: operations that cannot be made deterministic are rejected (ADR-0002) ---

    def _reject(self, what: str) -> NoReturn:
        """Raise ``BoundaryError`` naming the rejected operation ``what``."""
        raise BoundaryError(
            f"{what} cannot be made deterministic and is out of scope inside a simulated run "
            f"(see docs/scope.md)"
        )

    def run_in_executor(self, *args: Any, **kwargs: Any) -> NoReturn:  # type: ignore[override]
        self._reject("run_in_executor (real threads)")

    def call_soon_threadsafe(self, *args: Any, **kwargs: Any) -> NoReturn:  # type: ignore[override]
        self._reject("call_soon_threadsafe (another thread)")

    def add_reader(self, *args: Any, **kwargs: Any) -> NoReturn:  # type: ignore[override]
        self._reject("add_reader (real I/O)")

    def add_writer(self, *args: Any, **kwargs: Any) -> NoReturn:  # type: ignore[override]
        self._reject("add_writer (real I/O)")

    async def sock_recv(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("sock_recv (real socket)")

    async def sock_sendall(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("sock_sendall (real socket)")

    async def sock_connect(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("sock_connect (real socket)")

    async def getaddrinfo(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("getaddrinfo (real DNS)")

    async def getnameinfo(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("getnameinfo (real DNS)")

    async def create_connection(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("create_connection (real socket)")

    async def create_server(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("create_server (real socket)")

    async def create_datagram_endpoint(self, *args: Any, **kwargs: Any) -> NoReturn:
        # BaseEventLoop's version opens and binds a real UDP socket before failing; reject first.
        self._reject("create_datagram_endpoint (real socket)")

    async def connect_read_pipe(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("connect_read_pipe (real pipe)")

    async def connect_write_pipe(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("connect_write_pipe (real pipe)")

    async def subprocess_exec(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("subprocess_exec (real subprocess)")

    async def subprocess_shell(self, *args: Any, **kwargs: Any) -> NoReturn:
        self._reject("subprocess_shell (real subprocess)")

    def add_signal_handler(self, *args: Any, **kwargs: Any) -> NoReturn:  # type: ignore[override]
        self._reject("add_signal_handler (real signals)")

    # BaseEventLoop leaves _process_events and _write_to_self to subclasses. This loop never
    # polls and never needs a cross-thread wakeup, so both do nothing.
    def _process_events(self, event_list: Any) -> None:
        pass

    def _write_to_self(self) -> None:
        pass
seedloop/_net.py ADDED
@@ -0,0 +1,190 @@
1
+ """The simulated network: messages delivered as seeded timer events, with faults.
2
+
3
+ A message in flight is an ordinary timer on the loop's heap (``docs/network.md``). ``send`` draws a
4
+ latency from the seed's ``"net"`` sub-stream and schedules a delivery at ``now + latency``; ``recv``
5
+ blocks in virtual time until a message is queued. Reordering is emergent — two messages sent close
6
+ together draw independent latencies, so arrival order can differ from send order, reproducibly.
7
+
8
+ Faults — loss, duplication, and partitions — are drawn from the seed's ``"faults"`` sub-stream
9
+ (independent of ``"net"``, so enabling a fault does not shift surviving messages' latencies). An
10
+ endpoint can opt into a reliable, ordered channel. No real socket exists; the "network" is queues
11
+ and timers.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ from collections import deque
18
+ from random import Random
19
+ from typing import Protocol, runtime_checkable
20
+
21
+ from seedloop._trace import Timeline
22
+ from seedloop.errors import SeedloopError
23
+
24
+ Address = int # a node's address on the simulated network
25
+ Message = object # an opaque payload; seedloop schedules and orders it, never inspects it
26
+
27
+ # Default per-message latency range, in virtual seconds. Wide enough that two near-simultaneous
28
+ # sends can reorder.
29
+ _LAT_MIN = 0.001
30
+ _LAT_MAX = 0.020
31
+
32
+
33
@runtime_checkable
class Endpoint(Protocol):
    """The handle a node holds on the simulated network once it is bound to an address."""

    address: Address

    async def send(self, dst: Address, msg: Message) -> None:
        """Hand ``msg`` to the network, addressed to ``dst``."""

    async def recv(self) -> tuple[Address, Message]:
        """Wait for the next message and return it as ``(sender, payload)``."""
41
+
42
+
43
class Transport:
    """The simulated network behind ``world.net``.

    Holds the bound endpoints, the current partition (if any), and two seeded streams: one for
    latencies and one for faults. Every send, drop, duplication, and delivery is written to the
    timeline under a monotonically increasing message id.
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop,
        net_rng: Random,
        faults_rng: Random,
        timeline: Timeline,
    ) -> None:
        self._loop = loop
        self._net = net_rng
        self._faults = faults_rng
        self._timeline = timeline
        self._endpoints: dict[Address, _Endpoint] = {}
        # Monotonic message id: the stable identity used on the timeline, not Python id().
        self._next_mid = 0
        # Partition groups; None means full connectivity.
        self._partition: list[set[Address]] | None = None
        # Latest delivery time scheduled so far on each reliable (src, dst) link.
        self._reliable_clock: dict[tuple[Address, Address], float] = {}

    def bind(
        self,
        address: Address,
        *,
        reliable: bool = False,
        loss: float = 0.0,
        duplicate: float = 0.0,
    ) -> Endpoint:
        """Create and register the endpoint for ``address``.

        ``loss`` and ``duplicate`` are per-message probabilities applied to messages this
        endpoint sends. With ``reliable=True`` those two faults are skipped and messages on each
        link are delivered in send order; the partition check at delivery time still applies.

        Raises ``SeedloopError`` if ``address`` is already bound or either probability lies
        outside ``[0, 1]``.
        """
        if address in self._endpoints:
            raise SeedloopError(f"address {address} is already bound")
        loss_ok = 0.0 <= loss <= 1.0
        if not loss_ok:
            raise SeedloopError(f"loss must be a probability in [0, 1], got {loss}")
        duplicate_ok = 0.0 <= duplicate <= 1.0
        if not duplicate_ok:
            raise SeedloopError(f"duplicate must be a probability in [0, 1], got {duplicate}")
        bound = _Endpoint(self, address, reliable=reliable, loss=loss, duplicate=duplicate)
        self._endpoints[address] = bound
        return bound

    def partition(self, *groups: set[Address]) -> None:
        """Split the network into ``groups``; different groups cannot reach each other.

        The split lasts until ``heal`` (or another ``partition`` call). An address that appears
        in no group stays connected to everyone.
        """
        # Copy each group so later mutation of the caller's sets does not alter the partition.
        self._partition = list(map(set, groups))

    def heal(self) -> None:
        """Remove any partition, restoring full connectivity."""
        self._partition = None

    def _reachable(self, src: Address, dst: Address) -> bool:
        """Return whether a message from ``src`` may currently be delivered to ``dst``."""
        groups = self._partition
        if groups is None:
            return True
        src_group = None
        dst_group = None
        # For each side, the first listed group containing the address is the one that counts.
        for group in groups:
            if src_group is None and src in group:
                src_group = group
            if dst_group is None and dst in group:
                dst_group = group
        if src_group is None or dst_group is None:
            return True  # an address outside every group reaches everyone
        return src_group is dst_group

    def _log_event(self, kind: str, mid: int, src: Address, dst: Address) -> None:
        """Record ``(now, kind, mid, src, dst)`` on the timeline."""
        self._timeline.record((self._loop.time(), kind, mid, src, dst))

    def _send(self, endpoint: _Endpoint, dst: Address, msg: Message) -> None:
        """Assign a message id, record the send, and schedule delivery per the endpoint policy."""
        src = endpoint.address
        mid = self._next_mid
        self._next_mid = mid + 1
        self._log_event("send", mid, src, dst)

        if endpoint._reliable:
            self._schedule_reliable(mid, src, dst, msg)
            return

        # Fault draws happen only when the matching probability is non-zero, and always in the
        # order loss-then-duplicate, so the faults stream is consumed the same way every run.
        lost = endpoint._loss > 0.0 and self._faults.random() < endpoint._loss
        if lost:
            self._log_event("drop", mid, src, dst)
            return

        self._schedule_delivery(mid, src, dst, msg)
        duplicated = endpoint._duplicate > 0.0 and self._faults.random() < endpoint._duplicate
        if duplicated:
            self._log_event("duplicate", mid, src, dst)
            self._schedule_delivery(mid, src, dst, msg)

    def _schedule_delivery(self, mid: int, src: Address, dst: Address, msg: Message) -> None:
        """Schedule one delivery after a latency drawn from the net stream."""
        delay = self._net.uniform(_LAT_MIN, _LAT_MAX)
        self._loop.call_later(delay, self._deliver, mid, src, dst, msg)

    def _schedule_reliable(self, mid: int, src: Address, dst: Address, msg: Message) -> None:
        """Schedule an in-order delivery on the ``(src, dst)`` link."""
        delay = self._net.uniform(_LAT_MIN, _LAT_MAX)
        link = (src, dst)
        # Never schedule earlier than the previous message on this link, so delivery times are
        # non-decreasing per link; the in-order claim relies on the loop firing equal deadlines
        # in scheduling order.
        when = max(self._loop.time() + delay, self._reliable_clock.get(link, 0.0))
        self._reliable_clock[link] = when
        self._loop.call_at(when, self._deliver, mid, src, dst, msg)

    def _deliver(self, mid: int, src: Address, dst: Address, msg: Message) -> None:
        """Timer callback: enqueue the message at ``dst`` unless a partition blocks it."""
        # Reachability is judged now, when the delivery fires, not when the message was sent: a
        # partition opened in flight cuts the message, one healed in time lets it through.
        if not self._reachable(src, dst):
            self._log_event("drop-partitioned", mid, src, dst)
            return
        self._log_event("deliver", mid, src, dst)
        target = self._endpoints.get(dst)
        if target is not None:
            target._enqueue((src, msg))
        # A message for an unbound address is recorded as delivered and then discarded.
148
+
149
+
150
class _Endpoint:
    """Concrete endpoint: a receive queue, at most one waiting receiver, and a send policy."""

    def __init__(
        self,
        transport: Transport,
        address: Address,
        *,
        reliable: bool,
        loss: float,
        duplicate: float,
    ) -> None:
        self.address = address
        self._transport = transport
        # Policy for messages this endpoint sends; read by the transport.
        self._reliable = reliable
        self._loss = loss
        self._duplicate = duplicate
        # Delivered (sender, payload) pairs not yet taken by recv().
        self._queue: deque[tuple[Address, Message]] = deque()
        # Future the current receiver is parked on, or None when nobody is waiting.
        self._waiter: asyncio.Future[None] | None = None

    async def send(self, dst: Address, msg: Message) -> None:
        """Hand ``msg`` to the transport for ``dst``; returns without waiting for delivery."""
        self._transport._send(self, dst, msg)

    async def recv(self) -> tuple[Address, Message]:
        """Return the oldest queued ``(sender, payload)``, waiting until one is queued.

        Raises ``SeedloopError`` if another ``recv`` on this endpoint is already waiting, since a
        second waiter would orphan the first.
        """
        if self._waiter is not None:
            raise SeedloopError("concurrent recv on one endpoint is not supported")
        inbox = self._queue
        while not inbox:
            parked = self._transport._loop.create_future()
            self._waiter = parked
            try:
                await parked
            finally:
                # Clear the slot on wake-up and on cancellation alike.
                self._waiter = None
        return inbox.popleft()

    def _enqueue(self, item: tuple[Address, Message]) -> None:
        """Queue a delivered message and wake the waiting receiver, if there is one."""
        self._queue.append(item)
        parked = self._waiter
        if parked is None or parked.done():
            return
        parked.set_result(None)
seedloop/_run.py ADDED
@@ -0,0 +1,88 @@
1
+ """Running scenarios: ``check`` sweeps seeds, ``replay`` reproduces one.
2
+
3
+ The contract (ADR-0003): a run is a pure function of its seed, so a failing seed *is* the
4
+ reproduction. ``check`` runs a scenario once per seed and reports the first failing seed; ``replay``
5
+ rebuilds that exact run. A fresh :class:`World` is built per seed with no shared mutable state, so
6
+ one run cannot bleed into the next.
7
+
8
+ ``check``/``replay`` do not pin ``PYTHONHASHSEED`` (ADR-0015): the launcher re-runs the whole
9
+ interpreter, which is wrong to trigger implicitly from inside a test runner. The guarantee instead
10
+ rests on library code never depending on hash order; a user whose own code does can call
11
+ ``seedloop.ensure_hash_seed`` at their entry point.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Awaitable, Callable, Iterable, Sequence
17
+ from dataclasses import dataclass
18
+ from typing import Literal
19
+
20
+ from seedloop._audit import audit_mode
21
+ from seedloop._entropy import csprng_shim, substream
22
+ from seedloop._world import World
23
+
24
+ Scenario = Callable[[World], Awaitable[None]]
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class CheckResult:
29
+ """The outcome of a seed sweep."""
30
+
31
+ checked: int # how many seeds ran
32
+ failing_seed: int | None # first failing seed, or None if all passed
33
+ error: Exception | None # the exception that seed raised, or None
34
+
35
+
36
def _run_one(scenario: Scenario, seed: int, *, audit: bool = False) -> Sequence[object]:
    """Build a fresh ``World`` for ``seed``, drive ``scenario`` in it, and return its timeline.

    The run executes inside exactly one of two contexts. By default it is the CSPRNG shim, fed
    from the seed's ``"csprng"`` sub-stream, so ``os.urandom`` and ``secrets`` are seeded for the
    run and restored afterwards. With ``audit=True`` it is the non-determinism auditor instead,
    under which uncontrolled entropy raises rather than being seeded (ADR-0008).
    """
    world = World(seed)
    if audit:
        guard = audit_mode()
    else:
        guard = csprng_shim(substream(seed, "csprng"))
    with guard:
        world._drive(scenario(world))
    return world.timeline
50
+
51
+
52
def check(
    scenario: Scenario,
    *,
    seeds: int | Iterable[int] = 1000,
    on_failure: Literal["raise", "return"] = "raise",
    audit: bool = False,
) -> CheckResult:
    """Run ``scenario`` once per seed; report the first seed that fails.

    ``seeds=N`` runs seeds ``0..N-1``; an iterable runs exactly those seeds. The first run that
    raises — an ``assert``, a ``SeedloopError``, or any exception from the scenario — is the
    failure. With ``on_failure="raise"`` the exception is re-raised tagged with its seed; with
    ``"return"`` the sweep stops and returns the :class:`CheckResult`. With ``audit=True`` the
    non-determinism auditor runs each seed: an uncontrolled entropy source fails it (ADR-0008).

    Raises ``ValueError`` if ``on_failure`` is neither ``"raise"`` nor ``"return"``.
    """
    # Validate before running anything: an unrecognised value would otherwise behave like
    # "return", and a caller who ignores the result would never see the failure.
    if on_failure not in ("raise", "return"):
        raise ValueError(f"on_failure must be 'raise' or 'return', got {on_failure!r}")
    seed_iter: Iterable[int] = range(seeds) if isinstance(seeds, int) else seeds
    checked = 0
    for seed in seed_iter:
        checked += 1
        try:
            _run_one(scenario, seed, audit=audit)
        except Exception as error:
            # Only a scenario *failure* is caught; KeyboardInterrupt/SystemExit propagate so a
            # long sweep stays abortable (and is never mis-tagged as a failing seed).
            error.add_note(f"seedloop: failing seed={seed} (replay with seedloop.replay)")
            if on_failure == "raise":
                raise
            return CheckResult(checked=checked, failing_seed=seed, error=error)
    return CheckResult(checked=checked, failing_seed=None, error=None)
81
+
82
+
83
def replay(scenario: Scenario, *, seed: int, audit: bool = False) -> None:
    """Run ``scenario`` once for ``seed``, letting any failure propagate to the caller.

    The run is built the same way a sweep builds it, so a seed reported by ``check`` reproduces
    here. Pass ``audit=True`` to run it under the non-determinism auditor (ADR-0008). The
    recorded timeline is not returned.
    """
    _run_one(scenario, seed, audit=audit)
seedloop/_trace.py ADDED
@@ -0,0 +1,29 @@
1
+ """The timeline: an append-only record of a run's events.
2
+
3
+ Determinism is proven by replay — running the same scenario twice must produce an identical
4
+ timeline (``docs/testing.md``). It records the events a scenario chooses to log, alongside the
5
+ network and fault events the simulated transport schedules under stable message ids.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Sequence
11
+
12
+
13
class Timeline:
    """The ordered, append-only event log of a single run."""

    def __init__(self) -> None:
        self._events: list[object] = []

    def record(self, event: object) -> None:
        """Add ``event`` to the end of the log."""
        self._events.append(event)

    @property
    def events(self) -> Sequence[object]:
        """Everything recorded so far, oldest first, as an immutable snapshot.

        A new tuple is built on each access, so later ``record`` calls do not change a snapshot
        the caller already holds.
        """
        return (*self._events,)

    def __repr__(self) -> str:
        logged = self._events
        return f"Timeline({logged!r})"
seedloop/_world.py ADDED
@@ -0,0 +1,112 @@
1
+ """The World: everything for one deterministic run, derived from one seed.
2
+
3
+ A run is a pure function of its seed. The World assembles the deterministic loop, the virtual clock,
4
+ and the seeded entropy into one object, exposes the user's seeded ``rng`` and the virtual clock, and
5
+ records a timeline so two runs of a seed can be compared. Users do not construct a World;
6
+ ``check``/``replay`` build it and pass it to the scenario.
7
+
8
+ Scheduling stays faithful FIFO (ADR-0012), so the seed's observable effect is ``rng``, timer timing,
9
+ and the simulated network's delivery timing — not callback order.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ from collections.abc import Awaitable, Callable
16
+ from typing import Protocol, runtime_checkable
17
+
18
+ from seedloop._entropy import substream
19
+ from seedloop._loop import DeterministicLoop
20
+ from seedloop._net import Transport
21
+ from seedloop._trace import Timeline
22
+ from seedloop.errors import InvariantError
23
+
24
+
25
@runtime_checkable
class Node(Protocol):
    """Structural type for user code the World can start: anything exposing an async ``run``."""

    async def run(self) -> None:
        """Execute the node's body; ``World.start`` schedules this coroutine as a task."""
30
+
31
+
32
class World:
    """One deterministic run, all derived from ``seed``.

    Bundles the deterministic loop, the run's timeline, the user's seeded ``rng`` and the
    simulated network ``net``. Users do not construct a World; ``check``/``replay`` build it and
    pass it to the scenario.
    """

    def __init__(self, seed: int) -> None:
        """Assemble the loop, timeline, entropy substreams and transport for ``seed``."""
        self.seed = seed
        self.rng = substream(seed, "user")  # the user's entropy; never the global random
        self._loop = DeterministicLoop()
        self._timeline = Timeline()
        # Tasks created by ``start``; ``_drive`` inspects them for failures after the scenario.
        self._started: list[asyncio.Task[None]] = []
        # (name, predicate) pairs registered through ``always``, checked in registration order.
        self._invariants: list[tuple[str, Callable[[], bool]]] = []
        # The network gets its own "net" and "faults" substreams plus the shared timeline.
        self.net = Transport(
            self._loop, substream(seed, "net"), substream(seed, "faults"), self._timeline
        )

    def now(self) -> float:
        """Current virtual time in seconds (advances by autojump, never by real waiting)."""
        return self._loop.time()

    def record(self, event: object) -> None:
        """Append an event to the run's timeline, stamped with the current virtual time.

        The timeline is the artifact that proves determinism: two runs of a seed must record an
        identical sequence. A scenario records the decisions whose reproducibility it cares about.
        """
        self._timeline.record((self._loop.time(), event))

    def always(self, predicate: Callable[[], bool], *, name: str) -> None:
        """Register a safety property that must hold throughout the run.

        ``predicate`` is evaluated after every step (not during teardown); the first step where it
        is false raises ``InvariantError(name)``, which ``check`` reports. It must be pure and
        read-only — a predicate that mutates state or draws entropy would break determinism. A
        started node's body runs a step after ``start``, so a predicate over node state sees its
        initial value on the first check.
        """
        self._invariants.append((name, predicate))
        self._loop._sl_after_step = self._check_invariants  # check from the next step on

    def _check_invariants(self) -> None:
        """Raise ``InvariantError`` for the first registered predicate that is currently false."""
        for name, predicate in self._invariants:
            if not predicate():
                raise InvariantError(name, self._loop.time())

    def start(self, *nodes: Node) -> None:
        """Schedule each node's ``run()`` coroutine as a task on the loop.

        A started node that raises fails the run (its exception surfaces from the run), rather than
        being orphaned and silently logged — a failure the seed must report.
        """
        for node in nodes:
            self._started.append(self._loop.create_task(node.run()))

    @property
    def timeline(self) -> tuple[object, ...]:
        """The recorded timeline so far (a read-only snapshot)."""
        return tuple(self._timeline.events)

    def _drive(self, main: Awaitable[None]) -> None:
        """Run the scenario to completion, surface any started-node failure, then close the loop.

        Raises whatever ``main`` raises; if ``main`` finishes cleanly, raises the exception of the
        first started node that failed. The loop is closed on every path, including when the
        cancellation teardown itself raises.
        """
        try:
            self._loop.run_until_complete(main)
            # The scenario finished without raising; surface the first started node that failed
            # (a crashed node would otherwise be an orphaned task, only logged).
            for task in self._started:
                exc = task.exception() if task.done() and not task.cancelled() else None
                if exc is not None:
                    raise exc
        finally:
            try:
                # Invariants describe the logical run, not cancellation cleanup — stop checking
                # them before teardown, so a node mutating observed state in its cancel handler
                # cannot raise a spurious InvariantError (and cannot mask the real failure raised
                # above).
                self._loop._sl_after_step = None
                # Cancel every task still pending — started nodes and any the scenario spawned —
                # and let the cancellations process, so the loop closes without "Task was
                # destroyed but it is pending" warnings (a node loop that never returns, or a recv
                # stuck under a fault).
                pending = [t for t in asyncio.all_tasks(self._loop) if not t.done()]
                for task in pending:
                    task.cancel()
                if pending:
                    self._loop.run_until_complete(
                        asyncio.gather(*pending, return_exceptions=True)
                    )
            finally:
                # Close even when the teardown above raises (e.g. a task that raises SystemExit
                # or KeyboardInterrupt while being cancelled); otherwise the loop would leak.
                self._loop.close()
@@ -0,0 +1 @@
1
+ """Worked demos that run real protocols under seedloop."""
seedloop/demos/raft.py ADDED
@@ -0,0 +1,181 @@
1
+ """A small Raft leader election, run under seedloop — the worked proof.
2
+
3
+ This is election only (terms, ``RequestVote``, majority, heartbeats); log replication, persistence,
4
+ and membership changes are out of scope. It exists to demonstrate one thing end to end: seedloop
5
+ finds a real class of consensus bug and replays it from a seed.
6
+
7
+ The bug is a deliberate, labelled toggle, not a claimed discovery in canonical Raft. With
8
+ ``buggy=True`` a node omits the single-vote-per-term rule, so it can grant a vote to two candidates
9
+ in the same term; in a three-node cluster that lets both reach a majority and become leader in one
10
+ term — split-brain, the exact failure the majority rule exists to prevent. With ``buggy=False`` the
11
+ rule is enforced and the same seed sweep finds no violation. That two-sided result is the proof: the
12
+ violation is the toggled flaw, not an accident of the harness.
13
+
14
+ Run it: ``python -m seedloop.demos.raft``
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import asyncio
20
+ import sys
21
+ from typing import cast
22
+
23
+ import seedloop
24
+ from seedloop import World
25
+
26
# The three roles a node can hold during an election.
FOLLOWER = "follower"
CANDIDATE = "candidate"
LEADER = "leader"

# Election timeouts are drawn from world.rng (so the seed owns the race); the leader's heartbeat is
# faster than any election timeout, so a stable leader suppresses new elections.
_ELECTION_MIN = 0.15
_ELECTION_MAX = 0.30
_HEARTBEAT = 0.05
32
+
33
+
34
class RaftNode:
    """Election-only Raft participant; all messaging goes through ``world.net`` (sans-I/O)."""

    def __init__(self, world: World, addr: int, peers: list[int], *, buggy: bool) -> None:
        """Bind ``addr`` on the world's network and start as a follower in term 0."""
        self._world = world
        self._ep = world.net.bind(addr)
        self.addr = addr
        self._peers = peers
        self._all = len(peers) + 1  # cluster size: every peer plus this node
        self._buggy = buggy
        self.term = 0
        self.role = FOLLOWER
        self._voted_for: int | None = None
        self._votes: set[int] = set()

    def _election_timeout(self) -> float:
        """Draw this round's election timeout from the world's seeded rng."""
        rng = self._world.rng
        return rng.uniform(_ELECTION_MIN, _ELECTION_MAX)

    async def _broadcast(self, msg: object) -> None:
        """Send ``msg`` to every peer, one after another, in peer-list order."""
        for peer in self._peers:
            await self._ep.send(peer, msg)

    def _adopt_newer_term(self, term: int) -> None:
        """Step down into ``term`` and forget this node's vote, but only if ``term`` is newer."""
        # Only a strictly higher term resets the vote — a node votes at most once per term, so
        # stepping down within the same term must NOT clear who it already voted for.
        if term <= self.term:
            return
        self.term = term
        self.role = FOLLOWER
        self._voted_for = None
        self._votes = set()

    async def run(self) -> None:
        """Main loop: handle each received message; on timeout, heartbeat or start an election."""
        while True:
            if self.role == LEADER:
                timeout = _HEARTBEAT
            else:
                timeout = self._election_timeout()
            try:
                src, msg = await asyncio.wait_for(self._ep.recv(), timeout=timeout)
            except TimeoutError:
                # Nothing arrived in time: a leader re-asserts itself, anyone else campaigns.
                if self.role == LEADER:
                    await self._broadcast(("heartbeat", self.term, self.addr))
                else:
                    await self._begin_election()
            else:
                await self._handle(src, msg)

    async def _begin_election(self) -> None:
        """Move to the next term as a candidate, vote for self, and ask every peer for a vote."""
        self.term += 1
        self.role = CANDIDATE
        self._voted_for = self.addr
        self._votes = {self.addr}  # vote for self
        request = ("request_vote", self.term, self.addr)
        await self._broadcast(request)

    async def _handle(self, src: int, msg: object) -> None:
        """Dispatch one received message on its first field; unknown kinds are ignored."""
        fields = cast("tuple[object, ...]", msg)
        kind = fields[0]
        if kind == "request_vote":
            await self._on_request_vote(src, cast("int", fields[1]))
            return
        if kind == "vote":
            await self._on_vote(
                cast("int", fields[1]), cast("int", fields[2]), cast("bool", fields[3])
            )
            return
        if kind == "heartbeat":
            self._on_heartbeat(cast("int", fields[1]))

    async def _on_request_vote(self, src: int, term: int) -> None:
        """Answer a vote request from ``src`` for ``term`` with a grant or a refusal."""
        self._adopt_newer_term(term)
        # The bug: the correct rule grants at most one vote per term (`_voted_for` guard); the buggy
        # path drops the guard, so a node can vote for two candidates in one term.
        same_term = term == self.term
        not_leading = self.role != LEADER
        vote_available = self._buggy or self._voted_for in (None, src)
        grant = bool(same_term and not_leading and vote_available)
        if grant:
            self._voted_for = src
        await self._ep.send(src, ("vote", self.term, self.addr, grant))

    async def _on_vote(self, term: int, voter: int, granted: bool) -> None:
        """Count a granted vote for the current candidacy; become leader on reaching a majority."""
        if term != self.term or self.role != CANDIDATE or not granted:
            return
        self._votes.add(voter)  # distinct voters; a majority of them elects
        if len(self._votes) <= self._all // 2:
            return
        # A majority elects: take the lead and announce it at once.
        self.role = LEADER
        await self._broadcast(("heartbeat", self.term, self.addr))

    def _on_heartbeat(self, term: int) -> None:
        """React to a leader's heartbeat for ``term``."""
        self._adopt_newer_term(term)
        if term == self.term:
            # A leader exists this term; step down (a no-op for a follower) but keep our vote.
            self.role = FOLLOWER
122
+
123
+
124
def leaders_by_term(nodes: list[RaftNode]) -> dict[int, set[int]]:
    """Group the addresses of nodes whose role is currently leader by the term they hold."""
    by_term: dict[int, set[int]] = {}
    for candidate in nodes:
        if candidate.role != LEADER:
            continue
        if candidate.term not in by_term:
            by_term[candidate.term] = set()
        by_term[candidate.term].add(candidate.addr)
    return by_term
131
+
132
+
133
def at_most_one_leader_per_term(nodes: list[RaftNode]) -> bool:
    """Raft's election-safety property: True unless some term currently has two or more leaders."""
    leader_sets = leaders_by_term(nodes).values()
    return not any(len(leaders) > 1 for leaders in leader_sets)
136
+
137
+
138
def election_scenario(*, buggy: bool, nodes: int = 3, seconds: float = 3.0) -> seedloop.Scenario:
    """Build a scenario that runs an election cluster and asserts election safety throughout.

    ``nodes`` is the cluster size, ``seconds`` the virtual time the election is left to run, and
    ``buggy`` is passed on to every ``RaftNode``.
    """

    async def scenario(world: World) -> None:
        cluster: list[RaftNode] = []
        for addr in range(nodes):
            peers = [other for other in range(nodes) if other != addr]
            cluster.append(RaftNode(world, addr, peers, buggy=buggy))
        world.start(*cluster)

        def election_is_safe() -> bool:
            return at_most_one_leader_per_term(cluster)

        world.always(election_is_safe, name="at-most-one-leader-per-term")
        # Let elections run; the invariant is checked each step.
        await asyncio.sleep(seconds)

    return scenario
152
+
153
+
154
def find_split_brain(seeds: int = 200) -> int | None:
    """Sweep ``seeds`` seeds of the buggy election; return the first failing seed, else None."""
    buggy_election = election_scenario(buggy=True)
    outcome = seedloop.check(buggy_election, seeds=seeds, on_failure="return")
    return outcome.failing_seed
158
+
159
+
160
def main() -> None:
    """Run the two-sided demo and exit non-zero if any half of the proof fails.

    Steps: sweep the buggy election for a failing seed, replay that seed and expect an
    ``InvariantError``, then sweep the correct election over the same number of seeds and expect
    no failure. Exits with status 1 when no failing seed is found, when the replay does not
    reproduce the violation, or when the correct election fails.
    """
    seeds = 200  # one sweep size shared by the buggy and the correct run
    print("seedloop Raft election demo - hunting for split-brain\n")
    seed = find_split_brain(seeds)
    if seed is None:
        print("no split-brain found in the swept seeds (try more seeds)")
        sys.exit(1)  # the proof did not reproduce — fail loudly (CI runs this)
    print(f"buggy election: split-brain found at seed={seed}")
    print(f" reproduce it: seedloop.replay(election_scenario(buggy=True), seed={seed})")
    try:
        seedloop.replay(election_scenario(buggy=True), seed=seed)
    except seedloop.InvariantError as exc:
        print(f" replay reproduces it: {exc}")
    else:
        # A replay that passes means the seed is not a reproduction; that breaks the demo's claim.
        print(" replay did NOT reproduce the violation")
        sys.exit(1)
    clean = seedloop.check(election_scenario(buggy=False), seeds=seeds, on_failure="return")
    verdict = (
        "no violation" if clean.failing_seed is None else f"FAILED at seed={clean.failing_seed}"
    )
    print(
        f"\ncorrect election (single-vote rule enforced): {verdict} over the same {seeds} seeds"
    )
    if clean.failing_seed is not None:
        sys.exit(1)  # the corrected election must pass; do not print the conclusion below
    print("-> the violation is the toggled flaw, not the harness.")


if __name__ == "__main__":
    main()
seedloop/errors.py ADDED
@@ -0,0 +1,57 @@
1
+ """Exceptions seedloop raises.
2
+
3
+ One specific exception per failure mode; nothing is swallowed. The hierarchy is rooted at
4
+ ``SeedloopError`` so everything seedloop raises can be caught with a single class.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+
10
class SeedloopError(Exception):
    """Root of the seedloop exception hierarchy; catching it catches every seedloop error."""
12
+
13
+
14
class BoundaryError(SeedloopError):
    """Raised when a simulated run reaches outside the determinism boundary.

    Real threads, ``run_in_executor``, subprocesses, real sockets, and cross-thread wakeups
    cannot be made deterministic, so they are rejected instead of being run silently
    (``docs/scope.md``).
    """
20
+
21
+
22
class DeadlockError(SeedloopError):
    """Raised when the run cannot progress and nothing is scheduled to wake it.

    Where a real ``asyncio`` program would hang, a simulated run raises instead of spinning, so
    the deadlock becomes a visible failure tied to the seed that produced it.
    """
28
+
29
+
30
class InvariantError(SeedloopError):
    """An ``always(...)`` invariant was violated during a run.

    A continuous safety property (e.g. "at most one leader") that must hold throughout, checked
    after every step; the first step where it is false raises this, which ``check`` reports as the
    failure. Carries the invariant's ``name`` and the virtual ``time`` of the violation.
    """

    def __init__(self, name: str, time: float) -> None:
        """Build the error for invariant ``name`` violated at virtual time ``time``."""
        super().__init__(f"invariant {name!r} violated at t={time}")
        self.name = name
        self.time = time

    def __reduce__(self) -> tuple[object, ...]:
        """Support pickling and copying by rebuilding from ``(name, time)``.

        The default exception reduce protocol calls ``cls(*self.args)``; ``args`` holds only the
        formatted message, which does not match ``__init__(name, time)`` and raises ``TypeError``.
        The instance ``__dict__`` is passed as state so extra attributes are restored too.
        """
        return (type(self), (self.name, self.time), dict(self.__dict__))
42
+
43
+
44
class EntropyLeakError(BoundaryError):
    """An uncontrolled entropy source was touched inside a simulated run.

    In audit mode the non-determinism auditor raises this when code reaches for real
    ``os.urandom``/``secrets``, real time, or the unseeded global ``random`` instead of the World's
    seeded source (``docs/decisions.md`` ADR-0008). Carries the offending ``source``.
    """

    def __init__(self, source: str) -> None:
        """Build the error for the entropy source named ``source``."""
        super().__init__(
            f"uncontrolled entropy source {source!r} used inside a run; route it through the seed "
            f"(world.rng) or the virtual clock — see docs/scope.md"
        )
        self.source = source

    def __reduce__(self) -> tuple[object, ...]:
        """Support pickling and copying by rebuilding from ``source``.

        The default exception reduce protocol calls ``cls(*self.args)``, which would pass the
        already-formatted message as ``source`` and wrap the message a second time. The instance
        ``__dict__`` is passed as state so extra attributes are restored too.
        """
        return (type(self), (self.source,), dict(self.__dict__))
seedloop/py.typed ADDED
File without changes
@@ -0,0 +1,180 @@
1
+ Metadata-Version: 2.4
2
+ Name: seedloop
3
+ Version: 0.3.0
4
+ Summary: Deterministic simulation testing for Python asyncio.
5
+ Author-email: Vojtěch Klíma <vojtechklima02@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/klimavojtech2002/seedloop
8
+ Project-URL: Repository, https://github.com/klimavojtech2002/seedloop
9
+ Project-URL: Issues, https://github.com/klimavojtech2002/seedloop/issues
10
+ Project-URL: Changelog, https://github.com/klimavojtech2002/seedloop/blob/main/CHANGELOG.md
11
+ Keywords: asyncio,testing,determinism,simulation,concurrency
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Testing
18
+ Classifier: Typing :: Typed
19
+ Requires-Python: >=3.12
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Provides-Extra: dev
23
+ Requires-Dist: ruff>=0.6; extra == "dev"
24
+ Requires-Dist: mypy>=1.10; extra == "dev"
25
+ Requires-Dist: pytest>=8; extra == "dev"
26
+ Requires-Dist: pytest-timeout>=2; extra == "dev"
27
+ Dynamic: license-file
28
+
29
+ # seedloop
30
+
31
+ Deterministic simulation testing for Python. Run your concurrent async logic through thousands of
32
+ controlled, reproducible timelines — varying message timing and delivery order, injecting network
33
+ faults, partitions, and delays — to surface the rare concurrency bug that shows up once in a million
34
+ runs, and replay it exactly from a seed.
35
+
36
+ It brings the FoundationDB / TigerBeetle / Antithesis style of reliability testing — until now living
37
+ only in Rust, C++, and Java — to Python's `asyncio`, as a `pip`-installable library.
38
+
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
40
+
41
+ ## The problem
42
+
43
+ Concurrency bugs are the worst bugs. A protocol or state machine works in every test, then once a
44
+ week in CI a test fails, and nobody can reproduce it — because the failure depended on an exact
45
+ interleaving of events, a message arriving late, a partition healing at the wrong moment. You cannot
46
+ fix what you cannot reproduce, so these bugs are patched by guesswork and survive for years.
47
+
48
+ Deterministic simulation testing (DST) inverts this. It takes total control of every source of
49
+ nondeterminism — scheduling order, time, randomness, the network — and drives them all from a single
50
+ seed. The same seed produces the same timeline, so the same bug, every time. You explore thousands of
51
+ seeds to hunt for failures, and when one is found, the seed *is* the reproduction: replay it and the
52
+ bug happens again, deterministically, every run.
53
+
54
+ This is how FoundationDB reached its reliability record. DST exists as polished libraries in Rust
55
+ (`madsim`, `turmoil`). In Python — where a great deal of distributed and protocol code is written — it
56
+ does not exist at all. `seedloop` is that library.
57
+
58
+ ## What you do with it
59
+
60
+ You write your protocol or algorithm against an abstract transport (the
61
+ [sans-I/O](https://sans-io.readthedocs.io/) style), and `seedloop` runs it inside a deterministic
62
+ world it fully controls. A test looks like this (`World`, `check`, `replay`, the network `world.net`
63
+ with loss/duplication/partitions, the `world.always` invariant API, and the `audit=True`
64
+ non-determinism auditor are all implemented; the seed-scheduled `world.run_for` fault schedule is the
65
+ next phase, specified in [docs/api.md](docs/api.md)):
66
+
67
+ ```python
68
+ import seedloop
69
+
70
+ async def scenario(world: seedloop.World) -> None:
71
+ # Spin up your nodes; they send messages through the simulated network.
72
+ nodes = [RaftNode(addr, world.net) for addr in range(5)]
73
+ world.start(*nodes)
74
+
75
+ # State the invariant that must hold at every step, not just at the end.
76
+ world.always(lambda: at_most_one_leader(nodes), name="at-most-one-leader")
77
+
78
+ # Inject chaos the seed decides the details of.
79
+ await world.run_for(seconds=10, faults=[world.partition(), world.slow_link()])
80
+
81
+ # Hunt across 10,000 seeded timelines; on failure, print the seed.
82
+ seedloop.check(scenario, seeds=10_000)
83
+ # A failing run prints: seed=4823 → replay with seedloop.replay(scenario, seed=4823)
84
+ ```
85
+
86
+ `seedloop.replay(scenario, seed=4823)` re-runs that exact timeline, deterministically, as many times
87
+ as you need to debug it. The full API is in [docs/api.md](docs/api.md).
88
+
89
+ ## The worked proof: a Raft split-brain, found and replayed
90
+
91
+ A small Raft leader election ships as a demo. With a deliberate, labelled flaw — a node that omits the
92
+ single-vote-per-term rule — a seed sweep finds the timing where two nodes both win an election in the
93
+ same term (split-brain), and replays it from the seed. The corrected election passes the same sweep, so
94
+ the violation is the toggled flaw, not the harness: in a three-node cluster the shared third voter can
95
+ only break the tie once under the single-vote rule, so one candidate gets two votes and the other gets one —
96
+ never two leaders.
97
+
98
+ ```
99
+ $ python -m seedloop.demos.raft
100
+ seedloop Raft election demo - hunting for split-brain
101
+
102
+ buggy election: split-brain found at seed=7
103
+ reproduce it: seedloop.replay(election_scenario(buggy=True), seed=7)
104
+ replay reproduces it: invariant 'at-most-one-leader-per-term' violated at t=0.229...
105
+ correct election (single-vote rule enforced): no violation over the same 200 seeds
106
+ -> the violation is the toggled flaw, not the harness.
107
+ ```
108
+
109
+ The election logic is in [`src/seedloop/demos/raft.py`](src/seedloop/demos/raft.py). It is election only
110
+ (terms, `RequestVote`, majority, heartbeats) — log replication, persistence, and membership changes are
111
+ out of scope.
112
+
113
+ ## What it does
114
+
115
+ - A **deterministic event loop** that makes `asyncio` task scheduling reproducible and drives the I/O
116
+ seam — where nondeterminism actually enters — from the seed.
117
+ - A **virtual clock** — `sleep` and timeouts advance simulated time instantly; no run is slower for
118
+ testing a 10-second scenario.
119
+ - **Seeded randomness** everywhere, so a run is a pure function of its seed.
120
+ - A **simulated network** with seeded latency, reordering, message loss, and partitions.
121
+ - **Fault injection** driven by the seed, so chaos is reproducible rather than random.
122
+ - **Invariants** — `world.always(...)` checks a continuous safety property at every step.
123
+ - A **non-determinism auditor** — `audit=True` turns any uncontrolled entropy source into a loud,
124
+ reproducible failure, so the determinism boundary is enforced, not just stated.
125
+ - **Seed replay** — the whole point: any failure reduces to a single integer you can replay forever.
126
+
127
+ ## Scope — what it tests, and what it deliberately does not
128
+
129
+ The honesty in this section is the point. `seedloop` makes your async *logic* deterministic; it does
130
+ not make your *infrastructure* deterministic, and it does not pretend to. The full boundary, and the engineering reasons behind it, are in
131
+ [docs/scope.md](docs/scope.md). In short:
132
+
133
+ - **It is for** pure-Python async code that talks to an abstract transport: consensus (Raft/Paxos),
134
+ replication, gossip, CRDTs, custom wire protocols, schedulers, retry/backoff/circuit-breaker logic,
135
+ rate limiters — code where the *logic* holds the concurrency bugs.
136
+ - **It is not for** I/O-heavy applications bound to real drivers. Real threads, `multiprocessing`,
137
+ `uvloop`, and C-extension drivers (`asyncpg`, `grpcio`) are explicitly out of scope, because their
138
+ scheduling cannot be controlled from Python — the same wall that stops deterministic testing in Go.
139
+ `seedloop` tests your algorithm, not your database driver.
140
+
141
+ Choosing this boundary deliberately — rather than promising determinism it cannot deliver — is what
142
+ keeps the guarantee real.
143
+
144
+ ## Status
145
+
146
+ The planned build is **complete through v0.3.0**: the deterministic core (custom event loop, virtual
147
+ clock with autojump, seeded entropy, the `World` / `check` / `replay` API), the simulated network with
148
+ fault injection (loss, duplication, partitions), the `world.always` invariant API, the non-determinism
149
+ auditor (`audit=True`), and the worked Raft demo (which runs today) — so `asyncio` runs are reproducible
150
+ and instant, a partition- or timing-dependent bug replays identically from its seed, and an uncontrolled
151
+ entropy source fails loudly under audit. Deferred: the seed-scheduled `world.run_for` fault schedule and
152
+ an optional Hypothesis integration (`seedloop[hypothesis]`). The full API target is in
153
+ [docs/api.md](docs/api.md) and the phased build in [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md).
154
+
155
+ ## Why it exists
156
+
157
+ There is no `pip`-installable deterministic simulation testing framework for Python `asyncio` — the
158
+ capability lives in Rust (`madsim`, `turmoil`), C++ (FoundationDB), Java (OpenDST), and behind a
159
+ commercial hypervisor (Antithesis), but not in Python. Meanwhile the discipline is rising fast among
160
+ serious engineers (Antithesis raised a $105M round led by Jane Street to standardize DST; AWS has
161
+ codified deterministic and formal methods as standing practice). As one of its proponents puts it:
162
+ *writing code is no longer the bottleneck — making sure it does the right thing is.* `seedloop` is a
163
+ tool for exactly that, in the language that lacked it.
164
+
165
+ ## Documentation
166
+
167
+ The design is specified before the code:
168
+
169
+ - [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) — how `asyncio` is made deterministic, and the phased build.
170
+ - [docs/api.md](docs/api.md) — the public API: `World`, `check`/`replay`, the transport, faults.
171
+ - [docs/internals.md](docs/internals.md) — the loop, virtual clock, entropy control, network and fault scheduling.
172
+ - [docs/network.md](docs/network.md) — the simulated transport and fault model.
173
+ - [docs/scope.md](docs/scope.md) — the determinism boundary: what is controlled and what is not.
174
+ - [docs/testing.md](docs/testing.md) — how determinism is proven by replay.
175
+ - [docs/decisions.md](docs/decisions.md) — the decision records (ADRs).
176
+ - [docs/glossary.md](docs/glossary.md) — the vocabulary.
177
+
178
+ ## License
179
+
180
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,17 @@
1
+ seedloop/__init__.py,sha256=v0u4D7SHFXW-Jo_FGEJbr9GhtXy4jhtOLwW8QvIQDhM,1191
2
+ seedloop/_audit.py,sha256=FH0go-ybFodw1h3AIr3IRim3kPUPMAc-U2KFC0z6P0o,4517
3
+ seedloop/_entropy.py,sha256=4ZZfXN4HsZ9EiZ3024-zxjvbXR2XWaxQxqT2MQEXGP8,4499
4
+ seedloop/_loop.py,sha256=UT0lh6lawusSogsmXvAJ52AwG7tK0W0OYizGpt3rZ-s,7885
5
+ seedloop/_net.py,sha256=GqywpytBmLuPLtpB8KJpCbotYsl-OLXnjNkt0LFKwg0,7979
6
+ seedloop/_run.py,sha256=OPZQgCN5f9IG4xN_E616s_YhP0UAn2M7dSVvpePx31c,3911
7
+ seedloop/_trace.py,sha256=TEfJGP0E5M5Y1L-MEvPOsBb6QR3moZH49z5CloMOmkk,921
8
+ seedloop/_world.py,sha256=Pn9mTP4yu8PsmhX6hdIPwuknjJsyd_dsUJZBBXn9qHM,5253
9
+ seedloop/errors.py,sha256=absRq_4jWgzLlxIOBcJwMGYenlldml2p3maJVGwYbfI,2168
10
+ seedloop/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ seedloop/demos/__init__.py,sha256=7ux1hd1HRarZjNPHN3cELwwBXo7dHR-NTuCkKOVMdEs,59
12
+ seedloop/demos/raft.py,sha256=FUl06jOu-8axF4nplkOmZCZZEaW01O4UqN6ZpKPnS10,7520
13
+ seedloop-0.3.0.dist-info/licenses/LICENSE,sha256=bq78RJMIno1EDrCmF4-Q6E8TqWGPk12dM1yauYtEGqM,1072
14
+ seedloop-0.3.0.dist-info/METADATA,sha256=A4-L13cM1e35LrMNKYK5hKA_uIvEis9jRIDJrB1HAlg,10201
15
+ seedloop-0.3.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
16
+ seedloop-0.3.0.dist-info/top_level.txt,sha256=CDWSLLQIpYsE2ds0ATOpEYX_eQUGUjng4tE8l2sO9MA,9
17
+ seedloop-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vojtěch Klíma
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ seedloop