meshlogd 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meshlog/__init__.py +21 -0
- meshlog/cli.py +101 -0
- meshlog/event.py +99 -0
- meshlog/feedstore.py +176 -0
- meshlog/hlc.py +64 -0
- meshlog/node.py +114 -0
- meshlog/reducer.py +87 -0
- meshlog/rns_transport.py +212 -0
- meshlog/transport.py +95 -0
- meshlogd-0.1.0.dist-info/METADATA +163 -0
- meshlogd-0.1.0.dist-info/RECORD +14 -0
- meshlogd-0.1.0.dist-info/WHEEL +4 -0
- meshlogd-0.1.0.dist-info/entry_points.txt +2 -0
- meshlogd-0.1.0.dist-info/licenses/LICENSE +202 -0
meshlog/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""meshlog -- an event-sourced, eventually-consistent dataset that converges over
|
|
2
|
+
a frequently-partitioning, low-bandwidth mesh (Reticulum on real hardware).
|
|
3
|
+
|
|
4
|
+
Public surface:
|
|
5
|
+
Node, Event, FeedStore, HLC, materialize
|
|
6
|
+
SimNetwork, SimTransport (in-memory transport for demo/tests)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__version__ = "0.1.0"
|
|
10
|
+
|
|
11
|
+
from .event import Event
|
|
12
|
+
from .feedstore import FeedStore
|
|
13
|
+
from .hlc import HLC
|
|
14
|
+
from .node import Node
|
|
15
|
+
from .reducer import materialize
|
|
16
|
+
from .transport import SimNetwork, SimTransport, Transport
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"Node", "Event", "FeedStore", "HLC", "materialize",
|
|
20
|
+
"SimNetwork", "SimTransport", "Transport",
|
|
21
|
+
]
|
meshlog/cli.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Run a meshlog node over a real Reticulum mesh.
|
|
2
|
+
|
|
3
|
+
Start two of these on machines that share any Reticulum interface (same LAN via
|
|
4
|
+
AutoInterface to begin with; add an RNode/LoRa interface later in your
|
|
5
|
+
~/.reticulum/config -- the application code does not change) and watch a shared
|
|
6
|
+
dataset converge.
|
|
7
|
+
|
|
8
|
+
Examples
|
|
9
|
+
--------
|
|
10
|
+
Terminal 1 (base camp):
|
|
11
|
+
python -m meshlog.cli --name base --data ./base.jsonl \\
|
|
12
|
+
--create R-100 site_assessment --set R-100 status OPENED
|
|
13
|
+
|
|
14
|
+
Terminal 2 (field team), on another machine:
|
|
15
|
+
python -m meshlog.cli --name team_a --data ./team_a.jsonl \\
|
|
16
|
+
--create R-200 water_point --set R-200 functional true
|
|
17
|
+
|
|
18
|
+
Each node periodically gossips; type `view` + Enter at any node to print its
|
|
19
|
+
current materialized dataset, or `front` to print its frontier.
|
|
20
|
+
|
|
21
|
+
NOTE: requires `pip install rns`. This script drives the real RNSTransport, which
|
|
22
|
+
is faithful to the RNS API but should be validated on your own mesh.
|
|
23
|
+
"""
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import argparse
|
|
27
|
+
import json
|
|
28
|
+
import threading
|
|
29
|
+
import time
|
|
30
|
+
|
|
31
|
+
from .node import Node
|
|
32
|
+
from .rns_transport import RNSTransport
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def coerce(value: str):
    """Convert a CLI string to ``bool``, ``int`` or ``float`` when it looks like one.

    The checks run in this order: case-insensitive ``true``/``false`` become
    booleans, then ``int(value)`` is tried, then ``float(value)``. Anything
    that matches none of these is returned unchanged as a string.

    Strings that ``float()`` parses to a non-finite number (``"nan"``,
    ``"inf"``, ``"-Infinity"``, ``"1e999"``, ...) are deliberately kept as
    strings: the result ends up in an event payload that is JSON-encoded, and
    NaN/Infinity have no representation in standard JSON.
    """
    import math  # local import so this block does not depend on file-level imports

    low = value.lower()
    if low in ("true", "false"):
        return low == "true"
    try:
        return int(value)
    except ValueError:
        pass
    try:
        number = float(value)
    except ValueError:
        return value
    # A word such as "nan" or "inf" is far more likely text than a measurement.
    return number if math.isfinite(number) else value
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def gossip_loop(node: Node, interval: float):
    """Call ``node.gossip_round()`` forever, sleeping ``interval`` seconds between rounds.

    This function never returns; ``main`` runs it as the target of a daemon
    thread. A round that raises is logged and retried on the next tick instead
    of ending the loop, because an uncaught exception here would terminate the
    thread and stop all further gossip for the lifetime of the process.
    Only ``Exception`` subclasses are absorbed; ``SystemExit`` and
    ``KeyboardInterrupt`` still propagate.
    """
    import logging  # local import so this block does not depend on file-level imports

    log = logging.getLogger(__name__)
    while True:
        try:
            node.gossip_round()
        except Exception:
            # Thread boundary: record the traceback and keep the loop alive.
            log.exception("gossip round failed; retrying in %s s", interval)
        time.sleep(interval)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def main():
    """Command-line entry point: start a node on Reticulum and serve a small prompt.

    Steps, in order:
      1. Parse the command line.
      2. Build an ``RNSTransport`` and a ``Node`` on top of it; the author id
         is ``--name`` when given, otherwise the transport's ``node_id``.
      3. Apply every ``--create``, then every ``--set`` (values run through
         ``coerce``), then every ``--note``.
      4. Start ``gossip_loop`` on a daemon thread.
      5. Read commands from stdin until ``quit``/``exit``, end of input, or
         Ctrl-C. ``view`` prints the materialized dataset, ``front`` prints
         the frontier; anything else is ignored.
    """
    parser = argparse.ArgumentParser(description="meshlog node over Reticulum")
    parser.add_argument("--name", default=None,
                        help="author id; default = identity hash")
    parser.add_argument("--data", default=None,
                        help="append-only log file (jsonl)")
    parser.add_argument("--identity", default=None,
                        help="persistent identity file")
    parser.add_argument("--configdir", default=None,
                        help="Reticulum config dir")
    parser.add_argument("--gossip-interval", type=float, default=10.0)
    parser.add_argument("--create", action="append", nargs=2, default=[],
                        metavar=("RID", "FORM"), help="create a record")
    parser.add_argument("--set", action="append", nargs=3, default=[],
                        metavar=("RID", "FIELD", "VALUE"), help="set a field")
    parser.add_argument("--note", action="append", nargs=2, default=[],
                        metavar=("RID", "TEXT"), help="add a note")
    opts = parser.parse_args()

    transport = RNSTransport(configdir=opts.configdir, identity_path=opts.identity)
    author = opts.name or transport.node_id
    node = Node(author, transport, persist_path=opts.data)
    transport.set_frontier_provider(node.frontier)

    # Seed events requested on the command line: creates, then sets, then notes.
    for record_id, form in opts.create:
        node.create_record(record_id, form)
    for record_id, field_name, raw_value in opts.set:
        node.set_field(record_id, field_name, coerce(raw_value))
    for record_id, note_text in opts.note:
        node.add_note(record_id, note_text)

    gossiper = threading.Thread(
        target=gossip_loop,
        args=(node, opts.gossip_interval),
        daemon=True,
    )
    gossiper.start()

    print(f"meshlog node '{author}' running over Reticulum.")
    print("commands: view | front | quit")
    try:
        while True:
            command = input("> ").strip().lower()
            if command in ("quit", "exit"):
                break
            if command == "view":
                print(json.dumps(node.view(), indent=2, sort_keys=True))
            elif command == "front":
                print(json.dumps(node.frontier(), sort_keys=True))
    except (EOFError, KeyboardInterrupt):
        # End of input or Ctrl-C is a normal way to leave the prompt.
        pass
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
if __name__ == "__main__":
|
|
101
|
+
main()
|
meshlog/event.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""The Event: the only thing that ever crosses the wire or hits disk.
|
|
2
|
+
|
|
3
|
+
Design choices that make the distributed-systems math easy:
|
|
4
|
+
|
|
5
|
+
* **Immutable.** An event is never edited. "Edits" are new events that
|
|
6
|
+
reference an earlier record. This is what gives us a free audit trail and,
|
|
7
|
+
more importantly, conflict-free merges: adding events to a set never
|
|
8
|
+
conflicts with adding other events.
|
|
9
|
+
|
|
10
|
+
* **Per-author feeds.** Every event carries (author, seq). For a given author,
|
|
11
|
+
seq is strictly contiguous: 1, 2, 3, ... This is the Secure-Scuttlebutt
|
|
12
|
+
trick. It collapses "what do you have?" from "enumerate a set" down to a
|
|
13
|
+
single integer per author (the high-water mark), which is what makes the
|
|
14
|
+
anti-entropy digest tiny enough to ride inside a Reticulum announce.
|
|
15
|
+
|
|
16
|
+
* **Hash-chained per feed.** Each event names the id of the previous event in
|
|
17
|
+
its own feed (``prev``). That makes a feed tamper-evident and lets a
|
|
18
|
+
receiver verify it is appending contiguous, authentic history rather than a
|
|
19
|
+
forged gap. (Trusted-collaborator model: we still want integrity, just not
|
|
20
|
+
anonymity.)
|
|
21
|
+
|
|
22
|
+
* **Content-addressed id.** ``id`` is a hash of the whole event. Receiving the
|
|
23
|
+
same event twice is trivially detected and ignored -> idempotent ingest.
|
|
24
|
+
"""
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import hashlib
|
|
28
|
+
import json
|
|
29
|
+
from dataclasses import dataclass, field
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
# Truncated hash length in bytes. 16 bytes = 128 bits is plenty for collision
|
|
33
|
+
# resistance here and keeps ids compact on low-bandwidth links.
|
|
34
|
+
_ID_BYTES = 16
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _canonical(obj: Any) -> bytes:
    """Return ``obj`` as compact JSON with sorted keys, encoded as UTF-8 bytes.

    Sorting the keys and fixing the separators makes the output independent of
    dict insertion order, so equal objects always produce equal bytes.
    """
    text = json.dumps(obj, separators=(",", ":"), sort_keys=True)
    return text.encode("utf-8")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(frozen=True)
class Event:
    """One immutable entry in an author's feed.

    ``id`` is a truncated SHA-256 over the other six fields, so it both
    identifies the event and lets a receiver check that the content was not
    altered (``is_authentic``). Use ``from_parts`` to mint a new event with the
    id computed for you; use ``from_wire`` to rebuild one from its wire dict.
    """

    # Stable node id (in RNS: identity hash hex).
    author: str
    # Per-author sequence number, 1-based and contiguous.
    seq: int
    # Hybrid logical clock stamp: (wall_ms, counter).
    hlc: tuple[int, int]
    # Domain event type, e.g. "field.set".
    kind: str
    # Domain data.
    payload: dict
    # Id of the previous event in this author's feed ("" when seq == 1).
    prev: str
    # Content hash; filled in by from_parts().
    id: str = field(default="")

    # ---- construction -----------------------------------------------------
    @staticmethod
    def _hash_fields(author, seq, hlc, kind, payload, prev) -> str:
        """Hash the six content fields and return the truncated hex digest."""
        hashed = {
            "author": author,
            "seq": seq,
            "hlc": list(hlc),
            "kind": kind,
            "payload": payload,
            "prev": prev,
        }
        digest = hashlib.sha256(_canonical(hashed)).hexdigest()
        # Two hex characters per byte.
        return digest[: _ID_BYTES * 2]

    @classmethod
    def from_parts(cls, author, seq, hlc, kind, payload, prev) -> "Event":
        """Build an event from its content fields, computing ``id`` from them."""
        event_id = cls._hash_fields(author, seq, tuple(hlc), kind, payload, prev)
        return cls(
            author=author,
            seq=seq,
            hlc=tuple(hlc),
            kind=kind,
            payload=payload,
            prev=prev,
            id=event_id,
        )

    # ---- integrity --------------------------------------------------------
    def recompute_id(self) -> str:
        """Return the id this event's current content hashes to."""
        return self._hash_fields(
            self.author, self.seq, self.hlc, self.kind, self.payload, self.prev
        )

    def is_authentic(self) -> bool:
        """Return True when the stored ``id`` matches the content hash.

        Only the hash is checked. No signature is verified here, so this shows
        the content is self-consistent, not who wrote it.
        """
        return self.recompute_id() == self.id

    # ---- wire encoding ----------------------------------------------------
    def to_wire(self) -> dict:
        """Return the event as a dict with single-letter keys (a, s, h, k, p, v, i)."""
        return dict(
            a=self.author,
            s=self.seq,
            h=list(self.hlc),
            k=self.kind,
            p=self.payload,
            v=self.prev,
            i=self.id,
        )

    @classmethod
    def from_wire(cls, d: dict) -> "Event":
        """Rebuild an event from a ``to_wire`` dict; the id is taken as given, not recomputed."""
        return cls(
            author=d["a"],
            seq=d["s"],
            hlc=tuple(d["h"]),
            kind=d["k"],
            payload=d["p"],
            prev=d["v"],
            id=d["i"],
        )
|
meshlog/feedstore.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""FeedStore: the append-only log, organised as one contiguous feed per author.
|
|
2
|
+
|
|
3
|
+
This is the heart of the replication model. It enforces three invariants that
|
|
4
|
+
together make merging safe over a constantly-partitioning network:
|
|
5
|
+
|
|
6
|
+
1. **Contiguity.** A feed accepts seq N only when it already holds 1..N-1.
|
|
7
|
+
Out-of-order arrivals are buffered until the gap fills. This guarantees
|
|
8
|
+
that a high-water mark (an integer per author) is a *complete* description
|
|
9
|
+
of what we hold for that author -- no holes.
|
|
10
|
+
|
|
11
|
+
2. **Idempotence.** Re-ingesting an event we already have is a no-op. So we can
|
|
12
|
+
push aggressively and overlap freely; duplicates cost a hash compare.
|
|
13
|
+
|
|
14
|
+
3. **Chain integrity.** seq N's ``prev`` must equal the id of seq N-1. A feed
|
|
15
|
+
that fails this is rejected rather than silently corrupting the merge.
|
|
16
|
+
|
|
17
|
+
Because of (1)-(3), the global dataset is just the set-union of every node's
|
|
18
|
+
feeds, and union is commutative, associative, and idempotent -> the database is
|
|
19
|
+
a CRDT (a grow-only set of immutable events) and converges regardless of the
|
|
20
|
+
order or grouping in which events arrive.
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import json
|
|
25
|
+
import os
|
|
26
|
+
from typing import Iterable
|
|
27
|
+
|
|
28
|
+
from .event import Event
|
|
29
|
+
|
|
30
|
+
Frontier = dict[str, int] # author -> highest contiguous seq held
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class FeedStore:
    """Append-only event log kept as one contiguous list per author.

    State:
      * ``_feeds``   -- author -> list of events, where index ``i`` holds seq ``i + 1``.
      * ``_ids``     -- ids of every event stored in ``_feeds``.
      * ``_pending`` -- author -> {seq: event} for events that arrived ahead
        of a gap and are waiting for it to fill.

    When ``persist_path`` is given, every stored event is also appended to
    that file as one JSON line, and an existing file is loaded on construction.
    """

    def __init__(self, persist_path: str | None = None):
        self._feeds: dict[str, list[Event]] = {}
        self._ids: set[str] = set()
        self._pending: dict[str, dict[int, Event]] = {}
        self._persist_path = persist_path
        if persist_path and os.path.exists(persist_path):
            self._load()

    # ---- queries ----------------------------------------------------------
    def frontier(self) -> Frontier:
        """Return author -> number of contiguous events held for that author."""
        return {author: len(feed) for author, feed in self._feeds.items()}

    def has(self, event_id: str) -> bool:
        """Return True when an event with this id is stored."""
        return event_id in self._ids

    def all_events(self) -> list[Event]:
        """Return every stored event, feed by feed, each feed in seq order."""
        out: list[Event] = []
        for feed in self._feeds.values():
            out.extend(feed)
        return out

    def event_count(self) -> int:
        """Return the number of stored events."""
        return len(self._ids)

    def last_id(self, author: str) -> str:
        """Return the id of ``author``'s newest stored event, or "" if there is none."""
        feed = self._feeds.get(author)
        return feed[-1].id if feed else ""

    def next_seq(self, author: str) -> int:
        """Return the seq the next event appended to ``author``'s feed must carry."""
        return len(self._feeds.get(author, [])) + 1

    # ---- anti-entropy -----------------------------------------------------
    def delta_for(self, remote: Frontier, limit: int | None = None) -> list[Event]:
        """Return the stored events that lie above ``remote``'s per-author marks.

        For each author held here, everything after the remote's mark is
        included; an author absent from ``remote`` counts as mark 0. A mark
        that is negative or not an integer is also treated as 0, so a
        malformed frontier can never select the wrong slice of a feed.

        The result is sorted by (author, seq). When ``limit`` is not None,
        only the first ``limit`` events of that ordering are returned.
        """
        out: list[Event] = []
        for author, feed in self._feeds.items():
            have = remote.get(author, 0)
            if not isinstance(have, int) or have < 0:
                # feed[-1:] would send only the tail; resend the whole feed instead.
                have = 0
            if len(feed) > have:
                out.extend(feed[have:])
        # Lower seqs first per author, so a receiver can append in order.
        out.sort(key=lambda e: (e.author, e.seq))
        if limit is not None:
            out = out[:limit]
        return out

    # ---- mutation ---------------------------------------------------------
    def append_local(self, event: Event) -> None:
        """Append an event minted by this node.

        The caller is responsible for having set ``seq`` and ``prev``; only
        ``seq`` is checked here.

        Raises:
            AssertionError: if ``event.seq`` is not the next seq of its feed.
                Raised explicitly (not via ``assert``) so the check still runs
                under ``python -O``.
        """
        feed = self._feeds.get(event.author, [])
        if event.seq != len(feed) + 1:
            raise AssertionError("local feed must stay contiguous")
        # Register the feed only once the event is known to be acceptable.
        self._feeds[event.author] = feed
        self._store(feed, event)

    def ingest(self, event: Event) -> bool:
        """Offer one remote event; return True only if it was appended by this call.

        Returns False when the event fails ``is_authentic()``, is already
        stored, carries a seq at or below what is held, or has a ``prev`` that
        does not match the current feed tip. An event whose seq is ahead of
        the feed is buffered and also returns False; it is appended later,
        once the gap fills. Rejected and buffered events leave ``frontier()``
        unchanged.
        """
        if not event.is_authentic():
            return False
        if event.id in self._ids:
            return False  # idempotent

        # Use .get so a rejected event does not create an empty feed entry.
        feed = self._feeds.get(event.author, [])
        expected = len(feed) + 1

        if event.seq < expected:
            return False  # stale / already have a different copy of this slot
        if event.seq > expected:
            # Future event: buffer and wait for the gap to fill.
            self._pending.setdefault(event.author, {})[event.seq] = event
            return False

        # event.seq == expected: verify the chain link, then append.
        prev_id = feed[-1].id if feed else ""
        if event.prev != prev_id:
            return False  # forged or forked history; reject

        self._feeds[event.author] = feed
        self._store(feed, event)

        # Buffered successors may now be contiguous.
        self._drain(event.author)
        return True

    def ingest_many(self, events: Iterable[Event]) -> int:
        """Ingest a batch in any order; return how many events the store gained.

        The batch is re-scanned until a full pass appends nothing. The count
        is the growth of the store over the whole call, so it includes events
        that were appended from the pending buffer as a side effect, not only
        those for which ``ingest`` itself returned True.
        """
        events = list(events)
        before = len(self._ids)
        progress = True
        while progress:
            progress = False
            for ev in events:
                if ev.id not in self._ids and self.ingest(ev):
                    progress = True
        return len(self._ids) - before

    def _drain(self, author: str) -> None:
        """Append buffered events for ``author`` while they continue the feed."""
        buf = self._pending.get(author)
        if not buf:
            return
        feed = self._feeds[author]
        while (nxt := buf.pop(len(feed) + 1, None)) is not None:
            expected_prev = feed[-1].id if feed else ""
            if nxt.prev != expected_prev:
                # Forked history at this slot; the popped event is discarded.
                break
            self._store(feed, nxt)

    def _store(self, feed: list[Event], event: Event) -> None:
        """Append ``event`` to ``feed``, index its id, and persist it."""
        feed.append(event)
        self._ids.add(event.id)
        self._persist(event)

    # ---- persistence (append-only jsonl; the log *is* the database) -------
    def _persist(self, event: Event) -> None:
        """Append the event's wire form as one JSON line; no-op without a path."""
        if not self._persist_path:
            return
        with open(self._persist_path, "a", encoding="utf-8") as fh:
            fh.write(json.dumps(event.to_wire()) + "\n")

    def _load(self) -> None:
        """Rebuild ``_feeds`` and ``_ids`` from the log file.

        Blank lines are skipped. Rows are sorted by (author, seq) and a row is
        kept only if it is the next seq of its feed and its id is new. Neither
        the content hash nor the ``prev`` link is re-checked here.
        """
        rows = []
        with open(self._persist_path, encoding="utf-8") as fh:
            for line in fh:
                line = line.strip()
                if line:
                    rows.append(Event.from_wire(json.loads(line)))
        # Sort so feeds load contiguously.
        rows.sort(key=lambda e: (e.author, e.seq))
        for ev in rows:
            feed = self._feeds.get(ev.author, [])
            if ev.seq == len(feed) + 1 and ev.id not in self._ids:
                self._feeds[ev.author] = feed
                feed.append(ev)
                self._ids.add(ev.id)
|
meshlog/hlc.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Hybrid Logical Clocks (HLC).
|
|
2
|
+
|
|
3
|
+
Why not wall-clock timestamps? In this network there is no NTP, devices drift,
|
|
4
|
+
and a node may be offline for days. Ordering events by wall time alone would let
|
|
5
|
+
a laggy clock silently win a last-writer-wins race. HLCs fuse physical time with
|
|
6
|
+
a logical counter so that:
|
|
7
|
+
|
|
8
|
+
* timestamps stay close to real wall-clock time (good for humans/display), and
|
|
9
|
+
* causality is never violated: if event B is created after observing event A,
|
|
10
|
+
then hlc(B) > hlc(A), regardless of clock skew.
|
|
11
|
+
|
|
12
|
+
Reference: Kulkarni et al., "Logical Physical Clocks and Consistent Snapshots
|
|
13
|
+
in Globally Distributed Databases" (2014).
|
|
14
|
+
|
|
15
|
+
An HLC stamp here is (wall_ms, counter). For a *total* order across authors we
|
|
16
|
+
break ties with the author id at the call site (see reducer.total_order_key).
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import time
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _phys_now_ms() -> int:
    """Return the current ``time.time()`` reading as whole milliseconds."""
    seconds = time.time()
    return int(seconds * 1000)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class HLC:
    """Hybrid logical clock state for one node.

    Call ``now()`` to stamp an event this node creates, and ``update()`` with
    the stamp of an event received from elsewhere. Both advance the clock and
    return the new ``(wall, counter)`` stamp.
    """

    # Largest wall-clock value (ms) this clock has adopted so far.
    wall: int = 0
    # Logical counter that orders stamps sharing the same ``wall``.
    counter: int = 0
    # Source of physical time; replaceable so tests can be deterministic.
    _phys: callable = _phys_now_ms

    def now(self) -> tuple[int, int]:
        """Advance the clock for a local event and return the new stamp."""
        physical = self._phys()
        if physical <= self.wall:
            # Physical time stood still or went backwards: only the counter moves.
            self.counter += 1
        else:
            self.wall = physical
            self.counter = 0
        return (self.wall, self.counter)

    def update(self, remote: tuple[int, int]) -> tuple[int, int]:
        """Merge a received stamp into the clock and return the new stamp.

        The new wall value is the largest of the local wall, the remote wall
        and the physical clock. The counter continues from whichever side
        supplied that maximum, and restarts at 0 when physical time alone is
        ahead.
        """
        remote_wall, remote_counter = remote
        physical = self._phys()
        merged_wall = max(self.wall, remote_wall, physical)

        local_is_max = merged_wall == self.wall
        remote_is_max = merged_wall == remote_wall
        if local_is_max and remote_is_max:
            next_counter = max(self.counter, remote_counter) + 1
        elif local_is_max:
            next_counter = self.counter + 1
        elif remote_is_max:
            next_counter = remote_counter + 1
        else:
            # A fresh physical tick beyond anything seen so far.
            next_counter = 0

        self.wall, self.counter = merged_wall, next_counter
        return (self.wall, self.counter)
|
meshlog/node.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""The Node and the anti-entropy protocol.
|
|
2
|
+
|
|
3
|
+
Anti-entropy (a.k.a. gossip reconciliation) is deliberately tiny:
|
|
4
|
+
|
|
5
|
+
When two peers can talk, each sends the other its FRONTIER (the per-author
|
|
6
|
+
high-water-mark vector). On receiving a peer's frontier, a node computes the
|
|
7
|
+
slice of its log the peer is missing and PUSHES it. Ingest is idempotent and
|
|
8
|
+
gap-tolerant, so overlapping pushes and reordered batches are harmless.
|
|
9
|
+
|
|
10
|
+
message := {"t": "frontier", "f": {author: seq}}
|
|
11
|
+
| {"t": "events", "e": [event_wire, ...]}
|
|
12
|
+
|
|
13
|
+
That's the whole protocol. One frontier exchange per contact reconciles the pair
|
|
14
|
+
completely; transitive (multi-hop) convergence happens because each node relays
|
|
15
|
+
every author's events it holds, not just its own. Over a partitioning network,
|
|
16
|
+
repeated pairwise contacts compose into global convergence -- which is exactly
|
|
17
|
+
the store-and-forward property we want for "base camp + teams that never meet."
|
|
18
|
+
|
|
19
|
+
Batching: deltas are chunked (``MAX_EVENTS_PER_MSG``) so a single message stays
|
|
20
|
+
small on a low-bandwidth link. A real radio build would additionally send large
|
|
21
|
+
batches as an RNS Resource rather than inline packets (see rns_transport.py).
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
|
|
27
|
+
from .event import Event
|
|
28
|
+
from .feedstore import FeedStore, Frontier
|
|
29
|
+
from .hlc import HLC
|
|
30
|
+
from .reducer import materialize
|
|
31
|
+
from .transport import Transport
|
|
32
|
+
|
|
33
|
+
MAX_EVENTS_PER_MSG = 32
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Node:
    """One replica: an author id, a ``FeedStore``, an ``HLC`` and a transport.

    Local changes are minted with ``emit`` (or the record helpers built on
    it). ``gossip_round`` sends this node's frontier to every reachable peer.
    ``_on_message`` is registered as the transport's receive callback and
    answers a peer's frontier with the events that peer lacks, or ingests the
    events a peer pushed.

    Incoming messages are treated as untrusted: anything that does not decode
    or does not have the expected shape is dropped without raising into the
    transport.
    """

    def __init__(self, author: str, transport: Transport,
                 persist_path: str | None = None, hlc: HLC | None = None):
        self.author = author
        self.store = FeedStore(persist_path)
        self.hlc = hlc or HLC()
        self.transport = transport
        transport.set_receive(self._on_message)

    # ---- producing local events ------------------------------------------
    def emit(self, kind: str, payload: dict) -> Event:
        """Create the next event in this node's own feed, store it, and return it."""
        seq = self.store.next_seq(self.author)
        ev = Event.from_parts(
            author=self.author,
            seq=seq,
            hlc=self.hlc.now(),
            kind=kind,
            payload=payload,
            prev=self.store.last_id(self.author),
        )
        self.store.append_local(ev)
        return ev

    # Convenience domain helpers (KoboToolbox-style records) ----------------
    def create_record(self, record_id: str, form: str) -> Event:
        """Emit a ``record.create`` event naming this node as creator."""
        return self.emit("record.create", {"record_id": record_id, "form": form, "by": self.author})

    def set_field(self, record_id: str, field: str, value) -> Event:
        """Emit a ``field.set`` event for one field of a record."""
        return self.emit("field.set", {"record_id": record_id, "field": field, "value": value})

    def add_note(self, record_id: str, text: str) -> Event:
        """Emit a ``note.add`` event for a record."""
        return self.emit("note.add", {"record_id": record_id, "text": text})

    def add_attachment(self, record_id: str, field: str, blob_hash: str, size: int, mime: str) -> Event:
        """Emit an ``attachment.add`` event describing a blob by hash, size and MIME type."""
        return self.emit("attachment.add", {
            "record_id": record_id, "field": field,
            "blob_hash": blob_hash, "size": size, "mime": mime,
        })

    # ---- the materialized view -------------------------------------------
    def view(self) -> dict:
        """Return the dataset materialized from every stored event."""
        return materialize(self.store.all_events())

    def frontier(self) -> Frontier:
        """Return the store's current frontier."""
        return self.store.frontier()

    # ---- anti-entropy: initiate ------------------------------------------
    def gossip_round(self) -> None:
        """Send our frontier to every peer the transport reports as reachable."""
        msg = self._encode({"t": "frontier", "f": self.store.frontier()})
        for peer in self.transport.reachable_peers():
            self.transport.send(peer, msg)

    # ---- anti-entropy: react ---------------------------------------------
    def _on_message(self, src: str, raw: bytes) -> None:
        """Handle one message from ``src``.

        ``{"t": "frontier", "f": {...}}``: reply with the events ``src`` is
        missing, split into messages of at most ``MAX_EVENTS_PER_MSG`` events.

        ``{"t": "events", "e": [...]}``: decode the events, discard any that
        are malformed or fail the content-hash check, advance the HLC past the
        remaining stamps, then ingest them.

        Anything else (undecodable bytes, a non-object payload, an unknown or
        missing ``t``, a wrongly typed ``f``/``e``) is ignored.
        """
        try:
            msg = self._decode(raw)
        except (ValueError, RecursionError):
            # Bad UTF-8, bad JSON, or pathologically nested JSON.
            return
        if not isinstance(msg, dict):
            return

        msg_type = msg.get("t")
        if msg_type == "frontier":
            remote = msg.get("f")
            if not isinstance(remote, dict):
                return
            # Peer told us what it has; push what it lacks, in bounded chunks.
            delta = self.store.delta_for(remote)
            for i in range(0, len(delta), MAX_EVENTS_PER_MSG):
                chunk = delta[i:i + MAX_EVENTS_PER_MSG]
                self.transport.send(src, self._encode(
                    {"t": "events", "e": [e.to_wire() for e in chunk]}
                ))
        elif msg_type == "events":
            wire_events = msg.get("e")
            if not isinstance(wire_events, list):
                return
            incoming = []
            for d in wire_events:
                ev = self._decode_event(d)
                if ev is not None:
                    incoming.append(ev)
            # Advance our HLC only past stamps of events that passed the
            # checks, so a forged event cannot drag the clock forward.
            for e in incoming:
                self.hlc.update(e.hlc)
            self.store.ingest_many(incoming)

    @staticmethod
    def _decode_event(d) -> Event | None:
        """Turn one wire dict into an ``Event``, or None if it must be dropped.

        An event is dropped when a required key is missing, when a field has
        the wrong type for the ``Event`` declaration, when the stamp is not a
        pair of integers, or when ``is_authentic()`` is False.
        """
        try:
            ev = Event.from_wire(d)
            well_typed = (
                isinstance(ev.author, str)
                and isinstance(ev.seq, int)
                and isinstance(ev.kind, str)
                and isinstance(ev.payload, dict)
                and isinstance(ev.prev, str)
                and isinstance(ev.id, str)
                and len(ev.hlc) == 2
                and all(isinstance(part, int) for part in ev.hlc)
            )
            if not well_typed or not ev.is_authentic():
                return None
        except (KeyError, TypeError, ValueError):
            return None
        return ev

    # ---- wire codec (swap for msgpack/CBOR in production) -----------------
    @staticmethod
    def _encode(obj: dict) -> bytes:
        """Encode a message dict as compact UTF-8 JSON."""
        return json.dumps(obj, separators=(",", ":")).encode("utf-8")

    @staticmethod
    def _decode(raw: bytes) -> dict:
        """Decode UTF-8 JSON bytes; raises ``ValueError`` on bad UTF-8 or bad JSON."""
        return json.loads(raw.decode("utf-8"))
|
meshlog/reducer.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Reducer: fold the event set into the materialized view (the "current" data).
|
|
2
|
+
|
|
3
|
+
The view is a pure function of the *set* of events. Two nodes holding the same
|
|
4
|
+
set always compute byte-identical views, no matter what order events arrived in.
|
|
5
|
+
That is the property that turns "everyone eventually holds the same events"
|
|
6
|
+
(guaranteed by FeedStore + anti-entropy) into "everyone eventually shows the
|
|
7
|
+
same data" (what the user actually cares about).
|
|
8
|
+
|
|
9
|
+
Conflict handling:
|
|
10
|
+
* Most events are additive (notes, attachments) and never conflict.
|
|
11
|
+
* Mutable scalar fields use Last-Writer-Wins, but "last" is decided by the
|
|
12
|
+
event's HLC stamp -- NOT arrival order and NOT wall clock -- with the author
|
|
13
|
+
id as a deterministic tiebreaker. So concurrent edits from two partitions
|
|
14
|
+
resolve to the same winner on every node.
|
|
15
|
+
|
|
16
|
+
Domain model here is a stand-in for KoboToolbox-style assessment records:
|
|
17
|
+
record.create {record_id, form, by}
|
|
18
|
+
field.set {record_id, field, value}
|
|
19
|
+
attachment.add {record_id, field, blob_hash, size, mime}
|
|
20
|
+
note.add {record_id, text}
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from dataclasses import dataclass, field as dc_field
|
|
25
|
+
|
|
26
|
+
from .event import Event
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def total_order_key(ev: Event):
    """Return the sort key ``(wall_ms, counter, author, seq)`` for an event.

    The HLC stamp comes first; author and then seq break ties between events
    that carry the same stamp.
    """
    wall_ms, counter = ev.hlc[0], ev.hlc[1]
    return (wall_ms, counter, ev.author, ev.seq)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class Record:
    """Mutable accumulator for one record while events are being folded."""

    record_id: str
    form: str = ""
    created_by: str = ""
    # field name -> current value (last writer wins).
    fields: dict = dc_field(default_factory=dict)
    # field name -> order key of the event that supplied the current value.
    _field_stamp: dict = dc_field(default_factory=dict)
    # [{field, blob_hash, size, mime}, ...]
    attachments: list = dc_field(default_factory=list)
    # [text, ...] in the order they were appended.
    notes: list = dc_field(default_factory=list)

    def public(self) -> dict:
        """Return the record as a plain dict, without the internal stamps.

        ``fields`` is emitted with its keys in sorted order, ``attachments``
        sorted by (field, blob_hash), and ``notes`` as a copy in stored order.
        """
        ordered_fields = {name: self.fields[name] for name in sorted(self.fields)}

        ordered_attachments = list(self.attachments)
        ordered_attachments.sort(key=lambda item: (item["field"], item["blob_hash"]))

        return {
            "record_id": self.record_id,
            "form": self.form,
            "created_by": self.created_by,
            "fields": ordered_fields,
            "attachments": ordered_attachments,
            "notes": self.notes[:],
        }
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def materialize(events: list[Event]) -> dict[str, dict]:
    """Fold events into ``{record_id: public_record_dict}``.

    Events are processed in ``total_order_key`` order, so the result depends
    only on which events are present, not on the order of ``events``.

    Handling per kind:
      * ``record.create``  -- sets ``form`` and ``created_by`` when present.
      * ``field.set``      -- last writer (by total order) wins per field.
      * ``attachment.add`` -- appended to the record's attachments.
      * ``note.add``       -- appended to the record's notes.
      * any other kind     -- ignored.

    Events are permanent once replicated, so a single malformed one must not
    be able to break the view. An event is therefore skipped, rather than
    raising, when its payload is not a dict, its ``record_id`` is not a
    string, or a key that its kind requires (``field``/``value``,
    ``field``/``blob_hash``, ``text``) is missing or of a type that cannot be
    used as a field name.
    """
    records: dict[str, Record] = {}
    # Process in HLC total order so LWW is independent of arrival order.
    for ev in sorted(events, key=total_order_key):
        p = ev.payload
        if not isinstance(p, dict):
            continue
        rid = p.get("record_id")
        if not isinstance(rid, str):
            # Missing or non-string ids cannot key the str-keyed result.
            continue

        if ev.kind == "record.create":
            rec = records.setdefault(rid, Record(rid))
            rec.form = p.get("form", rec.form)
            rec.created_by = p.get("by", rec.created_by)
        elif ev.kind == "field.set":
            name = p.get("field")
            if not isinstance(name, str) or "value" not in p:
                continue
            rec = records.setdefault(rid, Record(rid))
            key = total_order_key(ev)
            # LWW: only overwrite if this event is later in the total order.
            if key >= rec._field_stamp.get(name, (-1, -1, "", -1)):
                rec.fields[name] = p["value"]
                rec._field_stamp[name] = key
        elif ev.kind == "attachment.add":
            name = p.get("field")
            blob_hash = p.get("blob_hash")
            # Both values are later used as sort keys, so they must be strings.
            if not isinstance(name, str) or not isinstance(blob_hash, str):
                continue
            rec = records.setdefault(rid, Record(rid))
            rec.attachments.append(
                {
                    "field": name,
                    "blob_hash": blob_hash,
                    "size": p.get("size", 0),
                    "mime": p.get("mime", ""),
                }
            )
        elif ev.kind == "note.add":
            if "text" not in p:
                continue
            rec = records.setdefault(rid, Record(rid))
            rec.notes.append(p["text"])
        # Unknown kinds are ignored: forward-compatible by construction.
    return {rid: rec.public() for rid, rec in records.items()}
|