nedb-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nedb/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ """
2
+ NEDB — a versioned, self-compressing, time-traveling embedded database.
3
+
4
+ * Replay-protected & idempotent: every write carries a monotonic nonce and an
5
+ optional idempotency key, enforced by a hash-chained append-only log.
6
+ * Time-travel: read the database AS OF any past sequence number.
7
+ * Relational: first-class, time-travel-aware relations with O(1) traversal.
8
+ * Filterable / sortable / searchable: equality, ordered, and full-text indexes.
9
+ * Queryable: NQL text queries and a fluent builder that share one plan.
10
+ * git-style files with Cascade compression: content-defined chunking + dedup +
11
+ temperature tiers, with a Merkle root per version anchorable on-chain.
12
+
13
+ This pure-Python package is the reference implementation. The production speed core
14
+ is Rust (see ../rust), exposed to PyPI via PyO3 and to npm via napi-rs.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ from .engine import NEDB
19
+ from .log import Op, OpLog, ReplayError
20
+ from .query import Query, parse_nql
21
+
22
+ __all__ = ["NEDB", "OpLog", "Op", "ReplayError", "Query", "parse_nql"]
23
+ __version__ = "0.1.0"
nedb/cascade.py ADDED
@@ -0,0 +1,130 @@
1
+ """
2
+ nedb.cascade — the Cascade compression pipeline + content-addressed blob store.
3
+
4
+ This is what makes NEDB double as a git-style file manager with maximum compression
5
+ WITHOUT inventing a new entropy coder. The novelty is the pipeline composition:
6
+
7
+ 1. Content-defined chunking (Gear rolling hash) — boundaries follow content, so a
8
+ one-byte insert only changes the chunk(s) around it, not everything after it.
9
+ 2. Content-addressed dedup (BLAKE) — identical chunks across all files and all
10
+ versions are stored exactly once.
11
+ 3. Temperature tiers — warm data uses a fast codec (zstd in prod; zlib in this
12
+ reference), cold/archival history uses a maximum-ratio codec (LZMA).
13
+
14
+ The production pipeline adds similarity-picked binary deltas (zstd --patch-from) and
15
+ schema-aware columnar transforms before the entropy stage; both are documented in
16
+ docs/SPEC.md and stubbed for the reference engine.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import hashlib
21
+ import lzma
22
+ import random
23
+ import zlib
24
+ from typing import Dict, List
25
+
26
+ from .merkle import merkle_root
27
+
28
# --- Gear-hash content-defined chunking -------------------------------------
_MASK = (1 << 13) - 1  # ~8 KiB average chunk
_MIN = 2 * 1024  # never cut a chunk shorter than this (except the final one)
_MAX = 64 * 1024  # force a cut once a chunk reaches this length
_M64 = 0xFFFFFFFFFFFFFFFF  # keeps the rolling hash within 64 bits
# One pseudo-random 64-bit value per byte value; seeds are fixed so the table
# is the same on every run.
_GEAR = [random.Random(0x12345678 + i).getrandbits(64) for i in range(256)]


def chunk(data: bytes) -> List[bytes]:
    """Split *data* into content-defined chunks using a Gear rolling hash.

    A chunk ends at the first position where it is at least ``_MIN`` bytes long
    and the low 13 bits of the rolling hash are all zero, or at ``_MAX`` bytes,
    or at the end of the input, whichever comes first. The rolling hash restarts
    at zero for every chunk. Concatenating the returned chunks reproduces *data*;
    empty input yields an empty list.
    """
    pieces: List[bytes] = []
    total = len(data)
    start = 0
    while start < total:
        stop = min(start + _MAX, total)
        rolling = 0
        end = stop  # fall back to the hard limit when no boundary is found
        for offset in range(start, stop):
            rolling = ((rolling << 1) + _GEAR[data[offset]]) & _M64
            consumed = offset + 1 - start
            if consumed >= _MIN and not (rolling & _MASK):
                end = offset + 1
                break
        pieces.append(data[start:end])
        start = end
    return pieces
54
+
55
+
56
+ def _blake(b: bytes) -> str:
57
+ return hashlib.blake2b(b, digest_size=32).hexdigest()
58
+
59
+
60
+ # --- temperature tiers ------------------------------------------------------
61
def warm_compress(b: bytes) -> bytes:  # zstd stand-in in the reference
    """Compress *b* for the warm tier using zlib at compression level 6."""
    level = 6
    packed = zlib.compress(b, level)
    return packed
63
+
64
+
65
def warm_decompress(b: bytes) -> bytes:
    """Inflate a zlib stream and return the original bytes."""
    restored = zlib.decompress(b)
    return restored
67
+
68
+
69
def cold_compress(b: bytes) -> bytes:  # real LZMA — the maximum-ratio archival tier
    """Compress *b* for the cold tier with LZMA at preset 9 plus the EXTREME flag."""
    max_ratio_preset = 9 | lzma.PRESET_EXTREME
    return lzma.compress(b, preset=max_ratio_preset)
71
+
72
+
73
def cold_decompress(b: bytes) -> bytes:
    """Decompress an LZMA stream and return the original bytes."""
    restored = lzma.decompress(b)
    return restored
75
+
76
+
77
class BlobStore:
    """Content-addressed, deduplicated, tiered blob store with versioned files."""

    def __init__(self, tier: str = "warm") -> None:
        # "cold" selects the LZMA codec; any other value selects the zlib codec.
        self.tier = tier
        # chunk hash (hex) -> compressed chunk bytes
        self.chunks: Dict[str, bytes] = {}
        # file name -> {"versions": [recipe, ...], "roots": [merkle root, ...]}
        self.files: Dict[str, Dict[str, list]] = {}
        # Running total of uncompressed bytes handed to put_file.
        self.logical_bytes = 0
        # Number of chunks that were already present when a file was stored.
        self.dedup_hits = 0

    def _compress(self, b: bytes) -> bytes:
        """Compress one chunk with the codec of this store's tier."""
        if self.tier == "cold":
            return cold_compress(b)
        return warm_compress(b)

    def _decompress(self, b: bytes) -> bytes:
        """Reverse ``_compress`` for this store's tier."""
        if self.tier == "cold":
            return cold_decompress(b)
        return warm_decompress(b)

    def put_file(self, name: str, data: bytes) -> int:
        """Store *data* as a new version of *name* and return its version index.

        The data is chunked, each chunk is hashed, and only chunks whose hash is
        not yet in the store are compressed and kept. The ordered list of chunk
        hashes (the recipe) and its Merkle root are appended to the file's
        history.
        """
        digests: List[str] = []
        for piece in chunk(data):
            digest = _blake(piece)
            digests.append(digest)
            if digest not in self.chunks:
                self.chunks[digest] = self._compress(piece)
            else:
                self.dedup_hits += 1
        self.logical_bytes += len(data)

        history = self.files.get(name)
        if history is None:
            history = {"versions": [], "roots": []}
            self.files[name] = history
        history["versions"].append(digests)
        history["roots"].append(merkle_root(digests))
        return len(history["versions"]) - 1

    def get_file(self, name: str, version: int = -1) -> bytes:
        """Reassemble and return the bytes of *name* at *version* (default: latest)."""
        digests = self.files[name]["versions"][version]
        return b"".join(self._decompress(self.chunks[digest]) for digest in digests)

    def root(self, name: str, version: int = -1) -> str:
        """Return the Merkle root recorded for *name* at *version* (default: latest)."""
        history = self.files[name]
        return history["roots"][version]

    def stored_bytes(self) -> int:
        """Return the total size of all compressed chunks held by the store."""
        total = 0
        for blob in self.chunks.values():
            total += len(blob)
        return total

    def stats(self) -> dict:
        """Return a summary of dedup and compression counters for this store.

        ``ratio`` is logical bytes divided by stored bytes, rounded to two
        decimals, or ``0.0`` while nothing is stored.
        """
        stored = self.stored_bytes()
        if stored:
            ratio = round(self.logical_bytes / stored, 2)
        else:
            ratio = 0.0
        return {
            "tier": self.tier,
            "unique_chunks": len(self.chunks),
            "dedup_hits": self.dedup_hits,
            "logical_bytes": self.logical_bytes,
            "stored_bytes": stored,
            "ratio": ratio,
        }
nedb/engine.py ADDED
@@ -0,0 +1,253 @@
1
+ """
2
+ nedb.engine — the NEDB database: log + MVCC store + relations + indexes + Cascade.
3
+
4
+ The OpLog is the source of truth. Every mutation appends an Op; `_apply` deterministically
5
+ folds an Op into the materialized state (store / relations / indexes). Because state is a
6
+ pure function of the log, we get crash recovery and determinism (rebuild) for free, and
7
+ "AS OF seq" time-travel because the log carries monotonic seqs.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ from .cascade import BlobStore
14
+ from .index import Indexes, tokenize
15
+ from .log import Op, OpLog, ReplayError # noqa: F401 (re-exported)
16
+ from .merkle import merkle_proof, merkle_verify
17
+ from .query import Query, cmp, parse_nql
18
+ from .relations import Relations
19
+ from .store import MVCCStore
20
+
21
+
22
def apply_op(store: MVCCStore, relations: Relations, indexes: Indexes, op: Op) -> None:
    """Deterministically fold one op into materialized state.

    ``put`` and ``delete`` first drop the key's current HEAD document from the
    indexes, then update the store (and, for ``put``, index the new document).
    ``link`` / ``unlink`` are forwarded to the relations structure. ``put_file``
    changes nothing here: the bytes live in the content-addressed BlobStore and
    the log records the root only. Any other op kind is ignored.
    """
    payload = op.payload
    kind = op.op

    if kind == "link":
        relations.link(payload["frm"], payload["rel"], payload["to"], op.seq)
        return
    if kind == "unlink":
        relations.unlink(payload["frm"], payload["rel"], payload["to"], op.seq)
        return
    if kind != "put" and kind != "delete":
        return

    key = payload["key"]
    coll = payload["coll"]
    # Read the new document up front so a malformed put fails before any state changes.
    doc = payload["doc"] if kind == "put" else None

    stale = store.get(key)
    if stale is not None:
        indexes.remove(coll, key, stale)

    if kind == "delete":
        store.delete(key, op.seq)
        return
    store.put(key, doc, op.seq)
    indexes.add(coll, key, doc)
44
+
45
+
46
class NEDB:
    """Database facade tying the op log to its materialized state.

    Every mutation is appended to ``self.log`` and, when the append actually
    created a new op, folded into the store / relations / indexes with
    ``apply_op``. File bytes live in one ``BlobStore`` per tier; the log records
    only each file version's name, tier, version index and Merkle root.
    """

    def __init__(self) -> None:
        self.log = OpLog()
        self.store = MVCCStore()
        self.relations = Relations()
        self.indexes = Indexes()
        # One content-addressed blob store per temperature tier.
        self.blobs: Dict[str, BlobStore] = {"warm": BlobStore("warm"), "cold": BlobStore("cold")}
        # Last nonce this engine auto-assigned, per client.
        self._nonce: Dict[str, int] = {}

    # --- nonce helper -------------------------------------------------------
    def _next(self, client: str) -> int:
        """Return the next auto-assigned nonce for *client*.

        The engine's own counter is reconciled with the highest nonce the log
        has already accepted for the client, so mixing explicit ``nonce=``
        values with auto-assigned ones never produces a stale nonce.
        """
        accepted = getattr(self.log, "_last_nonce", {}).get(client, 0)
        n = max(self._nonce.get(client, 0), accepted) + 1
        self._nonce[client] = n
        return n

    # --- mutations ----------------------------------------------------------
    def put(self, coll: str, id: str, doc: dict, client: str = "local",
            nonce: Optional[int] = None, idem: Optional[str] = None) -> dict:
        """Insert or replace document *id* in collection *coll*.

        The document is shallow-copied and given an ``_id`` field when it has
        none. Returns whatever the store holds for the key at HEAD afterwards.
        The log raises ``ReplayError`` when *nonce* does not exceed the client's
        last accepted nonce; a repeated *idem* key makes the call a no-op.
        """
        key = f"{coll}:{id}"
        doc = dict(doc)
        doc.setdefault("_id", id)
        nonce = self._next(client) if nonce is None else nonce
        op, created = self.log.append(client, nonce, "put",
                                      {"key": key, "coll": coll, "id": id, "doc": doc}, idem)
        if created:
            apply_op(self.store, self.relations, self.indexes, op)
        return self.store.get(key)

    def delete(self, coll: str, id: str, client: str = "local",
               nonce: Optional[int] = None, idem: Optional[str] = None) -> None:
        """Delete document *id* from collection *coll* (logged like any other op)."""
        key = f"{coll}:{id}"
        nonce = self._next(client) if nonce is None else nonce
        op, created = self.log.append(client, nonce, "delete",
                                      {"key": key, "coll": coll, "id": id}, idem)
        if created:
            apply_op(self.store, self.relations, self.indexes, op)

    def get(self, coll: str, id: str, as_of: Optional[int] = None) -> Optional[dict]:
        """Return document *id* of *coll*, optionally as of sequence number *as_of*."""
        return self.store.get(f"{coll}:{id}", as_of)

    # --- relations ----------------------------------------------------------
    def link(self, frm: str, rel: str, to: str, client: str = "local",
             nonce: Optional[int] = None, idem: Optional[str] = None) -> None:
        """Record relation *rel* from *frm* to *to*.

        *idem* is an optional idempotency key, accepted here for parity with
        ``put`` / ``delete``.
        """
        nonce = self._next(client) if nonce is None else nonce
        op, created = self.log.append(client, nonce, "link",
                                      {"frm": frm, "rel": rel, "to": to}, idem)
        if created:
            apply_op(self.store, self.relations, self.indexes, op)

    def unlink(self, frm: str, rel: str, to: str, client: str = "local",
               nonce: Optional[int] = None, idem: Optional[str] = None) -> None:
        """Remove relation *rel* from *frm* to *to*.

        *idem* is an optional idempotency key, accepted here for parity with
        ``put`` / ``delete``.
        """
        nonce = self._next(client) if nonce is None else nonce
        op, created = self.log.append(client, nonce, "unlink",
                                      {"frm": frm, "rel": rel, "to": to}, idem)
        if created:
            apply_op(self.store, self.relations, self.indexes, op)

    def neighbors(self, frm: str, rel: str, as_of: Optional[int] = None) -> List[str]:
        """Return the targets of relation *rel* starting at *frm*."""
        return self.relations.neighbors(frm, rel, as_of)

    def inbound(self, to: str, rel: str, as_of: Optional[int] = None) -> List[str]:
        """Return the sources of relation *rel* pointing at *to*."""
        return self.relations.inbound(to, rel, as_of)

    # --- indexes ------------------------------------------------------------
    def create_index(self, coll: str, field: str, kind: str = "eq") -> None:
        """Register an index on ``coll.field`` and backfill it from HEAD rows."""
        self.indexes.ensure(coll, field, kind)
        # backfill existing rows at HEAD
        for key in self.store.keys(coll + ":"):
            doc = self.store.get(key)
            if doc is not None:
                # `add` feeds every index of the collection, not only the new
                # one; removing first keeps already-populated ordered indexes
                # from collecting duplicate entries.
                self.indexes.remove(coll, key, doc)
                self.indexes.add(coll, key, doc)

    # --- queries ------------------------------------------------------------
    def q(self, coll: str) -> Query:
        """Start a fluent query over collection *coll*."""
        return Query(self, coll)

    def query(self, nql: str) -> List[dict]:
        """Parse an NQL string and execute the resulting plan."""
        return self.execute(parse_nql(nql))

    def execute(self, plan: dict) -> List[dict]:
        """Run a query plan and return the matching documents.

        Keys read from *plan*: ``from`` (required), ``as_of``, ``where`` (an
        iterable of ``(field, op, value)``), ``search``, ``order_by``
        (``(field, direction)``), ``traverse`` and ``limit``. Candidate keys come
        from the full-text index, then an equality index (HEAD reads only), then
        a collection scan; every candidate is re-checked against the predicates.
        """
        coll = plan["from"]
        as_of = plan.get("as_of")
        prefix = coll + ":"
        where = plan.get("where", [])
        search = plan.get("search")

        # Loop-invariant lookups, computed once instead of once per row.
        sfields = self.indexes.search_fields(coll) if search else []
        terms = tokenize(search) if search else set()

        candidates: Optional[set] = None

        # 1) full-text search is usually most selective
        if search and sfields:
            per_term = []
            for term in terms:
                s: set = set()
                for f in sfields:
                    s |= self.indexes.search_lookup(coll, f, term)
                per_term.append(s)
            candidates = set.intersection(*per_term) if per_term else set()

        # 2) equality-index acceleration (HEAD reads only)
        if candidates is None and as_of is None:
            for (f, op, v) in where:
                if op == "=" and self.indexes.has_eq(coll, f):
                    candidates = self.indexes.eq_lookup(coll, f, v)
                    break

        # 3) fallback: scan the collection
        if candidates is None:
            candidates = set(self.store.keys(prefix, as_of))

        # load + final predicate filter (guarantees correctness regardless of index path)
        scan_text = bool(search) and not sfields
        rows = []
        for key in candidates:
            doc = self.store.get(key, as_of)
            if doc is None:
                continue
            if not all(cmp(doc.get(f), op, v) for (f, op, v) in where):
                continue
            if scan_text:
                # No search index on this collection: substring-match every term
                # against the lower-cased, space-joined document values.
                blob = " ".join(str(x) for x in doc.values()).lower()
                if not all(t in blob for t in terms):
                    continue
            rows.append((key, doc))

        # order
        ob = plan.get("order_by")
        if ob:
            field, direction = ob
            try:
                rows.sort(key=lambda kv: (kv[1].get(field) is None, kv[1].get(field)),
                          reverse=(direction == "DESC"))
            except TypeError:
                # Values of mixed types cannot be compared; order by their text form.
                rows.sort(key=lambda kv: str(kv[1].get(field)), reverse=(direction == "DESC"))

        # traverse relations
        if plan.get("traverse"):
            rel = plan["traverse"]
            seen, trav = set(), []
            for key, _ in rows:
                for nb in self.relations.neighbors(key, rel, as_of):
                    if nb in seen:
                        continue
                    seen.add(nb)
                    d = self.store.get(nb, as_of)
                    if d is not None:
                        trav.append((nb, d))
            rows = trav

        if plan.get("limit") is not None:
            rows = rows[: plan["limit"]]
        return [d for _, d in rows]

    # --- files (git-style, Cascade-compressed) ------------------------------
    def put_file(self, name: str, data: bytes, tier: str = "warm", client: str = "local",
                 nonce: Optional[int] = None, idem: Optional[str] = None) -> int:
        """Store a file version (Cascade-compressed, deduplicated). Returns the
        integer version index; fetch its anchorable hash via file_root(name, version).

        Idempotency and replay are checked BEFORE any bytes are stored: a retry
        carrying the idempotency key of an earlier ``put_file`` returns that
        op's version without storing anything, and a stale *nonce* raises
        ``ReplayError`` without leaving an unlogged version in the blob store.
        """
        bs = self.blobs[tier]

        seen = getattr(self.log, "_idem", {})
        is_retry = idem is not None and idem in seen
        if is_retry:
            original = self.log.ops[seen[idem]]
            if original.op == "put_file":
                return original.payload["version"]

        nonce = self._next(client) if nonce is None else nonce
        if not is_retry:
            # Same rule the log enforces, applied early so a rejected op cannot
            # leave file bytes behind.
            last = getattr(self.log, "_last_nonce", {}).get(client, 0)
            if nonce <= last:
                raise ReplayError(
                    f"replay/stale nonce for client '{client}': {nonce} <= {last}"
                )

        version = bs.put_file(name, data)
        root = bs.root(name, version)
        self.log.append(client, nonce, "put_file",
                        {"name": name, "tier": tier, "version": version, "root": root}, idem)
        return version

    def get_file(self, name: str, version: int = -1, tier: str = "warm") -> bytes:
        """Return the bytes of *name* at *version* (default: latest) from *tier*."""
        return self.blobs[tier].get_file(name, version)

    def file_root(self, name: str, version: int = -1, tier: str = "warm") -> str:
        """Return the Merkle root of *name* at *version* (default: latest) in *tier*."""
        return self.blobs[tier].root(name, version)

    def file_proof(self, name: str, chunk_index: int, version: int = -1, tier: str = "warm"):
        """Return (leaf, proof, root) proving chunk_index is part of the version."""
        entry = self.blobs[tier].files[name]
        recipe = entry["versions"][version]
        root = entry["roots"][version]
        leaf = recipe[chunk_index]
        return leaf, merkle_proof(recipe, chunk_index), root

    @staticmethod
    def verify_proof(leaf, proof, root) -> bool:
        """Check an inclusion proof produced by ``file_proof``."""
        return merkle_verify(leaf, proof, root)

    def compression_stats(self, tier: str = "warm") -> dict:
        """Return the dedup / compression counters of the blob store for *tier*."""
        return self.blobs[tier].stats()

    # --- integrity / determinism -------------------------------------------
    def verify(self) -> bool:
        """Verify the hash-chained op log has not been tampered with."""
        return self.log.verify()

    def rebuild(self):
        """Replay the log into fresh state — proves state is a pure function of the log.

        Returns ``(store, relations, indexes)``; the live state is left untouched.
        """
        store, relations, indexes = MVCCStore(), Relations(), Indexes()
        # Recreate the index definitions first so replayed puts populate them.
        for (c, f, k) in self.indexes.config:
            indexes.ensure(c, f, k)
        for op in self.log.ops:
            apply_op(store, relations, indexes, op)
        return store, relations, indexes

    def verify_determinism(self) -> bool:
        """Return True when a replayed store snapshot equals the live store snapshot."""
        store, _, _ = self.rebuild()
        return store.snapshot() == self.store.snapshot()

    @property
    def head(self) -> str:
        """Hash of the latest op in the log."""
        return self.log.head

    @property
    def seq(self) -> int:
        """Sequence number of the latest op, or -1 while the log is empty."""
        return len(self.log) - 1
nedb/index.py ADDED
@@ -0,0 +1,88 @@
1
+ """
2
+ nedb.index — secondary indexes: equality (hash), ordered (bisect), full-text (inverted).
3
+
4
+ Indexes are maintained incrementally on write and reflect HEAD. They turn filter,
5
+ sort and search from O(n) scans into index lookups. Indexes are keyed by
6
+ "collection.field" so each collection has its own index namespace.
7
+
8
+ (Time-travel queries fall back to a version scan in the engine; temporally-indexed
9
+ reads are a documented later optimization.)
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import bisect
14
+ import re
15
+ from typing import Any, Dict, List, Set
16
+
17
# A token is a maximal run of lower-case ASCII letters and digits.
_TOKEN = re.compile(r"[a-z0-9]+")


def tokenize(text: str) -> Set[str]:
    """Return the set of distinct tokens found in the lower-cased *text*."""
    lowered = text.lower()
    return {match.group(0) for match in _TOKEN.finditer(lowered)}
22
+
23
+
24
class Indexes:
    """Secondary indexes over HEAD documents: equality, ordered and full-text.

    Each index is stored under the string ``"<collection>.<field>"``. ``config``
    remembers every ``(coll, field, kind)`` that was registered so the same
    indexes can be recreated elsewhere.
    """

    def __init__(self) -> None:
        self.eq: Dict[str, Dict[Any, Set[str]]] = {}     # key -> value -> {ids}
        self.ordered: Dict[str, List[tuple]] = {}        # key -> sorted [(value,id)]
        self.inv: Dict[str, Dict[str, Set[str]]] = {}    # key -> token -> {ids}
        self.config: List[tuple] = []                    # [(coll, field, kind)]

    @staticmethod
    def _field_in(index_key: str, coll: str):
        """Return the field part of *index_key* if it belongs to *coll*, else None.

        The collection prefix is stripped by length rather than by splitting on
        the first ".", so collection names that themselves contain "." resolve
        to the right field.
        """
        prefix = coll + "."
        if index_key.startswith(prefix):
            return index_key[len(prefix):]
        return None

    @staticmethod
    def _hashable(value: Any) -> bool:
        """Return True when *value* can be used as a dict key."""
        try:
            hash(value)
        except TypeError:
            return False
        return True

    def ensure(self, coll: str, field: str, kind: str = "eq") -> None:
        """Register an index of *kind* ("eq", "ordered" or "search") on ``coll.field``.

        Raises ``ValueError`` for an unknown kind. The kind is validated before
        anything is recorded, so a rejected call leaves ``config`` untouched.
        """
        k = f"{coll}.{field}"
        if kind == "eq":
            registry, empty = self.eq, {}
        elif kind == "ordered":
            registry, empty = self.ordered, []
        elif kind == "search":
            registry, empty = self.inv, {}
        else:
            raise ValueError(f"unknown index kind: {kind}")
        if (coll, field, kind) not in self.config:
            self.config.append((coll, field, kind))
        registry.setdefault(k, empty)

    def add(self, coll: str, key: str, doc: dict) -> None:
        """Index *doc* (stored under *key*) in every index of collection *coll*.

        Equality indexes skip values that cannot be hashed, ordered indexes only
        take int / float / str values, and search indexes only take str values.
        Adding the same ``(value, key)`` to an ordered index twice keeps a
        single entry.
        """
        for index_key, vmap in self.eq.items():
            f = self._field_in(index_key, coll)
            if f is not None and f in doc and self._hashable(doc[f]):
                vmap.setdefault(doc[f], set()).add(key)
        for index_key, lst in self.ordered.items():
            f = self._field_in(index_key, coll)
            if f is not None and f in doc and isinstance(doc[f], (int, float, str)):
                entry = (doc[f], key)
                pos = bisect.bisect_left(lst, entry)
                # Insert only when the exact entry is not already present.
                if pos == len(lst) or lst[pos] != entry:
                    lst.insert(pos, entry)
        for index_key, inv in self.inv.items():
            f = self._field_in(index_key, coll)
            if f is not None and isinstance(doc.get(f), str):
                for tok in tokenize(doc[f]):
                    inv.setdefault(tok, set()).add(key)

    def remove(self, coll: str, key: str, doc: dict) -> None:
        """Drop the entries that ``add(coll, key, doc)`` would have created."""
        for index_key, vmap in self.eq.items():
            f = self._field_in(index_key, coll)
            if f is None or f not in doc or not self._hashable(doc[f]):
                continue
            if doc[f] in vmap:
                vmap[doc[f]].discard(key)
        for index_key, lst in self.ordered.items():
            f = self._field_in(index_key, coll)
            if f is not None and f in doc:
                try:
                    lst.remove((doc[f], key))
                except ValueError:
                    pass  # the value was never indexed; nothing to drop
        for index_key, inv in self.inv.items():
            f = self._field_in(index_key, coll)
            if f is not None and isinstance(doc.get(f), str):
                for tok in tokenize(doc[f]):
                    if tok in inv:
                        inv[tok].discard(key)

    def eq_lookup(self, coll: str, field: str, value: Any):
        """Return a copy of the key set whose ``coll.field`` equals *value*."""
        return set(self.eq.get(f"{coll}.{field}", {}).get(value, set()))

    def search_lookup(self, coll: str, field: str, term: str):
        """Return a copy of the key set whose ``coll.field`` contains token *term*."""
        return set(self.inv.get(f"{coll}.{field}", {}).get(term, set()))

    def has_eq(self, coll: str, field: str) -> bool:
        """Return True when an equality index exists on ``coll.field``."""
        return f"{coll}.{field}" in self.eq

    def search_fields(self, coll: str) -> List[str]:
        """Return the field names of *coll* that have a full-text index."""
        fields = []
        for index_key in self.inv:
            f = self._field_in(index_key, coll)
            if f is not None:
                fields.append(f)
        return fields
nedb/log.py ADDED
@@ -0,0 +1,126 @@
1
+ """
2
+ nedb.log — the append-only, hash-chained, nonce-enforced, idempotent operation log.
3
+
4
+ This is the single source of truth for NEDB. Every mutation in the database is an
5
+ Op appended here. Three guarantees live in this one structure:
6
+
7
+ * Replay protection — each client has a strictly-monotonic nonce; an op whose
8
+ nonce is <= the client's last seen nonce is rejected.
9
+ * Idempotency — an op carrying an idempotency key that was already applied
10
+ returns the original result and is NOT appended again.
11
+ * Tamper evidence — ops are chained by hash (h_n = H(h_{n-1} || op_n)), so the
12
+ whole history is a verifiable chain and the head hash is a
13
+ commitment to the entire log (anchorable on a blockchain).
14
+
15
+ The same log is the substrate for MVCC snapshot isolation, crash recovery, and
16
+ time-travel reads: every Op has a monotonic `seq`, and state "AS OF seq N" is just
17
+ the log truncated at N.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import hashlib
22
+ import json
23
+ import time
24
+ from dataclasses import dataclass
25
+ from typing import Any, Dict, List, Optional, Tuple
26
+
27
# Initial chain head and the "previous hash" of the first op: 64 zero characters,
# the same width as the 32-byte hex digests produced by `blake`.
GENESIS = "0" * 64
28
+
29
+
30
def canon(obj: Any) -> bytes:
    """Deterministic canonical encoding for hashing.

    Serializes *obj* as compact JSON with sorted keys; values JSON cannot
    represent natively are rendered through ``str``. The text is returned
    encoded as bytes.
    """
    text = json.dumps(
        obj,
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    )
    return text.encode()
33
+
34
+
35
def blake(data: bytes) -> str:
    """Return the 32-byte BLAKE2b digest of *data* as a 64-character hex string."""
    # Reference uses BLAKE2b (stdlib). The production Rust core uses BLAKE3
    # (faster, natively tree-structured for the Merkle history).
    hasher = hashlib.blake2b(digest_size=32)
    hasher.update(data)
    return hasher.hexdigest()
39
+
40
+
41
class ReplayError(Exception):
    """Raised when an op is replayed with a stale/duplicate nonce.

    ``OpLog.append`` raises it when the supplied nonce is less than or equal to
    the last nonce accepted for the same client.
    """
43
+
44
+
45
+ @dataclass
46
+ class Op:
47
+ seq: int
48
+ client: str
49
+ nonce: int
50
+ op: str # put | delete | link | unlink | put_file
51
+ payload: dict
52
+ ts: float
53
+ idem: Optional[str]
54
+ prev_hash: str
55
+ hash: str
56
+
57
+
58
class OpLog:
    """Append-only, hash-chained operation log with nonce and idempotency checks."""

    def __init__(self) -> None:
        self.ops: List[Op] = []
        self._last_nonce: Dict[str, int] = {}  # client -> last accepted nonce
        self._idem: Dict[str, int] = {}  # idem key -> seq of original op
        self._head = GENESIS  # hash of the latest op (GENESIS while empty)

    def append(
        self,
        client: str,
        nonce: int,
        op: str,
        payload: dict,
        idem: Optional[str] = None,
        ts: Optional[float] = None,
    ) -> Tuple[Op, bool]:
        """Append an op. Returns (op, created). `created` is False when the op was
        deduplicated by its idempotency key (a no-op replay-safe return).

        Raises ``ReplayError`` when *nonce* is not strictly greater than the last
        nonce accepted for *client*. The idempotency check runs first, so a
        retry with a known key never raises. *ts* defaults to ``time.time()``.
        """
        # Idempotency: a known key returns the original op without re-appending.
        if idem is not None and idem in self._idem:
            return self.ops[self._idem[idem]], False

        # Replay protection: nonce must strictly exceed the client's last nonce.
        last = self._last_nonce.get(client, 0)
        if nonce <= last:
            raise ReplayError(
                f"replay/stale nonce for client '{client}': {nonce} <= {last}"
            )

        seq = len(self.ops)
        ts = time.time() if ts is None else ts
        body = {
            "seq": seq, "client": client, "nonce": nonce,
            "op": op, "payload": payload, "ts": ts, "idem": idem,
        }
        # Chain link: hash of the previous head followed by the canonical body.
        h = blake(self._head.encode() + canon(body))
        rec = Op(seq, client, nonce, op, payload, ts, idem, self._head, h)

        self.ops.append(rec)
        self._last_nonce[client] = nonce
        if idem is not None:
            self._idem[idem] = seq
        self._head = h
        return rec, True

    def verify(self) -> bool:
        """Re-walk the chain and confirm no op has been tampered with.

        Checks that every op sits at the position its ``seq`` claims, links to
        its predecessor's hash, and hashes to its recorded value, and finally
        that the chain ends at the recorded head, so dropping trailing ops is
        detected as well.
        """
        prev = GENESIS
        for position, o in enumerate(self.ops):
            if o.seq != position or o.prev_hash != prev:
                return False
            body = {
                "seq": o.seq, "client": o.client, "nonce": o.nonce,
                "op": o.op, "payload": o.payload, "ts": o.ts, "idem": o.idem,
            }
            if o.hash != blake(prev.encode() + canon(body)):
                return False
            prev = o.hash
        return prev == self._head

    @property
    def head(self) -> str:
        """Hash of the latest op, or ``GENESIS`` while the log is empty."""
        return self._head

    def slice_until(self, as_of: int) -> List[Op]:
        """Return the ops whose ``seq`` is less than or equal to *as_of*."""
        return [o for o in self.ops if o.seq <= as_of]

    def __len__(self) -> int:
        """Number of ops in the log."""
        return len(self.ops)
nedb/merkle.py ADDED
@@ -0,0 +1,62 @@
1
+ """
2
+ nedb.merkle — Merkle tree over content-addressed chunk hashes.
3
+
4
+ Because every file version is a list of BLAKE-addressed chunks, a file version has
5
+ a Merkle root that commits to its exact bytes. Any chunk's membership is provable in
6
+ O(log n), and the root can be anchored on-chain (e.g. ITC) for tamper-evident,
7
+ notarized version history.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ from typing import List, Tuple
13
+
14
+
15
+ def _h(b: bytes) -> bytes:
16
+ return hashlib.blake2b(b, digest_size=32).digest()
17
+
18
+
19
+ def _to_bytes(x) -> bytes:
20
+ return bytes.fromhex(x) if isinstance(x, str) else bytes(x)
21
+
22
+
23
def merkle_root(leaves: List[str]) -> str:
    """Return the hex Merkle root over *leaves*.

    An empty list yields 64 zero characters. Otherwise adjacent nodes are hashed
    pairwise, level by level, until one node remains; on a level with an odd
    number of nodes the last node is paired with itself.
    """
    if not leaves:
        return "0" * 64
    nodes = [_to_bytes(leaf) for leaf in leaves]
    while len(nodes) > 1:
        if len(nodes) % 2:
            # Odd level: pair the trailing node with a copy of itself.
            nodes.append(nodes[-1])
        nodes = [_h(left + right) for left, right in zip(nodes[0::2], nodes[1::2])]
    return nodes[0].hex()
35
+
36
+
37
def merkle_proof(leaves: List[str], idx: int) -> List[Tuple[str, str]]:
    """Return inclusion proof for leaf at idx: list of (sibling_hex, side).

    ``side`` is "R" when the sibling is hashed to the right of the running node
    and "L" when it is hashed to the left. On a level with an odd number of
    nodes the last node is its own sibling, matching ``merkle_root``.

    *idx* follows list indexing: negative values count from the end. An index
    outside the leaf list raises ``IndexError`` instead of yielding a proof that
    can never verify.
    """
    count = len(leaves)
    if not -count <= idx < count:
        raise IndexError(f"leaf index {idx} out of range for {count} leaves")
    if idx < 0:
        idx += count

    level = [_to_bytes(x) for x in leaves]
    path: List[Tuple[str, str]] = []
    while len(level) > 1:
        if idx % 2 == 0:
            sib = level[idx + 1] if idx + 1 < len(level) else level[idx]
            path.append((sib.hex(), "R"))
        else:
            path.append((level[idx - 1].hex(), "L"))
        # Build the parent level the same way merkle_root does.
        nxt = []
        for i in range(0, len(level), 2):
            a = level[i]
            b = level[i + 1] if i + 1 < len(level) else level[i]
            nxt.append(_h(a + b))
        level = nxt
        idx //= 2
    return path
55
+
56
+
57
def merkle_verify(leaf: str, path: List[Tuple[str, str]], root: str) -> bool:
    """Return True when folding *leaf* through *path* reproduces *root*.

    Each path step is ``(sibling_hex, side)``: a side of "R" hashes the sibling
    to the right of the running node, any other side hashes it to the left.
    """
    node = _to_bytes(leaf)
    for sibling_hex, side in path:
        sibling = _to_bytes(sibling_hex)
        if side == "R":
            node = _h(node + sibling)
        else:
            node = _h(sibling + node)
    return node.hex() == root