cryptodb 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nedb/index.py ADDED
@@ -0,0 +1,98 @@
1
+ """
2
+ nedb.index — secondary indexes: equality (hash), ordered (bisect), full-text (inverted).
3
+
4
+ Indexes are maintained incrementally on write and reflect HEAD. They turn filter,
5
+ sort and search from O(n) scans into index lookups. Indexes are keyed by
6
+ "collection.field" so each collection has its own index namespace.
7
+
8
+ (Time-travel queries fall back to a version scan in the engine; temporally-indexed
9
+ reads are a documented later optimization.)
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import bisect
14
+ import re
15
+ from typing import Any, Dict, List, Set
16
+
17
_TOKEN = re.compile(r"[a-z0-9]+")


def tokenize(text: str) -> Set[str]:
    """Return the distinct lowercase alphanumeric tokens found in *text*.

    The text is lowercased first; every maximal run of ``a-z`` / ``0-9`` then
    becomes one token and every other character acts as a separator.
    Repeated tokens collapse because the result is a set.
    """
    lowered = text.lower()
    return {match.group(0) for match in _TOKEN.finditer(lowered)}
22
+
23
+
24
class Indexes:
    """Secondary indexes, updated incrementally through add() / remove().

    Each index lives in one of three maps, all keyed by
    ``"<collection>.<field>"``:

    * ``eq``      -- value -> set of document ids (equality lookup)
    * ``ordered`` -- sorted list of ``(value, id)`` tuples
    * ``inv``     -- token -> set of document ids (full-text search)

    ``config`` records every ``(collection, field, kind)`` that was requested
    through ensure().
    """

    def __init__(self) -> None:
        self.eq: Dict[str, Dict[Any, Set[str]]] = {}    # key -> value -> {ids}
        self.ordered: Dict[str, List[tuple]] = {}       # key -> sorted [(value,id)]
        self.inv: Dict[str, Dict[str, Set[str]]] = {}   # key -> token -> {ids}
        self.config: List[tuple] = []                   # [(coll, field, kind)]

    @staticmethod
    def _for_collection(index_map: dict, coll: str):
        """Yield ``(field, index)`` for each entry of *index_map* owned by *coll*.

        The field name is whatever follows the ``"<coll>."`` prefix.  Stripping
        the prefix by length (rather than splitting on the first dot) keeps
        collection names that themselves contain a dot working.
        """
        prefix = coll + "."
        for index_key, index in index_map.items():
            if index_key.startswith(prefix):
                yield index_key[len(prefix):], index

    def ensure(self, coll: str, field: str, kind: str = "eq") -> None:
        """Create the index for ``coll.field`` of the given kind if it is missing.

        Args:
            coll: collection name.
            field: document field to index.
            kind: ``"eq"``, ``"ordered"`` or ``"search"``.

        Raises:
            ValueError: if *kind* is not one of the supported kinds.  Nothing
                is recorded in ``config`` in that case.
        """
        # Resolve the target store first so an unknown kind is rejected before
        # any state (including ``config``) is touched.
        if kind == "eq":
            store, empty = self.eq, {}
        elif kind == "ordered":
            store, empty = self.ordered, []
        elif kind == "search":
            store, empty = self.inv, {}
        else:
            raise ValueError(f"unknown index kind: {kind}")

        entry = (coll, field, kind)
        if entry not in self.config:
            self.config.append(entry)
        store.setdefault(f"{coll}.{field}", empty)

    def add(self, coll: str, key: str, doc: dict) -> None:
        """Index document *doc* (id *key*) in every index defined on *coll*.

        Fields missing from *doc* are skipped.  Ordered indexes only accept
        ``int`` / ``float`` / ``str`` values and search indexes only ``str``
        values; other values are left out of those indexes.
        """
        for field, vmap in self._for_collection(self.eq, coll):
            if field in doc:
                # NOTE: the value is used as a dict key, so an unhashable value
                # (list, dict, ...) raises TypeError here.
                vmap.setdefault(doc[field], set()).add(key)

        for field, lst in self._for_collection(self.ordered, coll):
            if field in doc and isinstance(doc[field], (int, float, str)):
                # NOTE: insort compares (value, id) tuples, so values that do
                # not order against each other (e.g. int vs str) raise TypeError.
                bisect.insort(lst, (doc[field], key))

        for field, inv in self._for_collection(self.inv, coll):
            text = doc.get(field)
            if isinstance(text, str):
                for tok in tokenize(text):
                    inv.setdefault(tok, set()).add(key)

    def remove(self, coll: str, key: str, doc: dict) -> None:
        """Undo add(): drop id *key* from every index entry derived from *doc*.

        *doc* must be the document version that was indexed, because the
        entries to remove are located through its field values.  Buckets that
        become empty are deleted so the maps do not accumulate dead entries.
        """
        for field, vmap in self._for_collection(self.eq, coll):
            if field in doc:
                ids = vmap.get(doc[field])
                if ids is not None:
                    ids.discard(key)
                    if not ids:
                        del vmap[doc[field]]

        for field, lst in self._for_collection(self.ordered, coll):
            if field in doc:
                try:
                    lst.remove((doc[field], key))
                except ValueError:
                    # The pair was never indexed (e.g. unsupported value type).
                    pass

        for field, inv in self._for_collection(self.inv, coll):
            text = doc.get(field)
            if isinstance(text, str):
                for tok in tokenize(text):
                    ids = inv.get(tok)
                    if ids is not None:
                        ids.discard(key)
                        if not ids:
                            del inv[tok]

    def eq_lookup(self, coll: str, field: str, value: Any) -> Set[str]:
        """Return a copy of the ids whose ``coll.field`` equals *value*.

        An empty set is returned when the index or the value is unknown.
        """
        return set(self.eq.get(f"{coll}.{field}", {}).get(value, set()))

    def search_lookup(self, coll: str, field: str, term: str) -> Set[str]:
        """Return a copy of the ids whose ``coll.field`` text contains token *term*.

        Indexed tokens are always lowercase (tokenize() lowercases its input),
        so *term* is lowercased before the lookup to make matching
        case-insensitive.
        """
        if isinstance(term, str):
            term = term.lower()
        return set(self.inv.get(f"{coll}.{field}", {}).get(term, set()))

    def has_eq(self, coll: str, field: str) -> bool:
        """Tell whether an equality index exists for ``coll.field``."""
        return f"{coll}.{field}" in self.eq

    def search_fields(self, coll: str) -> List[str]:
        """List the fields of *coll* that have a full-text index."""
        # Lock-free: snapshot keys with retry (a concurrent create_index may add
        # one mid-iteration under the Sequencer's single-writer committer).
        for _ in range(128):
            try:
                inv_keys = list(self.inv.keys())
                break
            except RuntimeError:
                continue
        else:
            # Every retry failed; make one last attempt and let it raise.
            inv_keys = list(self.inv.keys())
        prefix = coll + "."
        return [k[len(prefix):] for k in inv_keys if k.startswith(prefix)]
nedb/log.py ADDED
@@ -0,0 +1,216 @@
1
+ """
2
+ nedb.log — the append-only, hash-chained, nonce-enforced, idempotent operation log.
3
+
4
+ This is the single source of truth for NEDB. Every mutation in the database is an
5
+ Op appended here. Three guarantees live in this one structure:
6
+
7
+ * Replay protection — each client has a strictly-monotonic nonce; an op whose
8
+ nonce is <= the client's last seen nonce is rejected.
9
+ * Idempotency — an op carrying an idempotency key that was already applied
10
+ returns the original result and is NOT appended again.
11
+ * Tamper evidence — ops are chained by hash (h_n = H(h_{n-1} || op_n)), so the
12
+ whole history is a verifiable chain and the head hash is a
13
+ commitment to the entire log (anchorable on a blockchain).
14
+
15
+ The same log is the substrate for MVCC snapshot isolation, crash recovery, and
16
+ time-travel reads: every Op has a monotonic `seq`, and state "AS OF seq N" is just
17
+ the log truncated at N.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import hashlib
22
+ import json
23
+ import time
24
+ from dataclasses import dataclass
25
+ from typing import Any, Dict, List, Optional, Tuple
26
+
27
# Seed of the hash chain: 64 "0" hex characters.  It is the head of an empty
# OpLog and the value verify() expects as prev_hash of the first op.
GENESIS = "0" * 64
28
+
29
+
30
def canon(obj: Any) -> bytes:
    """Encode *obj* as canonical JSON bytes for hashing.

    Keys are sorted and the most compact separators are used, so equal
    objects always yield identical bytes.  Values the ``json`` module cannot
    encode natively are converted with ``str()``.
    """
    text = json.dumps(obj, default=str, separators=(",", ":"), sort_keys=True)
    return text.encode()
33
+
34
+
35
def blake(data: bytes) -> str:
    """Return the 32-byte BLAKE2b digest of *data* as a hex string."""
    # Reference uses BLAKE2b (stdlib). The production Rust core uses BLAKE3
    # (faster, natively tree-structured for the Merkle history).
    hasher = hashlib.blake2b(digest_size=32)
    hasher.update(data)
    return hasher.hexdigest()
39
+
40
+
41
class ReplayError(Exception):
    """Signals an op whose nonce is stale or duplicated for its client.

    OpLog.append() raises it when the nonce does not strictly exceed the last
    nonce recorded for that client.
    """
43
+
44
+
45
@dataclass
class Op:
    """One entry of the operation log, including its position in the hash chain."""

    seq: int
    client: str
    nonce: int
    op: str  # put | delete | link | unlink | put_file
    payload: dict
    ts: float
    idem: Optional[str]
    prev_hash: str
    hash: str

    # ── Causal provenance (v0.9.0+) ─────────────────────────────────────────
    # Optional; when present they are part of the hashed body, which makes
    # them tamper-evident.
    #   caused_by  — seqs of the ops that led to this write (backward trace).
    #   evidence   — source type: "user_message" | "inference" | "tool_result"
    #                | "correction" | "external"
    #   confidence — agent's certainty in this write (0.0 – 1.0).
    caused_by: Optional[List[int]] = None
    evidence: Optional[str] = None
    confidence: Optional[float] = None

    # ── Bi-temporal valid time (v1.0.0+) ─────────────────────────────────────
    # When the fact was true in the world, independent of when it was written.
    #   valid_from — ISO 8601 date/datetime string; None = "from the beginning"
    #   valid_to   — ISO 8601 date/datetime string; None = "still valid / open-ended"
    #
    # ISO 8601 strings sort lexicographically, so plain string comparison is
    # enough: "2024-01-01" < "2024-06-15".
    #
    # Ops without valid-time fields are treated as always valid, and because
    # absent fields are left out of the hashed body, older chains still verify.
    valid_from: Optional[str] = None
    valid_to: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the dict form written to the append-only log file (AOF).

        The nine core fields are always present; each optional field is
        emitted only when it is not ``None``.
        """
        record: dict = dict(
            seq=self.seq,
            client=self.client,
            nonce=self.nonce,
            op=self.op,
            payload=self.payload,
            ts=self.ts,
            idem=self.idem,
            prev_hash=self.prev_hash,
            hash=self.hash,
        )
        for name in ("caused_by", "evidence", "confidence", "valid_from", "valid_to"):
            value = getattr(self, name)
            if value is not None:
                record[name] = value
        return record

    @classmethod
    def from_dict(cls, d: dict) -> "Op":
        """Rebuild an Op from the dict produced by to_dict().

        ``idem`` and all optional fields default to ``None`` when missing;
        every other core key is required (``KeyError`` otherwise).
        """
        return cls(
            seq=d["seq"],
            client=d["client"],
            nonce=d["nonce"],
            op=d["op"],
            payload=d["payload"],
            ts=d["ts"],
            idem=d.get("idem"),
            prev_hash=d["prev_hash"],
            hash=d["hash"],
            caused_by=d.get("caused_by"),
            evidence=d.get("evidence"),
            confidence=d.get("confidence"),
            valid_from=d.get("valid_from"),
            valid_to=d.get("valid_to"),
        )
105
+
106
+
107
class OpLog:
    """In-memory append-only op log with per-client nonces, idempotency keys
    and a hash chain whose latest hash is exposed as ``head``."""

    def __init__(self) -> None:
        self.ops: List[Op] = []
        self._last_nonce: Dict[str, int] = {}
        self._idem: Dict[str, int] = {}  # idem key -> seq of original op
        self._head = GENESIS

    def append(
        self,
        client: str,
        nonce: int,
        op: str,
        payload: dict,
        idem: Optional[str] = None,
        ts: Optional[float] = None,
        caused_by: Optional[List[int]] = None,
        evidence: Optional[str] = None,
        confidence: Optional[float] = None,
        valid_from: Optional[str] = None,
        valid_to: Optional[str] = None,
    ) -> Tuple[Op, bool]:
        """Append an op and return ``(op, created)``.

        ``created`` is ``False`` when *idem* matches an already-applied op; in
        that case the original op is returned and nothing is appended.

        Raises:
            ReplayError: if *nonce* is not strictly greater than the last
                nonce recorded for *client*.
        """
        # Idempotency comes first: a known key short-circuits before the
        # nonce check and returns the original op.
        if idem is not None:
            original_seq = self._idem.get(idem)
            if original_seq is not None:
                return self.ops[original_seq], False

        # Replay protection: the nonce must strictly exceed the previous one.
        last = self._last_nonce.get(client, 0)
        if nonce <= last:
            raise ReplayError(
                f"replay/stale nonce for client '{client}': {nonce} <= {last}"
            )

        # Build the record with a placeholder hash, then seal it.  The hash
        # body comes from _op_body(), the same function verify() uses, and it
        # does not include the ``hash`` field itself.
        rec = Op(
            seq=len(self.ops),
            client=client,
            nonce=nonce,
            op=op,
            payload=payload,
            ts=time.time() if ts is None else ts,
            idem=idem,
            prev_hash=self._head,
            hash="",
            caused_by=caused_by,
            evidence=evidence,
            confidence=confidence,
            valid_from=valid_from,
            valid_to=valid_to,
        )
        rec.hash = blake(self._head.encode() + canon(self._op_body(rec)))

        # Commit: state is only touched once hashing has succeeded.
        self.ops.append(rec)
        self._last_nonce[client] = nonce
        if idem is not None:
            self._idem[idem] = rec.seq
        self._head = rec.hash
        return rec, True

    def load(self, ops: List[Op]) -> None:
        """Replace the log with persisted *ops* without recomputing any hash.

        The stored hashes are kept as-is, so the chain and the head are
        exactly what was persisted.  Per-client last nonces, the idempotency
        map (first op wins for a repeated key) and the head are rebuilt from
        the ops themselves.
        """
        self.ops = list(ops)
        nonces: Dict[str, int] = {}
        idem_keys: Dict[str, int] = {}
        self._last_nonce, self._idem = nonces, idem_keys
        for entry in self.ops:
            if entry.nonce > nonces.get(entry.client, 0):
                nonces[entry.client] = entry.nonce
            if entry.idem is not None:
                idem_keys.setdefault(entry.idem, entry.seq)
        if self.ops:
            self._head = self.ops[-1].hash
        else:
            self._head = GENESIS

    @staticmethod
    def _op_body(o: "Op") -> dict:
        """Return the dict that is hashed for *o* (everything except the hashes).

        Optional fields are included only when set, so ops written without
        them hash exactly as before those fields existed.
        """
        body: dict = dict(
            seq=o.seq,
            client=o.client,
            nonce=o.nonce,
            op=o.op,
            payload=o.payload,
            ts=o.ts,
            idem=o.idem,
        )
        for name in ("caused_by", "evidence", "confidence", "valid_from", "valid_to"):
            value = getattr(o, name)
            if value is not None:
                body[name] = value
        return body

    def verify(self) -> bool:
        """Walk the chain from GENESIS and report whether every link is intact.

        Returns ``False`` at the first op whose ``prev_hash`` does not match
        its predecessor's hash or whose ``hash`` does not match its recomputed
        body hash.
        """
        expected_prev = GENESIS
        for entry in self.ops:
            body = self._op_body(entry)
            if (
                entry.prev_hash != expected_prev
                or entry.hash != blake(expected_prev.encode() + canon(body))
            ):
                return False
            expected_prev = entry.hash
        return True

    @property
    def head(self) -> str:
        """Hash of the most recent op, or GENESIS for an empty log."""
        return self._head

    def slice_until(self, as_of: int) -> List[Op]:
        """Return the ops whose ``seq`` is at most *as_of*, in log order."""
        selected: List[Op] = []
        for entry in self.ops:
            if entry.seq <= as_of:
                selected.append(entry)
        return selected

    def __len__(self) -> int:
        """Number of ops currently in the log."""
        return len(self.ops)
nedb/merkle.py ADDED
@@ -0,0 +1,62 @@
1
+ """
2
+ nedb.merkle — Merkle tree over content-addressed chunk hashes.
3
+
4
+ Because every file version is a list of BLAKE-addressed chunks, a file version has
5
+ a Merkle root that commits to its exact bytes. Any chunk's membership is provable in
6
+ O(log n), and the root can be anchored on-chain (e.g. ITC) for tamper-evident,
7
+ notarized version history.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ from typing import List, Tuple
13
+
14
+
15
def _h(b: bytes) -> bytes:
    """Return the raw 32-byte BLAKE2b digest of *b*."""
    hasher = hashlib.blake2b(digest_size=32)
    hasher.update(b)
    return hasher.digest()
17
+
18
+
19
def _to_bytes(x) -> bytes:
    """Coerce a hash to ``bytes``: hex-decode a ``str``, otherwise call ``bytes(x)``."""
    if isinstance(x, str):
        return bytes.fromhex(x)
    return bytes(x)
21
+
22
+
23
def merkle_root(leaves: List[str]) -> str:
    """Return the hex Merkle root over *leaves*.

    An empty list yields 64 ``"0"`` characters, and a single leaf is its own
    root.  On every level adjacent nodes are hashed in pairs; when a level
    has an odd number of nodes the last one is paired with itself.
    """
    if not leaves:
        return "0" * 64
    nodes = [_to_bytes(leaf) for leaf in leaves]
    while len(nodes) > 1:
        lefts = nodes[0::2]
        rights = nodes[1::2]
        if len(rights) < len(lefts):
            # Odd level: the trailing node has no partner, so it is duplicated.
            rights.append(lefts[-1])
        nodes = [_h(left + right) for left, right in zip(lefts, rights)]
    return nodes[0].hex()
35
+
36
+
37
def merkle_proof(leaves: List[str], idx: int) -> List[Tuple[str, str]]:
    """Return the inclusion proof for the leaf at position *idx*.

    The proof is a bottom-up list of ``(sibling_hex, side)`` pairs, where
    ``side`` is ``"R"`` when the sibling is hashed on the right of the running
    node and ``"L"`` when it is hashed on the left.  A node with no partner on
    an odd-sized level is its own sibling, matching how the root is built.

    Raises:
        IndexError: if *idx* is not a valid position in *leaves* (this
            includes any index into an empty list).  Without this check a
            negative or too-large index could produce a proof for a leaf that
            does not exist.
    """
    if not 0 <= idx < len(leaves):
        raise IndexError(
            f"leaf index {idx} out of range for {len(leaves)} leaves"
        )

    level = [_to_bytes(x) for x in leaves]
    path: List[Tuple[str, str]] = []
    while len(level) > 1:
        if idx % 2 == 0:
            # Left child: sibling is the right neighbour, or the node itself
            # when it is the unpaired last node of the level.
            sib = level[idx + 1] if idx + 1 < len(level) else level[idx]
            path.append((sib.hex(), "R"))
        else:
            path.append((level[idx - 1].hex(), "L"))

        # Collapse the level pairwise to obtain the parents.
        nxt = []
        for i in range(0, len(level), 2):
            a = level[i]
            b = level[i + 1] if i + 1 < len(level) else level[i]
            nxt.append(_h(a + b))
        level = nxt
        idx //= 2  # position of the running node on the parent level
    return path
55
+
56
+
57
def merkle_verify(leaf: str, path: List[Tuple[str, str]], root: str) -> bool:
    """Check an inclusion proof: fold *path* over *leaf* and compare with *root*.

    Each ``(sibling_hex, side)`` step hashes the sibling on the right of the
    running node when ``side`` is ``"R"`` and on the left for any other value.
    The result is compared with *root* as a lowercase hex string.
    """
    node = _to_bytes(leaf)
    for sibling_hex, side in path:
        sibling = _to_bytes(sibling_hex)
        if side == "R":
            node = _h(node + sibling)
        else:
            node = _h(sibling + node)
    return node.hex() == root
62
+ return h.hex() == root