nitrodb 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nedb/__init__.py +92 -0
- nedb/autoindex.py +142 -0
- nedb/backends/__init__.py +0 -0
- nedb/backends/redis_backend.py +115 -0
- nedb/cascade.py +130 -0
- nedb/concurrent.py +218 -0
- nedb/crypto.py +294 -0
- nedb/engine.py +783 -0
- nedb/index.py +98 -0
- nedb/log.py +216 -0
- nedb/merkle.py +62 -0
- nedb/mongo.py +824 -0
- nedb/proof.py +126 -0
- nedb/query.py +305 -0
- nedb/redis_compat.py +516 -0
- nedb/relations.py +51 -0
- nedb/resp2.py +250 -0
- nedb/server.py +1011 -0
- nedb/snapshot.py +216 -0
- nedb/sql.py +430 -0
- nedb/store.py +68 -0
- nedb/wrap_redis.py +725 -0
- nitrodb-2.4.3.dist-info/METADATA +64 -0
- nitrodb-2.4.3.dist-info/RECORD +27 -0
- nitrodb-2.4.3.dist-info/WHEEL +4 -0
- nitrodb-2.4.3.dist-info/entry_points.txt +2 -0
- nitrodb-2.4.3.dist-info/licenses/LICENSE +65 -0
nedb/index.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nedb.index — secondary indexes: equality (hash), ordered (bisect), full-text (inverted).
|
|
3
|
+
|
|
4
|
+
Indexes are maintained incrementally on write and reflect HEAD. They turn filter,
|
|
5
|
+
sort and search from O(n) scans into index lookups. Indexes are keyed by
|
|
6
|
+
"collection.field" so each collection has its own index namespace.
|
|
7
|
+
|
|
8
|
+
(Time-travel queries fall back to a version scan in the engine; temporally-indexed
|
|
9
|
+
reads are a documented later optimization.)
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import bisect
|
|
14
|
+
import re
|
|
15
|
+
from typing import Any, Dict, List, Set
|
|
16
|
+
|
|
17
|
+
# A token is a maximal run of ASCII lowercase letters and digits.
_TOKEN = re.compile(r"[a-z0-9]+")


def tokenize(text: str) -> Set[str]:
    """Return the distinct tokens of *text*.

    The text is lower-cased first; every run matched by ``_TOKEN`` becomes one
    token and any other character acts as a separator (so non-ASCII letters
    split words rather than joining them).
    """
    lowered = text.lower()
    return {match.group(0) for match in _TOKEN.finditer(lowered)}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Indexes:
    """In-memory secondary indexes over document fields.

    Every index lives under the key ``"<coll>.<field>"`` in one of three stores:

    * ``eq``      -- value -> {ids}; answers equality lookups.
    * ``ordered`` -- list of ``(value, id)`` tuples kept sorted with ``bisect``.
    * ``inv``     -- token -> {ids}; inverted index over ``tokenize()`` output.

    ``config`` lists every ``(coll, field, kind)`` accepted by ``ensure()``.
    """

    def __init__(self) -> None:
        self.eq: Dict[str, Dict[Any, Set[str]]] = {}    # key -> value -> {ids}
        self.ordered: Dict[str, List[tuple]] = {}       # key -> sorted [(value,id)]
        self.inv: Dict[str, Dict[str, Set[str]]] = {}   # key -> token -> {ids}
        self.config: List[tuple] = []                   # [(coll, field, kind)]

    @staticmethod
    def _fields_for(store: dict, coll: str):
        """Yield ``(field, index)`` for each index in *store* owned by *coll*.

        The field name is what follows the ``"<coll>."`` prefix, so collection
        names that themselves contain a dot are handled correctly.
        """
        prefix = coll + "."
        for index_key, index in store.items():
            if index_key.startswith(prefix):
                yield index_key[len(prefix):], index

    def ensure(self, coll: str, field: str, kind: str = "eq") -> None:
        """Register an index of *kind* on ``coll.field``; repeated calls are no-ops.

        Raises:
            ValueError: if *kind* is not ``"eq"``, ``"ordered"`` or ``"search"``.
                The kind is validated before anything is recorded, so a rejected
                call leaves ``config`` untouched.
        """
        if kind == "eq":
            store, empty = self.eq, {}
        elif kind == "ordered":
            store, empty = self.ordered, []
        elif kind == "search":
            store, empty = self.inv, {}
        else:
            raise ValueError(f"unknown index kind: {kind}")
        entry = (coll, field, kind)
        if entry not in self.config:
            self.config.append(entry)
        store.setdefault(f"{coll}.{field}", empty)

    def add(self, coll: str, key: str, doc: dict) -> None:
        """Index document *doc* (stored under id *key*) in every index of *coll*.

        Only int/float/str values enter an ordered index and only str values
        enter a search index; fields missing from *doc* are skipped.
        """
        for field, by_value in self._fields_for(self.eq, coll):
            if field in doc:
                by_value.setdefault(doc[field], set()).add(key)
        for field, entries in self._fields_for(self.ordered, coll):
            if field in doc and isinstance(doc[field], (int, float, str)):
                bisect.insort(entries, (doc[field], key))
        for field, postings in self._fields_for(self.inv, coll):
            text = doc.get(field)
            if isinstance(text, str):
                for tok in tokenize(text):
                    postings.setdefault(tok, set()).add(key)

    def remove(self, coll: str, key: str, doc: dict) -> None:
        """Undo ``add(coll, key, doc)``; *doc* must be the version that was indexed.

        Value and token buckets that become empty are deleted so that churn on
        high-cardinality fields does not accumulate empty sets.
        """
        for field, by_value in self._fields_for(self.eq, coll):
            if field not in doc:
                continue
            ids = by_value.get(doc[field])
            if ids is None:
                continue
            ids.discard(key)
            if not ids:
                del by_value[doc[field]]
        for field, entries in self._fields_for(self.ordered, coll):
            if field in doc:
                try:
                    entries.remove((doc[field], key))
                except ValueError:
                    # The pair was never indexed (e.g. a value type add() skips).
                    pass
        for field, postings in self._fields_for(self.inv, coll):
            text = doc.get(field)
            if isinstance(text, str):
                for tok in tokenize(text):
                    ids = postings.get(tok)
                    if ids is None:
                        continue
                    ids.discard(key)
                    if not ids:
                        del postings[tok]

    def eq_lookup(self, coll: str, field: str, value: Any) -> Set[str]:
        """Return a copy of the ids whose ``coll.field`` equals *value*."""
        return set(self.eq.get(f"{coll}.{field}", {}).get(value, set()))

    def search_lookup(self, coll: str, field: str, term: str) -> Set[str]:
        """Return the ids whose ``coll.field`` text contains every token of *term*.

        *term* is normalised with ``tokenize()`` -- the same normalisation applied
        at index time -- so the lookup is case-insensitive.  A term that is already
        a single lower-case token behaves exactly like a direct posting lookup; a
        term with several tokens returns the intersection of their postings.
        """
        postings = self.inv.get(f"{coll}.{field}", {})
        if not isinstance(term, str):
            # Non-string terms are looked up verbatim.
            return set(postings.get(term, set()))
        matched = None
        for tok in tokenize(term):
            ids = postings.get(tok)
            if not ids:
                return set()
            matched = set(ids) if matched is None else matched & ids
        return matched if matched is not None else set()

    def has_eq(self, coll: str, field: str) -> bool:
        """Tell whether an equality index exists on ``coll.field``."""
        return f"{coll}.{field}" in self.eq

    def search_fields(self, coll: str) -> List[str]:
        """Return the field names of *coll* that have a search index."""
        prefix = coll + "."
        # Lock-free: snapshot keys with retry (a concurrent create_index may add
        # one mid-iteration under the Sequencer's single-writer committer).
        for _ in range(128):
            try:
                inv_keys = list(self.inv.keys())
                break
            except RuntimeError:
                continue
        else:
            # All retries failed; one last unguarded attempt surfaces the error.
            inv_keys = list(self.inv.keys())
        return [k[len(prefix):] for k in inv_keys if k.startswith(prefix)]
|
nedb/log.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nedb.log — the append-only, hash-chained, nonce-enforced, idempotent operation log.
|
|
3
|
+
|
|
4
|
+
This is the single source of truth for NEDB. Every mutation in the database is an
|
|
5
|
+
Op appended here. Three guarantees live in this one structure:
|
|
6
|
+
|
|
7
|
+
* Replay protection — each client has a strictly-monotonic nonce; an op whose
|
|
8
|
+
nonce is <= the client's last seen nonce is rejected.
|
|
9
|
+
* Idempotency — an op carrying an idempotency key that was already applied
|
|
10
|
+
returns the original result and is NOT appended again.
|
|
11
|
+
* Tamper evidence — ops are chained by hash (h_n = H(h_{n-1} || op_n)), so the
|
|
12
|
+
whole history is a verifiable chain and the head hash is a
|
|
13
|
+
commitment to the entire log (anchorable on a blockchain).
|
|
14
|
+
|
|
15
|
+
The same log is the substrate for MVCC snapshot isolation, crash recovery, and
|
|
16
|
+
time-travel reads: every Op has a monotonic `seq`, and state "AS OF seq N" is just
|
|
17
|
+
the log truncated at N.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import hashlib
|
|
22
|
+
import json
|
|
23
|
+
import time
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
26
|
+
|
|
27
|
+
GENESIS = "0" * 64
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def canon(obj: Any) -> bytes:
    """Encode *obj* as canonical JSON bytes for hashing.

    Keys are sorted and separators carry no whitespace, so equal structures give
    equal bytes.  Values JSON cannot represent natively are passed through
    ``str``.
    """
    # NOTE(review): json sorts dict keys before coercing them to strings, so a
    # payload with non-string keys may encode differently after a JSON
    # round-trip -- confirm that payload keys are always strings.
    text = json.dumps(obj, sort_keys=True, separators=(",", ":"), default=str)
    return text.encode()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def blake(data: bytes) -> str:
    """Return the 32-byte BLAKE2b digest of *data* as 64 lowercase hex characters."""
    # Reference uses BLAKE2b (stdlib). The production Rust core uses BLAKE3
    # (faster, natively tree-structured for the Merkle history).
    hasher = hashlib.blake2b(digest_size=32)
    hasher.update(data)
    return hasher.hexdigest()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ReplayError(Exception):
    """Signals that an op was rejected because its nonce is stale or already used."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class Op:
    """One record of the operation log, linked to its predecessor by hash."""

    seq: int
    client: str
    nonce: int
    op: str                      # put | delete | link | unlink | put_file
    payload: dict
    ts: float
    idem: Optional[str]
    prev_hash: str
    hash: str
    # -- Causal provenance (v0.9.0+) ------------------------------------------
    # Optional; when set, these values are part of the hashed body and are
    # therefore tamper-evident.
    #   caused_by  -- seqs of the ops that led to this write (backward trace).
    #   evidence   -- source type: "user_message" | "inference" | "tool_result"
    #                 | "correction" | "external"
    #   confidence -- agent's certainty in this write (0.0 - 1.0).
    caused_by: Optional[List[int]] = None
    evidence: Optional[str] = None
    confidence: Optional[float] = None

    # -- Bi-temporal valid time (v1.0.0+) -------------------------------------
    # When was the fact true in the world, independent of when it was written?
    #   valid_from -- ISO 8601 date/datetime string; None = "from the beginning"
    #   valid_to   -- ISO 8601 date/datetime string; None = "still valid / open-ended"
    # ISO 8601 strings of the same shape compare correctly as plain strings
    # ("2024-01-01" < "2024-06-15").  Ops without valid-time fields are treated
    # as always valid, and chains written before these fields existed still
    # verify because absent fields are left out of the hashed body.
    valid_from: Optional[str] = None
    valid_to: Optional[str] = None

    def to_dict(self) -> dict:
        """Serialize for the append-only log file (AOF).

        Core fields are always emitted; optional fields only when not None.
        """
        core = ("seq", "client", "nonce", "op", "payload", "ts",
                "idem", "prev_hash", "hash")
        optional = ("caused_by", "evidence", "confidence", "valid_from", "valid_to")
        out: dict = {name: getattr(self, name) for name in core}
        for name in optional:
            value = getattr(self, name)
            if value is not None:
                out[name] = value
        return out

    @classmethod
    def from_dict(cls, d: dict) -> "Op":
        """Rebuild an Op from a ``to_dict()`` mapping.

        ``idem`` and the optional fields default to None when absent; any other
        missing key raises KeyError.
        """
        return cls(
            seq=d["seq"],
            client=d["client"],
            nonce=d["nonce"],
            op=d["op"],
            payload=d["payload"],
            ts=d["ts"],
            idem=d.get("idem"),
            prev_hash=d["prev_hash"],
            hash=d["hash"],
            caused_by=d.get("caused_by"),
            evidence=d.get("evidence"),
            confidence=d.get("confidence"),
            valid_from=d.get("valid_from"),
            valid_to=d.get("valid_to"),
        )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class OpLog:
    """Append-only, hash-chained list of Op records.

    Tracks the highest nonce seen per client (replay protection), the first op
    recorded for each idempotency key (deduplication) and the hash of the most
    recent op (``head``).
    """

    def __init__(self) -> None:
        self.ops: List[Op] = []
        self._last_nonce: Dict[str, int] = {}
        self._idem: Dict[str, int] = {}  # idem key -> seq of original op
        self._head = GENESIS

    def append(
        self,
        client: str,
        nonce: int,
        op: str,
        payload: dict,
        idem: Optional[str] = None,
        ts: Optional[float] = None,
        caused_by: Optional[List[int]] = None,
        evidence: Optional[str] = None,
        confidence: Optional[float] = None,
        valid_from: Optional[str] = None,
        valid_to: Optional[str] = None,
    ) -> Tuple[Op, bool]:
        """Append an op and return ``(op, created)``.

        ``created`` is False when *idem* was already recorded: the original op is
        returned and nothing is appended.  That check runs before the nonce
        check.

        Raises:
            ReplayError: if *nonce* does not exceed the client's last nonce.
        """
        # Idempotency: a known key short-circuits to the op recorded for it.
        if idem is not None:
            original_seq = self._idem.get(idem)
            if original_seq is not None:
                return self.ops[original_seq], False

        # Replay protection: nonces must grow strictly per client.
        last = self._last_nonce.get(client, 0)
        if nonce <= last:
            raise ReplayError(
                f"replay/stale nonce for client '{client}': {nonce} <= {last}"
            )

        prev = self._head
        rec = Op(
            seq=len(self.ops),
            client=client,
            nonce=nonce,
            op=op,
            payload=payload,
            ts=time.time() if ts is None else ts,
            idem=idem,
            prev_hash=prev,
            hash="",  # filled in below, once the body can be derived from rec
            caused_by=caused_by,
            evidence=evidence,
            confidence=confidence,
            valid_from=valid_from,
            valid_to=valid_to,
        )
        # Hash the same body verify() recomputes, so the two cannot drift apart.
        rec.hash = blake(prev.encode() + canon(self._op_body(rec)))

        self.ops.append(rec)
        self._last_nonce[client] = nonce
        if idem is not None:
            self._idem[idem] = rec.seq
        self._head = rec.hash
        return rec, True

    def load(self, ops: List[Op]) -> None:
        """Replace the log with previously persisted *ops* without re-hashing.

        Stored hashes are kept as they are, so the chain and head commitment are
        unchanged.  Per-client nonces, the idempotency map and the head are
        rebuilt from the ops themselves.
        """
        self.ops = list(ops)
        self._last_nonce = {}
        self._idem = {}
        for entry in self.ops:
            if entry.nonce > self._last_nonce.get(entry.client, 0):
                self._last_nonce[entry.client] = entry.nonce
            if entry.idem is not None:
                # The first op carrying a key is the one replays resolve to.
                self._idem.setdefault(entry.idem, entry.seq)
        self._head = self.ops[-1].hash if self.ops else GENESIS

    @staticmethod
    def _op_body(o: "Op") -> dict:
        """Build the dict that is hashed for *o* (same content append() hashes)."""
        body: dict = {
            name: getattr(o, name)
            for name in ("seq", "client", "nonce", "op", "payload", "ts", "idem")
        }
        # Optional fields are hashed only when set, so older ops keep verifying.
        for name in ("caused_by", "evidence", "confidence", "valid_from", "valid_to"):
            value = getattr(o, name)
            if value is not None:
                body[name] = value
        return body

    def verify(self) -> bool:
        """Walk the chain from GENESIS; True when every link and hash matches."""
        prev = GENESIS
        for entry in self.ops:
            expected = blake(prev.encode() + canon(self._op_body(entry)))
            if entry.prev_hash != prev or entry.hash != expected:
                return False
            prev = entry.hash
        return True

    @property
    def head(self) -> str:
        """Hash of the most recent op, or GENESIS for an empty log."""
        return self._head

    def slice_until(self, as_of: int) -> List[Op]:
        """Return the ops whose ``seq`` is at most *as_of*, in log order."""
        return [entry for entry in self.ops if entry.seq <= as_of]

    def __len__(self) -> int:
        return len(self.ops)
|
nedb/merkle.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nedb.merkle — Merkle tree over content-addressed chunk hashes.
|
|
3
|
+
|
|
4
|
+
Because every file version is a list of BLAKE-addressed chunks, a file version has
|
|
5
|
+
a Merkle root that commits to its exact bytes. Any chunk's membership is provable in
|
|
6
|
+
O(log n), and the root can be anchored on-chain (e.g. ITC) for tamper-evident,
|
|
7
|
+
notarized version history.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
from typing import List, Tuple
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _h(b: bytes) -> bytes:
    """Return the raw 32-byte BLAKE2b digest of *b*."""
    hasher = hashlib.blake2b(digest_size=32)
    hasher.update(b)
    return hasher.digest()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _to_bytes(x) -> bytes:
    """Decode a hex string to bytes; pass any other value through ``bytes()``."""
    if isinstance(x, str):
        return bytes.fromhex(x)
    return bytes(x)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def merkle_root(leaves: List[str]) -> str:
    """Return the hex Merkle root of *leaves*.

    An empty list yields 64 zero characters and a single leaf is its own root.
    Each level hashes adjacent pairs; a level with an odd number of nodes pairs
    its last node with itself.

    NOTE(review): because of that self-pairing, ``[a, b, c]`` and
    ``[a, b, c, c]`` share a root -- confirm callers commit to the leaf count
    separately if that matters.
    """
    if not leaves:
        return "0" * 64
    nodes = [_to_bytes(leaf) for leaf in leaves]
    while len(nodes) > 1:
        if len(nodes) % 2:
            nodes.append(nodes[-1])  # odd level: the last node is its own sibling
        nodes = [_h(left + right) for left, right in zip(nodes[0::2], nodes[1::2])]
    return nodes[0].hex()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def merkle_proof(leaves: List[str], idx: int) -> List[Tuple[str, str]]:
    """Return the inclusion proof for the leaf at *idx*.

    The proof lists ``(sibling_hex, side)`` pairs from the leaf level upward;
    ``side`` is "R" when the sibling is hashed to the right of the running node
    and "L" when it is hashed to the left.  A node without a partner on an
    odd-sized level is paired with itself, as in ``merkle_root``.

    Raises:
        IndexError: if *idx* is not a valid position in *leaves*.  Without this
            check an odd out-of-range or negative index produced a path for a
            leaf that does not exist.
    """
    if not 0 <= idx < len(leaves):
        raise IndexError(
            f"leaf index {idx} out of range for {len(leaves)} leaves"
        )
    level = [_to_bytes(x) for x in leaves]
    path: List[Tuple[str, str]] = []
    while len(level) > 1:
        if len(level) % 2:
            level.append(level[-1])  # odd level: the last node is its own sibling
        if idx % 2 == 0:
            path.append((level[idx + 1].hex(), "R"))
        else:
            path.append((level[idx - 1].hex(), "L"))
        level = [_h(a + b) for a, b in zip(level[0::2], level[1::2])]
        idx //= 2
    return path
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def merkle_verify(leaf: str, path: List[Tuple[str, str]], root: str) -> bool:
    """Recompute the root from *leaf* and *path* and compare it with *root*.

    Each step hashes the running node with the sibling on the right when the
    side marker is "R"; any other marker places the sibling on the left.  The
    final comparison is against the lowercase hex of the computed root.
    """
    node = _to_bytes(leaf)
    for sibling_hex, side in path:
        sibling = _to_bytes(sibling_hex)
        if side == "R":
            node = _h(node + sibling)
        else:
            node = _h(sibling + node)
    return node.hex() == root
|