nitrodb 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nedb/__init__.py ADDED
@@ -0,0 +1,92 @@
1
+ """
2
+ NEDB — a versioned, self-compressing, time-traveling embedded database.
3
+
4
+ * Replay-protected & idempotent: every write carries a monotonic nonce and an
5
+ optional idempotency key, enforced by a hash-chained append-only log.
6
+ * Time-travel: read the database AS OF any past sequence number.
7
+ * Relational: first-class, time-travel-aware relations with O(1) traversal.
8
+ * Filterable / sortable / searchable: equality, ordered, and full-text indexes.
9
+ * Queryable: NQL text queries and a fluent builder that share one plan.
10
+ * git-style files with Cascade compression: content-defined chunking + dedup +
11
+ temperature tiers, with a Merkle root per version anchorable on-chain.
12
+
13
+ The pure-Python package is the reference implementation and the always-works
14
+ fallback. When installed from a platform wheel, the compiled Rust core is available
15
+ as ``nedb._native`` (``nedb.__has_native__`` reports whether it loaded).
16
+ """
17
+ from __future__ import annotations
18
+
19
+ from .engine import NEDB
20
+ from .log import Op, OpLog, ReplayError
21
+ from .query import Query, parse_nql
22
+ from .snapshot import save_snapshot, load_snapshot
23
+ from .crypto import resolve_tmk, rewrap_dek
24
+ from .sql import sql_exec, sql_to_nql, SQLError, SQLUnsupportedError
25
+ from .redis_compat import RedisCompat, RedisError, RedisUnsupportedError
26
+ from .mongo import (
27
+ MongoCompat, MongoClient, MongoError, MongoUnsupportedError, ObjectId,
28
+ )
29
+ from .autoindex import AutoIndexDB
30
+ from .concurrent import Sequencer
31
+ from .wrap_redis import wrap_redis, WrappedRedis
32
+ from .proof import verify_proof, fold_head
33
+
34
try:  # compiled Rust core, present in platform wheels (PyO3 via maturin)
    from . import _native  # type: ignore
    __has_native__ = True
except ImportError:  # pure-Python install (sdist / unsupported platform)
    # Register a stub module so `from nedb._native import NedbCore` raises an
    # informative error instead of a bare ImportError with no guidance.
    import os as _os
    import sys as _sys
    import types as _types

    # sys.executable can be None or "" when the interpreter cannot determine
    # its own path, so normalise it before lower-casing; otherwise importing
    # the package would fail with AttributeError on such interpreters.
    _is_msys2 = bool(_os.environ.get("MSYSTEM")) or "mingw" in (_sys.executable or "").lower()

    class _NativeStub(_types.ModuleType):
        """Placeholder for ``nedb._native`` that explains how to obtain the core.

        Any non-dunder attribute access raises ImportError carrying install
        guidance. Dunder lookups raise AttributeError like a normal module.
        """

        # Primary fix: install the Rust crate → get the nedbd server → use HTTP mode.
        # Secondary fix (CPython only): pip reinstall to get the platform wheel with _native embedded.
        _MSG_MSYS2 = (
            "\n\n"
            " nedb._native (embedded v2 DAG core) is not available on MSYS2/MinGW Python.\n\n"
            " To use NEDB v2 features, install the server binary and use HTTP mode:\n\n"
            " cargo install nedb-engine # install nedbd v2 server\n"
            " nedbd --dag ./data # start DAG server\n"
            " NEDB_URL=http://localhost:7070 python3 your_script.py\n\n"
            " Run 'nedbd --doctor' for a full diagnosis.\n"
        )
        _MSG_OTHER = (
            "\n\n"
            " nedb._native (embedded v2 DAG core) is not available.\n"
            " You have the universal wheel — reinstall to get the platform wheel:\n\n"
            " pip install --force-reinstall --no-cache-dir nedb-engine\n\n"
            " Or install the server binary and use HTTP mode (works everywhere):\n\n"
            " cargo install nedb-engine # install nedbd v2 server\n"
            " nedbd --dag ./data # start DAG server\n"
            " NEDB_URL=http://localhost:7070 python3 your_script.py\n\n"
            " Run 'nedbd --doctor' for a full diagnosis.\n"
        )
        _MSG = _MSG_MSYS2 if _is_msys2 else _MSG_OTHER

        def __getattr__(self, name: str):
            # Only reached when normal lookup fails. Dunder probes such as
            # __path__ / __file__ come from the import machinery and from
            # hasattr()-based introspection, which expect AttributeError;
            # raising ImportError there would leak out of hasattr() and make
            # `from nedb._native import X` complain about __path__, not X.
            if name.startswith("__") and name.endswith("__"):
                raise AttributeError(f"module 'nedb._native' has no attribute {name!r}")
            raise ImportError(f"nedb._native.{name} is not available.{self._MSG}")

    _native_stub = _NativeStub("nedb._native")
    _native_stub.__package__ = "nedb"
    _sys.modules["nedb._native"] = _native_stub  # type: ignore
    _native = _native_stub  # type: ignore
    __has_native__ = False
    # Keep the package namespace clean; the stub instance keeps its class alive.
    del _types, _sys, _os, _NativeStub, _native_stub
80
+
81
# Public API of the package. Every name re-exported by the imports at the top
# of this module is listed here, including the crypto helpers resolve_tmk and
# rewrap_dek, which were imported for re-export but missing from the list.
__all__ = [
    "NEDB", "OpLog", "Op", "ReplayError", "Query", "parse_nql",
    "save_snapshot", "load_snapshot",
    "resolve_tmk", "rewrap_dek",
    "sql_exec", "sql_to_nql", "SQLError", "SQLUnsupportedError",
    "RedisCompat", "RedisError", "RedisUnsupportedError",
    "MongoCompat", "MongoClient", "MongoError", "MongoUnsupportedError", "ObjectId",
    "AutoIndexDB", "Sequencer",
    "wrap_redis", "WrappedRedis",
    "verify_proof", "fold_head",
    "_native", "__has_native__",
]
__version__ = "2.4.3"
nedb/autoindex.py ADDED
@@ -0,0 +1,142 @@
1
+ """
2
+ nedb.autoindex — automatic index management.
3
+
4
+ Wraps a NEDB instance and intercepts query() calls. It tracks which fields are
5
+ used in WHERE and ORDER BY clauses per collection. Once a field reaches the
6
+ usage threshold it auto-creates the appropriate index:
7
+
8
+ - Equality conditions (= / !=) → "eq" index
9
+ - Ordered comparisons (< > ≤ ≥) → "ordered" index
10
+ - ORDER BY field → "ordered" index
11
+ - SEARCH clause on a field → deferred (no per-field signal in NQL)
12
+
13
+ Usage::
14
+
15
+ from nedb import NEDB
16
+ from nedb.autoindex import AutoIndexDB
17
+
18
+ db = AutoIndexDB(NEDB("./data"), threshold=3)
19
+ db.query('FROM users WHERE status = "active"') # tallied
20
+ db.query('FROM users WHERE status = "active"')
21
+ db.query('FROM users WHERE status = "active"') # threshold reached → index created
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import re
26
+ from collections import defaultdict
27
+ from typing import Any, Dict, List, Optional, Tuple
28
+
29
+
30
+ _WHERE_RE = re.compile(r"\bWHERE\b([\s\S]*?)(?:\bSEARCH\b|\bORDER\b|\bTRAVERSE\b|\bLIMIT\b|$)", re.IGNORECASE)
31
+ _ORDER_RE = re.compile(r"\bORDER\s+BY\s+(\w+)", re.IGNORECASE)
32
+ _FROM_RE = re.compile(r"\bFROM\s+(\w+)", re.IGNORECASE)
33
+ _COND_RE = re.compile(r"(\w+)\s*(=|!=|<>|<=|>=|<|>)", re.IGNORECASE)
34
+
35
+
36
+ def _parse_signals(nql: str) -> List[Tuple[str, str, str]]:
37
+ """Return [(collection, field, 'eq'|'ordered')] from a NQL query string."""
38
+ signals = []
39
+ fm = _FROM_RE.search(nql)
40
+ if not fm:
41
+ return signals
42
+ coll = fm.group(1)
43
+
44
+ wm = _WHERE_RE.search(nql)
45
+ if wm:
46
+ for m in _COND_RE.finditer(wm.group(1)):
47
+ field, op = m.group(1), m.group(2)
48
+ kind = "eq" if op in ("=", "!=", "<>") else "ordered"
49
+ signals.append((coll, field, kind))
50
+
51
+ om = _ORDER_RE.search(nql)
52
+ if om:
53
+ signals.append((coll, om.group(1), "ordered"))
54
+
55
+ return signals
56
+
57
+
58
class AutoIndexDB:
    """
    NEDB wrapper that creates indexes automatically based on query usage.

    Every attribute not defined here is forwarded to the wrapped database, so
    the wrapper can be used wherever the database itself is used.

    Parameters
    ----------
    db : NEDB
        A NEDB database instance (embedded or opened with a path).
    threshold : int
        Number of times a (collection, field, kind) combination must be
        observed before the index is created. Default: 5.
    verbose : bool
        Print a message when an index is auto-created. Default: False.
    """

    def __init__(self, db: Any, threshold: int = 5, verbose: bool = False):
        self._db = db
        self.threshold = threshold
        self.verbose = verbose
        # counts[(coll, field, kind)] = n
        self._counts: Dict[Tuple[str, str, str], int] = defaultdict(int)
        # indexes already created so we don't re-create
        self._created: set = set()
        # Seed from existing index config if available.
        # NOTE(review): assumes db.indexes.config iterates as
        # (collection, field, kind) triples — confirm against the engine.
        if hasattr(db, "indexes") and hasattr(db.indexes, "config"):
            for coll, field, kind in db.indexes.config:
                self._created.add((coll, field, kind))

    # ── Proxy every NEDB attribute ────────────────────────────────────────────

    def __getattr__(self, name: str) -> Any:
        # __getattr__ only runs when normal lookup fails. If `_db` itself is
        # missing (an instance made with __new__, as copy/pickle do before
        # restoring state), delegating through self._db would re-enter this
        # method forever; fail with a plain AttributeError instead.
        if name == "_db":
            raise AttributeError(name)
        return getattr(self._db, name)

    # ── Instrumented query ────────────────────────────────────────────────────

    def query(self, nql: str) -> List[dict]:
        """Execute a NQL query, tally field usage, and auto-create indexes.

        Each signal parsed from the query text bumps its tally unless that
        exact index is already recorded as created; reaching the threshold
        triggers index creation before the query is forwarded to the
        wrapped database. The wrapped database's result is returned as-is.
        """
        for key in _parse_signals(nql):
            if key in self._created:
                continue
            # "eq" and "ordered" usage of the same field are tallied
            # independently; neither kind suppresses the other.
            self._counts[key] += 1
            if self._counts[key] >= self.threshold:
                self._auto_create(*key)

        return self._db.query(nql)

    def _auto_create(self, coll: str, field: str, kind: str) -> None:
        """Create the index on the wrapped database once, and remember it."""
        key = (coll, field, kind)
        if key in self._created:
            return
        # Don't index internal NEDB fields
        if field.startswith("_") and field not in ("_id",):
            return
        self._db.create_index(coll, field, kind)
        self._created.add(key)
        if self.verbose:
            print(f"[autoindex] created {kind} index on {coll}.{field} (threshold={self.threshold})")

    # ── Manual analysis ───────────────────────────────────────────────────────

    def analyze(self) -> Dict[str, Any]:
        """Return current tallies and the indexes already created."""
        return {
            "tallies": {f"{c}.{f} ({k})": n for (c, f, k), n in self._counts.items()},
            "indexes_created": [f"{c}.{f} ({k})" for (c, f, k) in sorted(self._created)],
            "threshold": self.threshold,
        }

    def suggest(self) -> List[str]:
        """Return tallied candidates that have no index yet, most-used first."""
        out = []
        for (coll, field, kind), count in sorted(self._counts.items(), key=lambda x: -x[1]):
            if (coll, field, kind) not in self._created:
                out.append(f"{coll}.{field} ({kind}) — {count}/{self.threshold} queries")
        return out
File without changes
@@ -0,0 +1,115 @@
1
+ """
2
+ nedb.backends.redis_backend — Redis Streams as the NEDB append-only log.
3
+
4
+ Alice's existing Redis keys are NEVER touched. NEDB operates in a strictly
5
+ isolated namespace:
6
+
7
+ nedb:{db_name}:oplog Redis Stream — hash-chained op log
8
+ nedb:{db_name}:snapshot Redis Hash — checkpoint for fast restart
9
+ nedb:{db_name}:events Pub/Sub chan — live subscriptions (future)
10
+ nedb:{db_name}:meta Redis Hash — version, index config
11
+
12
+ On startup NEDB replays the stream to rebuild its in-memory MVCC store.
13
+ On every write a new entry is XADD'd. One Redis connection, zero impact on
14
+ the user's existing keys.
15
+
16
+ © INTERCHAINED LLC × Claude Sonnet 4.6
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ from typing import Any, Dict, List, Optional
22
+
23
+
24
class RedisBackend:
    """
    Redis-Streams-backed persistence for NEDB.

    Pass an instance to NEDB as the `backend` parameter::

        import redis
        from nedb.backends.redis_backend import RedisBackend
        from nedb import NEDB

        r = redis.Redis("localhost", 6379)
        db = NEDB(backend=RedisBackend(r, "rideshare"))

    Replies are accepted both as bytes (the client default) and as str (a
    client created with ``decode_responses=True``).
    """

    def __init__(self, r: Any, db_name: str):
        self._r = r
        self.db_name = db_name
        self.stream = f"nedb:{db_name}:oplog"
        self.snap_key = f"nedb:{db_name}:snapshot"
        self.meta_key = f"nedb:{db_name}:meta"
        self.events_ch = f"nedb:{db_name}:events"

    # ── Reply helpers ───────────────────────────────────────────────────────────

    @staticmethod
    def _text(value: Any) -> str:
        """Return a reply value as str, decoding it when it arrived as bytes."""
        return value.decode() if isinstance(value, (bytes, bytearray)) else value

    @classmethod
    def _ops_from(cls, entries: Any) -> List[str]:
        """Extract the ``op`` field of each stream entry, in the order given."""
        ops: List[str] = []
        for entry in entries:
            fields = entry[1]
            # The field name is bytes or str depending on the client's decoding mode.
            raw = fields[b"op"] if b"op" in fields else fields["op"]
            ops.append(cls._text(raw))
        return ops

    @classmethod
    def _load_hash(cls, raw: Any) -> Dict[str, Any]:
        """Turn an HGETALL reply of JSON-encoded values back into a dict."""
        return {cls._text(k): json.loads(v) for k, v in raw.items()}

    @staticmethod
    def _dump_hash(data: Dict[str, Any]) -> Dict[str, str]:
        """JSON-encode each value; objects json cannot encode are stringified."""
        return {
            k: json.dumps(v, separators=(",", ":"), default=str)
            for k, v in data.items()
        }

    # ── Op log ──────────────────────────────────────────────────────────────────

    def append(self, op_json: str) -> None:
        """Append one JSON-serialised op to the stream."""
        self._r.xadd(self.stream, {"op": op_json})

    def append_batch(self, ops: List[str]) -> None:
        """Append multiple ops in a single pipeline (one round-trip)."""
        pipe = self._r.pipeline(transaction=False)
        for op_json in ops:
            pipe.xadd(self.stream, {"op": op_json})
        pipe.execute()

    def read_all(self) -> List[str]:
        """Return all ops from the stream in insertion order."""
        return self._ops_from(self._r.xrange(self.stream, "-", "+"))

    def read_after(self, last_id: str = "0") -> List[str]:
        """Return ops appended after `last_id` (for incremental replay)."""
        # The "(" prefix makes the lower bound exclusive.
        return self._ops_from(self._r.xrange(self.stream, f"({last_id}", "+"))

    # ── Snapshot / checkpoint ────────────────────────────────────────────────────

    def save_snapshot(self, data: Dict[str, Any]) -> None:
        """Persist a checkpoint so restart replay only needs the delta.

        An empty ``data`` is a no-op: an HSET with no field/value pairs is
        rejected by the client, and there is nothing to store.
        """
        mapping = self._dump_hash(data)
        if mapping:
            self._r.hset(self.snap_key, mapping=mapping)

    def load_snapshot(self) -> Optional[Dict[str, Any]]:
        """Load the last checkpoint, or None if none exists."""
        raw = self._r.hgetall(self.snap_key)
        if not raw:
            return None
        return self._load_hash(raw)

    # ── Pub/sub live events ──────────────────────────────────────────────────────

    def publish_ops(self, ops: List[str]) -> None:
        """Publish committed ops to the events channel for live subscribers."""
        if ops:
            payload = json.dumps(ops, separators=(",", ":"))
            self._r.publish(self.events_ch, payload)

    # ── Meta ─────────────────────────────────────────────────────────────────────

    def save_meta(self, meta: Dict[str, Any]) -> None:
        """Store each meta entry as a JSON-encoded hash field (no-op when empty)."""
        mapping = self._dump_hash(meta)
        if mapping:
            self._r.hset(self.meta_key, mapping=mapping)

    def load_meta(self) -> Dict[str, Any]:
        """Return the stored meta entries, or an empty dict if none exist."""
        raw = self._r.hgetall(self.meta_key)
        if not raw:
            return {}
        return self._load_hash(raw)

    # ── Utility ──────────────────────────────────────────────────────────────────

    def stream_len(self) -> int:
        """Return the number of entries currently in the op-log stream."""
        return self._r.xlen(self.stream)

    def flush(self) -> None:
        """Delete all NEDB shadow keys for this database (non-destructive to user keys)."""
        # One DEL per key: the three names share no hash tag, so a single
        # multi-key DEL could be refused by a clustered deployment.
        for key in [self.stream, self.snap_key, self.meta_key]:
            self._r.delete(key)
nedb/cascade.py ADDED
@@ -0,0 +1,130 @@
1
+ """
2
+ nedb.cascade — the Cascade compression pipeline + content-addressed blob store.
3
+
4
+ This is what makes NEDB double as a git-style file manager with maximum compression
5
+ WITHOUT inventing a new entropy coder. The novelty is the pipeline composition:
6
+
7
+ 1. Content-defined chunking (Gear rolling hash) — boundaries follow content, so a
8
+ one-byte insert only changes the chunk(s) around it, not everything after it.
9
+ 2. Content-addressed dedup (BLAKE) — identical chunks across all files and all
10
+ versions are stored exactly once.
11
+ 3. Temperature tiers — warm data uses a fast codec (zstd in prod; zlib in this
12
+ reference), cold/archival history uses a maximum-ratio codec (LZMA).
13
+
14
+ The production pipeline adds similarity-picked binary deltas (zstd --patch-from) and
15
+ schema-aware columnar transforms before the entropy stage; both are documented in
16
+ docs/SPEC.md and stubbed for the reference engine.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import hashlib
21
+ import lzma
22
+ import random
23
+ import zlib
24
+ from typing import Dict, List
25
+
26
+ from .merkle import merkle_root
27
+
28
+ # --- Gear-hash content-defined chunking -------------------------------------
29
_MASK = (1 << 13) - 1  # ~8 KiB average chunk
_MIN = 2 * 1024
_MAX = 64 * 1024
_M64 = 0xFFFFFFFFFFFFFFFF
_GEAR = [random.Random(0x12345678 + i).getrandbits(64) for i in range(256)]


def chunk(data: bytes) -> List[bytes]:
    """Split ``data`` into content-defined chunks using a Gear rolling hash.

    A chunk ends at the first position, at least ``_MIN`` bytes in, where the
    rolling hash has all ``_MASK`` bits clear, or after ``_MAX`` bytes, or at
    the end of the input, whichever comes first. Concatenating the returned
    chunks reproduces ``data``; empty input yields an empty list.
    """
    pieces: List[bytes] = []
    total = len(data)
    start = 0
    while start < total:
        stop = min(start + _MAX, total)   # hard upper bound for this chunk
        earliest = start + _MIN           # first end offset at which a cut is allowed
        rolling = 0                       # hash restarts at every chunk boundary
        end = stop
        for offset in range(start, stop):
            rolling = ((rolling << 1) + _GEAR[data[offset]]) & _M64
            if offset + 1 >= earliest and not rolling & _MASK:
                end = offset + 1
                break
        pieces.append(data[start:end])
        start = end
    return pieces
54
+
55
+
56
+ def _blake(b: bytes) -> str:
57
+ return hashlib.blake2b(b, digest_size=32).hexdigest()
58
+
59
+
60
+ # --- temperature tiers ------------------------------------------------------
61
def warm_compress(b: bytes) -> bytes:
    """Compress ``b`` for the warm tier (zlib level 6; zstd stand-in in the reference)."""
    return zlib.compress(b, level=6)


def warm_decompress(b: bytes) -> bytes:
    """Inverse of ``warm_compress``."""
    restored = zlib.decompress(b)
    return restored


def cold_compress(b: bytes) -> bytes:
    """Compress ``b`` for the cold tier: real LZMA, the maximum-ratio archival tier."""
    strongest = lzma.PRESET_EXTREME | 9
    return lzma.compress(b, preset=strongest)


def cold_decompress(b: bytes) -> bytes:
    """Inverse of ``cold_compress``."""
    restored = lzma.decompress(b)
    return restored
75
+
76
+
77
class BlobStore:
    """Content-addressed, deduplicated, tiered blob store with versioned files."""

    def __init__(self, tier: str = "warm") -> None:
        self.tier = tier
        # hash -> compressed bytes
        self.chunks: Dict[str, bytes] = {}
        # name -> {versions, roots}
        self.files: Dict[str, Dict[str, list]] = {}
        self.logical_bytes = 0
        self.dedup_hits = 0

    def _compress(self, b: bytes) -> bytes:
        """Compress with the codec of the store's tier ("cold" → LZMA, otherwise warm)."""
        if self.tier == "cold":
            return cold_compress(b)
        return warm_compress(b)

    def _decompress(self, b: bytes) -> bytes:
        """Decompress with the codec of the store's current tier."""
        if self.tier == "cold":
            return cold_decompress(b)
        return warm_decompress(b)

    def put_file(self, name: str, data: bytes) -> int:
        """Store ``data`` as a new version of ``name`` and return its version index.

        Each chunk is keyed by its hash; a chunk whose hash is already stored
        is counted as a dedup hit instead of being compressed again.
        """
        recipe: List[str] = []
        for piece in chunk(data):
            digest = _blake(piece)
            recipe.append(digest)
            if digest not in self.chunks:
                self.chunks[digest] = self._compress(piece)
            else:
                self.dedup_hits += 1
        self.logical_bytes += len(data)

        entry = self.files.setdefault(name, {"versions": [], "roots": []})
        entry["versions"].append(recipe)
        entry["roots"].append(merkle_root(recipe))
        return len(entry["versions"]) - 1

    def get_file(self, name: str, version: int = -1) -> bytes:
        """Reassemble a stored version (latest by default) from its chunk recipe."""
        recipe = self.files[name]["versions"][version]
        return b"".join(self._decompress(self.chunks[digest]) for digest in recipe)

    def root(self, name: str, version: int = -1) -> str:
        """Return the Merkle root recorded for a version (latest by default)."""
        roots = self.files[name]["roots"]
        return roots[version]

    def stored_bytes(self) -> int:
        """Total size of all compressed chunks held by the store."""
        return sum(map(len, self.chunks.values()))

    def stats(self) -> dict:
        """Summarise the store: tier, chunk counts, byte totals, logical/stored ratio."""
        stored = self.stored_bytes()
        if stored:
            ratio = round(self.logical_bytes / stored, 2)
        else:
            ratio = 0.0
        return {
            "tier": self.tier,
            "unique_chunks": len(self.chunks),
            "dedup_hits": self.dedup_hits,
            "logical_bytes": self.logical_bytes,
            "stored_bytes": stored,
            "ratio": ratio,
        }