PyPI - cryptodb - Versions diffs - 2.4.3__py3-none-any.whl - Mend

cryptodb 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

cryptodb-2.4.3.dist-info/METADATA +61 -0
cryptodb-2.4.3.dist-info/RECORD +27 -0
cryptodb-2.4.3.dist-info/WHEEL +4 -0
cryptodb-2.4.3.dist-info/entry_points.txt +2 -0
cryptodb-2.4.3.dist-info/licenses/LICENSE +65 -0
nedb/__init__.py +92 -0
nedb/autoindex.py +142 -0
nedb/backends/__init__.py +0 -0
nedb/backends/redis_backend.py +115 -0
nedb/cascade.py +130 -0
nedb/concurrent.py +218 -0
nedb/crypto.py +294 -0
nedb/engine.py +783 -0
nedb/index.py +98 -0
nedb/log.py +216 -0
nedb/merkle.py +62 -0
nedb/mongo.py +824 -0
nedb/proof.py +126 -0
nedb/query.py +305 -0
nedb/redis_compat.py +516 -0
nedb/relations.py +51 -0
nedb/resp2.py +250 -0
nedb/server.py +1011 -0
nedb/snapshot.py +216 -0
nedb/sql.py +430 -0
nedb/store.py +68 -0
nedb/wrap_redis.py +725 -0

nedb/concurrent.py ADDED Viewed

@@ -0,0 +1,218 @@
+"""
+nedb.concurrent — make a NEDB database safe AND fast under many concurrent clients,
+without a global lock.
+The problem
+-----------
+A hash-chained append-only log is *inherently* sequential: op N's hash commits to
+op N-1's head (h_n = H(h_{n-1} || op_n)). Two threads cannot append in parallel
+without corrupting the chain. The naive fix — wrap every request in one mutex —
+is correct but slow: it serializes the expensive fsync too, and it blocks readers.
+The design
+----------
+**Single-writer, group-commit sequencer with lock-free MVCC reads.**
+  * Writers don't take a lock. They drop a write *intent* on a queue and await a
+    future. ONE committer thread per database owns all mutation, so the chain is
+    always correct by construction — zero write-write contention.
+  * The committer drains the whole queue as a BATCH, chains + applies every op,
+    then issues ONE fsync for the entire batch. Under load this is *faster*: more
+    concurrent writers → bigger batches → fewer fsyncs per write. This is group
+    commit, the same trick Postgres/Kafka use to turn contention into throughput.
+  * Reads never touch the queue and never take a lock. They run at the last
+    *committed* sequence (snapshot isolation): the MVCC store is append-only and
+    versioned, so a reader pinned to `committed_seq` sees a consistent snapshot
+    even while the committer appends newer versions for the next batch. The only
+    structural hazard — enumerating keys while a new key is inserted — is handled
+    lock-free in MVCCStore.keys() via an optimistic snapshot+retry.
+Net effect: parallel reads, parallel cross-database writes, batched durable writes,
+and a provably correct single chain — no request-level lock anywhere.
+"""
+from __future__ import annotations
+import queue
+import threading
+from dataclasses import dataclass, field
+from typing import Any, List, Optional
+from .engine import NEDB
+from .query import parse_nql
# Sentinel placed on the queue by Sequencer.close(); the committer compares
# queue items against it by identity and exits when it sees it.
_STOP = object()


@dataclass
class _Intent:
    """One queued write request handed from a caller to the committer thread.

    ``kind`` names the engine operation; ``args`` / ``kwargs`` are forwarded to
    it unchanged. The committer records the outcome in ``result`` or ``error``
    and then sets ``done`` so the waiting caller can resume.
    """

    kind: str
    args: tuple
    kwargs: dict
    # default_factory gives every intent its own Event instead of a shared one.
    done: threading.Event = field(default_factory=threading.Event)
    result: Any = None
    error: Optional[BaseException] = None
+class Sequencer:
+    """Concurrent, group-committing front-end over one NEDB database.
+    Drop-in for NEDB from the daemon's perspective: the mutating methods are
+    serialized through a committer thread; reads are concurrent and snapshot-
+    isolated; everything else delegates to the wrapped engine.
+    """
+    def __init__(self, db: NEDB, max_batch: int = 512):
+        self.db = db
+        self.max_batch = max_batch
+        self._q: "queue.Queue[Any]" = queue.Queue()
+        # The seq through which all writes are durably committed and fully applied.
+        # Reads pin to this for snapshot isolation.
+        self._committed_seq: int = db.seq
+        self._closed = False
+        self._committer = threading.Thread(
+            target=self._run, name=f"nedb-committer", daemon=True
+        )
+        self._committer.start()
+    # ── write API: enqueue + await the committer ──────────────────────────────
+    def _submit(self, kind: str, *args: Any, **kwargs: Any) -> Any:
+        if self._closed:
+            raise RuntimeError("Sequencer is closed")
+        intent = _Intent(kind, args, kwargs)
+        self._q.put(intent)
+        intent.done.wait()
+        if intent.error is not None:
+            raise intent.error
+        return intent.result
+    def put(self, coll: str, id: str, doc: dict, **kw: Any) -> Any:
+        return self._submit("put", coll, id, doc, **kw)
+    def delete(self, coll: str, id: str, **kw: Any) -> Any:
+        return self._submit("delete", coll, id, **kw)
+    def link(self, frm: str, rel: str, to: str, **kw: Any) -> Any:
+        return self._submit("link", frm, rel, to, **kw)
+    def unlink(self, frm: str, rel: str, to: str, **kw: Any) -> Any:
+        return self._submit("unlink", frm, rel, to, **kw)
+    def create_index(self, *a: Any, **k: Any) -> Any:
+        return self._submit("create_index", *a, **k)
+    def put_file(self, *a: Any, **k: Any) -> Any:
+        return self._submit("put_file", *a, **k)
+    def checkpoint(self) -> Any:
+        return self._submit("checkpoint")
+    # ── read API: concurrent, snapshot-isolated at committed_seq ───────────────
+    def query(self, nql: str) -> List[dict]:
+        plan = parse_nql(nql)
+        if plan.get("as_of") is None:
+            plan["as_of"] = self._committed_seq
+        return self.db.execute(plan)
+    def get(self, coll: str, id: str, as_of: Optional[int] = None) -> Optional[dict]:
+        return self.db.get(coll, id, self._committed_seq if as_of is None else as_of)
+    def neighbors(self, frm: str, rel: str, as_of: Optional[int] = None) -> List[str]:
+        return self.db.neighbors(frm, rel, self._committed_seq if as_of is None else as_of)
+    def inbound(self, to: str, rel: str, as_of: Optional[int] = None) -> List[str]:
+        return self.db.inbound(to, rel, self._committed_seq if as_of is None else as_of)
+    def verify(self) -> bool:
+        return self.db.verify()
+    def get_file(self, *a: Any, **k: Any) -> Any:
+        return self.db.get_file(*a, **k)
+    @property
+    def seq(self) -> int:
+        return self.db.seq
+    @property
+    def head(self) -> str:
+        return self.db.head
+    @property
+    def committed_seq(self) -> int:
+        return self._committed_seq
+    # Everything else (log, store, indexes, relations, blobs, path, _dek, flush,
+    # close-of-engine, etc.) delegates to the wrapped engine.
+    def __getattr__(self, name: str) -> Any:
+        # __getattr__ only fires for attrs not found normally, so self.db is safe.
+        return getattr(self.db, name)
+    # ── the single writer ──────────────────────────────────────────────────────
+    def _run(self) -> None:
+        db = self.db
+        db._defer_sync = True  # group commit: we fsync once per batch
+        while True:
+            first = self._q.get()
+            if first is _STOP:
+                return
+            batch: List[Any] = [first]
+            while len(batch) < self.max_batch:
+                try:
+                    nxt = self._q.get_nowait()
+                except queue.Empty:
+                    break
+                batch.append(nxt)
+            if self._commit_batch(batch):
+                return  # saw _STOP
+    def _commit_batch(self, batch: List[Any]) -> bool:
+        db = self.db
+        saw_stop = False
+        # 1) chain + apply every op in order (in-memory + buffered AOF write).
+        #    No fsync here; readers (pinned to the OLD committed_seq) are isolated.
+        for intent in batch:
+            if intent is _STOP:
+                saw_stop = True
+                continue
+            try:
+                intent.result = self._apply_one(intent)
+            except BaseException as e:  # capture per-intent; never kill the committer
+                intent.error = e
+        # 2) ONE durable fsync for the whole batch (group commit).
+        try:
+            db.flush()
+        except Exception:
+            pass
+        # 3) publish the new snapshot, THEN wake writers (read-your-writes holds).
+        self._committed_seq = db.seq
+        for intent in batch:
+            if intent is not _STOP:
+                intent.done.set()
+        return saw_stop
+    def _apply_one(self, intent: _Intent) -> Any:
+        db, k = self.db, intent.kind
+        if k == "put":
+            return db.put(*intent.args, **intent.kwargs)
+        if k == "delete":
+            return db.delete(*intent.args, **intent.kwargs)
+        if k == "link":
+            return db.link(*intent.args, **intent.kwargs)
+        if k == "unlink":
+            return db.unlink(*intent.args, **intent.kwargs)
+        if k == "create_index":
+            return db.create_index(*intent.args, **intent.kwargs)
+        if k == "put_file":
+            return db.put_file(*intent.args, **intent.kwargs)
+        if k == "checkpoint":
+            return db.checkpoint()
+        raise ValueError(f"unknown write kind: {k}")
+    def close(self) -> None:
+        if self._closed:
+            return
+        self._closed = True
+        self._q.put(_STOP)
+        self._committer.join(timeout=5)
+        self.db.close()

nedb/crypto.py ADDED Viewed

@@ -0,0 +1,294 @@
+"""
+nedb.crypto — AES-256-GCM encryption at rest with a double-envelope key structure.
+Architecture
+────────────
+External TMK (Table Master Key)  ← provided by operator (env / arg / key file)
+         ↓  AES-256-GCM wrap
+       DEK  (Data Encryption Key)  ← random, per database, stored in key.enc
+         ↓  AES-256-GCM encrypt
+       Data  (AOF lines, snapshot.json, blob chunks)
+Key rotation: supply a new TMK and call rewrap_dek(). The DEK (and therefore
+all data) stays untouched — only key.enc is rewritten.
+Toggle: if no TMK is configured (no arg, no env, no key file), every function
+is a zero-overhead pass-through. Existing unencrypted databases work unchanged.
+TMK sources (priority order):
+  1. NEDB(path, tmk=<bytes>)              — programmatic
+  2. NEDB_TMK=<64-char hex>               — environment variable
+  3. NEDB_TMK_FILE=/path/to/keyfile       — raw bytes from a file
+  4. (none)                               — encryption disabled
+HKDF normalization: the TMK may be any length ≥ 16 bytes; it is always
+stretched / compressed to exactly 32 bytes via HKDF-SHA256 before use, so
+passphrases and key files of any size are accepted safely.
+Backend: pycryptodome (primary, cross-platform, pre-built wheels for all OSes
+including Windows MinGW — no cffi / C compiler required).  Falls back to
+cryptography if pycryptodome is not available (backwards compatibility for
+existing installations that already have cryptography).
+Install:
+    pip install nedb-engine[encryption]      # installs pycryptodome
+"""
+from __future__ import annotations
+import base64
+import json
+import os
+from typing import Optional
+# ── Backend detection ────────────────────────────────────────────────────────
+# pycryptodome is the primary backend: pre-built binary wheels for all
+# platforms (Linux / macOS / Windows x86 / Windows arm64 / Windows MinGW)
+# with no cffi dependency — installs everywhere without a C compiler.
_BACKEND: Optional[str] = None
_HAVE_CRYPTO = False
try:
    from Crypto.Cipher import AES as _PCD_AES             # type: ignore[import]
    from Crypto.Protocol.KDF import HKDF as _PCD_HKDF     # type: ignore[import]
    from Crypto.Hash import SHA256 as _PCD_SHA256          # type: ignore[import]
except ImportError:
    # Fallback: cryptography (older installations / explicit [encryption] extra)
    try:
        from cryptography.hazmat.primitives.ciphers.aead import AESGCM as _CG_AESGCM  # type: ignore[import]
        from cryptography.hazmat.primitives.kdf.hkdf import HKDF as _CG_HKDF          # type: ignore[import]
        from cryptography.hazmat.primitives import hashes as _CG_hashes                # type: ignore[import]
    except ImportError:
        # Neither backend is importable: both flags keep their "unavailable" values.
        pass
    else:
        _BACKEND, _HAVE_CRYPTO = "cryptography", True
else:
    _BACKEND, _HAVE_CRYPTO = "pycryptodome", True

# Sizes in bytes.
KEY_LEN = 32    # 256-bit key
NONCE_LEN = 12  # 96-bit GCM nonce (standard recommendation)
TAG_LEN = 16    # 128-bit GCM authentication tag

# Additional Authenticated Data tags — bind ciphertext to its purpose.
_AAD_DEK = b"NEDB-DEK-v1"
_AAD_DATA = b"NEDB-data-v1"
def _require_crypto() -> None:
    """Raise ImportError with install instructions when no AES backend was imported."""
    if _HAVE_CRYPTO:
        return
    raise ImportError(
        "NEDB encryption at rest requires pycryptodome or cryptography.\n"
        "Install with:  pip install 'nedb-engine[encryption]'\n"
        "  (or:         pip install pycryptodome)"
    )
+# ── Key derivation ────────────────────────────────────────────────────────────
def derive_key(material: bytes) -> bytes:
    """Derive a KEY_LEN-byte key from *material* with HKDF-SHA256.

    Both backends use the same fixed salt and info/context string, so a given
    input yields the same key regardless of which library is installed.

    Raises:
        ImportError: no crypto backend is available.
    """
    _require_crypto()
    salt, info = b"NEDB-hkdf-v1", b"nedb-key"
    if _BACKEND != "pycryptodome":
        hkdf = _CG_HKDF(
            algorithm=_CG_hashes.SHA256(), length=KEY_LEN,
            salt=salt, info=info,
        )
        return hkdf.derive(material)
    return _PCD_HKDF(
        master=material, key_len=KEY_LEN,
        salt=salt,
        hashmod=_PCD_SHA256,
        context=info,
    )
def resolve_tmk(tmk_arg: Optional[bytes] = None) -> Optional[bytes]:
    """
    Return the 32-byte TMK to use, or None if encryption is not configured.

    Priority: explicit arg > NEDB_TMK env (hex) > NEDB_TMK_FILE env.

    Raises:
        ValueError: NEDB_TMK is not valid hex, or the selected source yields
            empty key material.
        OSError: the file named by NEDB_TMK_FILE cannot be read.
    """
    material: Optional[bytes] = None
    if tmk_arg is not None:
        material = tmk_arg
    elif os.environ.get("NEDB_TMK"):
        try:
            material = bytes.fromhex(os.environ["NEDB_TMK"])
        except ValueError as e:
            raise ValueError(f"NEDB_TMK is not valid hex: {e}") from e
    elif os.environ.get("NEDB_TMK_FILE"):
        with open(os.environ["NEDB_TMK_FILE"], "rb") as fh:
            # Surrounding whitespace (e.g. a trailing newline) is not key material.
            material = fh.read().strip()
    if material is None:
        return None
    if not material:
        # An empty key (blank key file, b"" argument, whitespace-only hex)
        # would otherwise derive one fixed key that anyone can recompute.
        # Lengths between 1 and 15 bytes are still accepted so existing
        # databases keep opening, although the module docs ask for >= 16.
        raise ValueError("NEDB TMK is empty; supply at least 16 bytes of key material")
    return derive_key(material)
+# ── Low-level primitives ──────────────────────────────────────────────────────
+# On-disk format: nonce‖ciphertext‖tag  (12 + len + 16 bytes)
+# Both backends produce and consume the same byte layout for full compatibility
+# with databases created by either backend.
def encrypt_bytes(plaintext: bytes, dek: bytes, aad: bytes = _AAD_DATA) -> bytes:
    """Encrypt *plaintext* with AES-GCM under *dek*, authenticating *aad*.

    A fresh random NONCE_LEN-byte nonce is drawn for every call.

    Returns:
        nonce‖ciphertext‖tag as one bytes object.

    Raises:
        ImportError: no crypto backend is available.
    """
    _require_crypto()
    nonce = os.urandom(NONCE_LEN)
    if _BACKEND != "pycryptodome":
        # cryptography's AESGCM already returns ciphertext‖tag.
        return nonce + _CG_AESGCM(dek).encrypt(nonce, plaintext, aad)
    gcm = _PCD_AES.new(dek, _PCD_AES.MODE_GCM, nonce=nonce)
    gcm.update(aad)
    body, tag = gcm.encrypt_and_digest(plaintext)
    return nonce + body + tag
def decrypt_bytes(data: bytes, dek: bytes, aad: bytes = _AAD_DATA) -> bytes:
    """Decrypt a nonce‖ciphertext‖tag blob produced by ``encrypt_bytes``.

    Raises:
        ImportError: no crypto backend is available.
        ValueError: *data* is too short to hold a nonce and a tag, or (with
            pycryptodome) authentication failed.
        Exception: with the cryptography backend an authentication failure
            surfaces as that library's InvalidTag.
    """
    _require_crypto()
    if len(data) < NONCE_LEN + TAG_LEN:
        # Without this check the slices below overlap (the "tag" would reuse
        # nonce bytes) and the caller gets a misleading backend error.
        raise ValueError(
            f"encrypted blob too short: {len(data)} bytes, "
            f"need at least {NONCE_LEN + TAG_LEN}"
        )
    nonce      = data[:NONCE_LEN]
    ciphertext = data[NONCE_LEN:-TAG_LEN]
    tag        = data[-TAG_LEN:]
    if _BACKEND == "pycryptodome":
        cipher = _PCD_AES.new(dek, _PCD_AES.MODE_GCM, nonce=nonce)
        cipher.update(aad)
        return cipher.decrypt_and_verify(ciphertext, tag)
    # cryptography's AESGCM expects ciphertext‖tag as one buffer.
    return _CG_AESGCM(dek).decrypt(nonce, ciphertext + tag, aad)
+# ── DEK management ────────────────────────────────────────────────────────────
# File name, inside the database directory, that holds the wrapped DEK.
KEY_ENC_FILE = "key.enc"


def _key_enc_path(data_dir: str) -> str:
    """Return the path of the wrapped-DEK file inside *data_dir*."""
    key_file = os.path.join(data_dir, KEY_ENC_FILE)
    return key_file
def generate_dek() -> bytes:
    """Return KEY_LEN fresh random bytes from ``os.urandom`` to use as a DEK."""
    fresh_key = os.urandom(KEY_LEN)
    return fresh_key
def wrap_dek(dek: bytes, tmk: bytes) -> dict:
    """Encrypt *dek* under *tmk* and return a JSON-serialisable envelope.

    The envelope holds the format version, the algorithm name, the hex nonce
    (``n``) and the hex ciphertext‖tag (``ct``). The DEK-specific AAD tag is
    authenticated alongside the ciphertext.

    Raises:
        ImportError: no crypto backend is available.
    """
    # encrypt_bytes performs the same nonce generation and AES-GCM call and
    # returns nonce‖ciphertext‖tag, so split the nonce back off its front.
    sealed = encrypt_bytes(dek, tmk, _AAD_DEK)
    nonce, ct_with_tag = sealed[:NONCE_LEN], sealed[NONCE_LEN:]
    return {"v": 1, "alg": "AES-256-GCM", "n": nonce.hex(), "ct": ct_with_tag.hex()}
def unwrap_dek(wrapped: dict, tmk: bytes) -> bytes:
    """Recover the DEK from an envelope produced by ``wrap_dek``.

    Raises:
        ImportError: no crypto backend is available.
        KeyError: the envelope lacks the ``n`` or ``ct`` field.
        ValueError: a field is not valid hex, or (pycryptodome) the TMK is
            wrong / the envelope was tampered with.
        Exception: with the cryptography backend a failed authentication
            surfaces as that library's InvalidTag.
    """
    _require_crypto()
    nonce = bytes.fromhex(wrapped["n"])
    sealed = bytes.fromhex(wrapped["ct"])
    if _BACKEND != "pycryptodome":
        # cryptography takes ciphertext‖tag as a single buffer.
        return _CG_AESGCM(tmk).decrypt(nonce, sealed, _AAD_DEK)
    body, tag = sealed[:-TAG_LEN], sealed[-TAG_LEN:]
    gcm = _PCD_AES.new(tmk, _PCD_AES.MODE_GCM, nonce=nonce)
    gcm.update(_AAD_DEK)
    return gcm.decrypt_and_verify(body, tag)
def load_or_create_dek(data_dir: str, tmk: bytes) -> bytes:
    """
    Return the database's DEK.

    If key.enc is absent from *data_dir*, a new DEK is generated, wrapped
    under *tmk*, saved, and returned. Otherwise the stored envelope is read
    and unwrapped with *tmk*.
    """
    key_file = _key_enc_path(data_dir)
    if not os.path.exists(key_file):
        fresh = generate_dek()
        _save_wrapped_dek(data_dir, fresh, tmk)
        return fresh
    with open(key_file, encoding="utf-8") as fh:
        envelope = json.load(fh)
    return unwrap_dek(envelope, tmk)
def _save_wrapped_dek(data_dir: str, dek: bytes, tmk: bytes) -> None:
    """Wrap *dek* under *tmk* and write it to key.enc via a temp file.

    The envelope is written to ``key.enc.tmp``, flushed and fsynced, and only
    then moved over the real file with ``os.replace``.
    """
    final_path = _key_enc_path(data_dir)
    scratch = final_path + ".tmp"
    with open(scratch, "w", encoding="utf-8") as out:
        out.write(json.dumps(wrap_dek(dek, tmk)))
        out.flush()
        os.fsync(out.fileno())
    os.replace(scratch, final_path)
def rewrap_dek(data_dir: str, old_tmk: bytes, new_tmk: bytes) -> None:
    """
    Key rotation: re-wrap the existing DEK under a new TMK without touching data.

    After this call key.enc can only be unwrapped with new_tmk.

    Raises:
        FileNotFoundError: *data_dir* has no key.enc, so there is nothing to
            rotate.
        Exception: whatever ``unwrap_dek`` raises when old_tmk is wrong or
            key.enc is malformed.
    """
    # Read the existing envelope directly instead of going through
    # load_or_create_dek: on a missing key.enc (e.g. a mistyped data_dir) that
    # helper would mint a brand-new DEK and the "rotation" would silently
    # succeed with a key that decrypts none of the existing data.
    with open(_key_enc_path(data_dir), encoding="utf-8") as fh:
        wrapped = json.load(fh)
    dek = unwrap_dek(wrapped, old_tmk)
    _save_wrapped_dek(data_dir, dek, new_tmk)
+# ── AOF line helpers ──────────────────────────────────────────────────────────
def aof_encode(op_json: str, dek: Optional[bytes]) -> str:
    """Return the text to store for one AOF line.

    Without a DEK the op JSON is returned unchanged. With a DEK it is
    encrypted and wrapped in a compact ``{"enc":1,"ct":<base64>}`` envelope.
    """
    if dek is not None:
        sealed = encrypt_bytes(op_json.encode(), dek)
        envelope = {"enc": 1, "ct": base64.b64encode(sealed).decode()}
        return json.dumps(envelope, separators=(",", ":"))
    return op_json
def aof_decode(line: str, dek: Optional[bytes]) -> str:
    """Return the plaintext op JSON for one stored AOF line.

    The line is stripped first. It is returned as-is when it is blank, when no
    DEK is configured, or when it is not an ``{"enc":1,"ct":...}`` envelope
    (so plaintext lines still load when a key is configured). A recognised
    envelope is base64-decoded and decrypted.

    Raises:
        ValueError: the envelope's ``ct`` is not valid base64, or decryption
            fails (wrong key, corruption, tampering).
        Exception: backend-specific authentication errors from
            ``decrypt_bytes``.
    """
    stripped = line.strip()
    if not stripped or dek is None:
        return stripped
    # Only the "is this an envelope?" probe is allowed to fail quietly.
    try:
        env = json.loads(stripped)
    except (ValueError, RecursionError):
        return stripped
    is_envelope = (
        isinstance(env, dict)
        and env.get("enc") == 1
        and isinstance(env.get("ct"), str)
    )
    if not is_envelope:
        return stripped
    # Decryption errors must propagate: swallowing them would hand the
    # ciphertext envelope back to the caller as if it were a valid operation.
    return decrypt_bytes(base64.b64decode(env["ct"]), dek).decode()
+# ── Snapshot helpers ──────────────────────────────────────────────────────────
def snapshot_encode(content: bytes, dek: Optional[bytes]) -> bytes:
    """Return the bytes to store for a snapshot.

    Without a DEK *content* is returned unchanged. With a DEK it is encrypted
    and wrapped in a compact ``{"enc":1,"ct":<base64>}`` JSON envelope.
    """
    if dek is not None:
        sealed = encrypt_bytes(content, dek)
        envelope = {"enc": 1, "ct": base64.b64encode(sealed).decode()}
        return json.dumps(envelope, separators=(",", ":")).encode()
    return content
def snapshot_decode(raw: bytes, dek: Optional[bytes]) -> bytes:
    """Return the plaintext snapshot bytes for stored content *raw*.

    *raw* is returned unchanged when no DEK is configured or when it is not an
    ``{"enc":1,"ct":...}`` envelope (so plaintext snapshots still load when a
    key is configured). A recognised envelope is base64-decoded and decrypted.

    Raises:
        ValueError: the envelope's ``ct`` is not valid base64, or decryption
            fails (wrong key, corruption, tampering).
        Exception: backend-specific authentication errors from
            ``decrypt_bytes``.
    """
    if dek is None:
        return raw
    # Only the "is this an envelope?" probe is allowed to fail quietly;
    # ValueError covers both invalid JSON and undecodable bytes.
    try:
        env = json.loads(raw)
    except (ValueError, RecursionError):
        return raw
    is_envelope = (
        isinstance(env, dict)
        and env.get("enc") == 1
        and isinstance(env.get("ct"), str)
    )
    if not is_envelope:
        return raw
    # Decryption errors must propagate: returning the envelope itself would
    # make a tampered or wrongly-keyed snapshot look like valid content.
    return decrypt_bytes(base64.b64decode(env["ct"]), dek)
+# ── BlobStore chunk helpers ───────────────────────────────────────────────────
def chunk_encode(compressed_bytes: bytes, dek: Optional[bytes]) -> bytes:
    """Encrypt a blob chunk with *dek*; pass it through unchanged when *dek* is None."""
    if dek is None:
        return compressed_bytes
    return encrypt_bytes(compressed_bytes, dek)
def chunk_decode(stored_bytes: bytes, dek: Optional[bytes]) -> bytes:
    """Decrypt a stored blob chunk with *dek*; pass it through unchanged when *dek* is None."""
    if dek is None:
        return stored_bytes
    return decrypt_bytes(stored_bytes, dek)