cryptodb 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nedb/concurrent.py ADDED
@@ -0,0 +1,218 @@
1
+ """
2
+ nedb.concurrent — make a NEDB database safe AND fast under many concurrent clients,
3
+ without a global lock.
4
+
5
+ The problem
6
+ -----------
7
+ A hash-chained append-only log is *inherently* sequential: op N's hash commits to
8
+ op N-1's head (h_n = H(h_{n-1} || op_n)). Two threads cannot append in parallel
9
+ without corrupting the chain. The naive fix — wrap every request in one mutex —
10
+ is correct but slow: it serializes the expensive fsync too, and it blocks readers.
11
+
12
+ The design
13
+ ----------
14
+ **Single-writer, group-commit sequencer with lock-free MVCC reads.**
15
+
16
+ * Writers don't take a lock. They drop a write *intent* on a queue and await a
17
+ future. ONE committer thread per database owns all mutation, so the chain is
18
+ always correct by construction — zero write-write contention.
19
+
20
+ * The committer drains the whole queue as a BATCH, chains + applies every op,
21
+ then issues ONE fsync for the entire batch. Under load this is *faster*: more
22
+ concurrent writers → bigger batches → fewer fsyncs per write. This is group
23
+ commit, the same trick Postgres/Kafka use to turn contention into throughput.
24
+
25
+ * Reads never touch the queue and never take a lock. They run at the last
26
+ *committed* sequence (snapshot isolation): the MVCC store is append-only and
27
+ versioned, so a reader pinned to `committed_seq` sees a consistent snapshot
28
+ even while the committer appends newer versions for the next batch. The only
29
+ structural hazard — enumerating keys while a new key is inserted — is handled
30
+ lock-free in MVCCStore.keys() via an optimistic snapshot+retry.
31
+
32
+ Net effect: parallel reads, parallel cross-database writes, batched durable writes,
33
+ and a provably correct single chain — no request-level lock anywhere.
34
+ """
35
+ from __future__ import annotations
36
+
37
+ import queue
38
+ import threading
39
+ from dataclasses import dataclass, field
40
+ from typing import Any, List, Optional
41
+
42
+ from .engine import NEDB
43
+ from .query import parse_nql
44
+
45
# Sentinel enqueued by Sequencer.close(); the committer thread exits once it
# dequeues it (compared by identity, so it can never collide with an intent).
_STOP = object()
46
+
47
+
48
+ @dataclass
49
+ class _Intent:
50
+ kind: str
51
+ args: tuple
52
+ kwargs: dict
53
+ done: threading.Event = field(default_factory=threading.Event)
54
+ result: Any = None
55
+ error: Optional[BaseException] = None
56
+
57
+
58
+ class Sequencer:
59
+ """Concurrent, group-committing front-end over one NEDB database.
60
+
61
+ Drop-in for NEDB from the daemon's perspective: the mutating methods are
62
+ serialized through a committer thread; reads are concurrent and snapshot-
63
+ isolated; everything else delegates to the wrapped engine.
64
+ """
65
+
66
+ def __init__(self, db: NEDB, max_batch: int = 512):
67
+ self.db = db
68
+ self.max_batch = max_batch
69
+ self._q: "queue.Queue[Any]" = queue.Queue()
70
+ # The seq through which all writes are durably committed and fully applied.
71
+ # Reads pin to this for snapshot isolation.
72
+ self._committed_seq: int = db.seq
73
+ self._closed = False
74
+ self._committer = threading.Thread(
75
+ target=self._run, name=f"nedb-committer", daemon=True
76
+ )
77
+ self._committer.start()
78
+
79
+ # ── write API: enqueue + await the committer ──────────────────────────────
80
+ def _submit(self, kind: str, *args: Any, **kwargs: Any) -> Any:
81
+ if self._closed:
82
+ raise RuntimeError("Sequencer is closed")
83
+ intent = _Intent(kind, args, kwargs)
84
+ self._q.put(intent)
85
+ intent.done.wait()
86
+ if intent.error is not None:
87
+ raise intent.error
88
+ return intent.result
89
+
90
+ def put(self, coll: str, id: str, doc: dict, **kw: Any) -> Any:
91
+ return self._submit("put", coll, id, doc, **kw)
92
+
93
+ def delete(self, coll: str, id: str, **kw: Any) -> Any:
94
+ return self._submit("delete", coll, id, **kw)
95
+
96
+ def link(self, frm: str, rel: str, to: str, **kw: Any) -> Any:
97
+ return self._submit("link", frm, rel, to, **kw)
98
+
99
+ def unlink(self, frm: str, rel: str, to: str, **kw: Any) -> Any:
100
+ return self._submit("unlink", frm, rel, to, **kw)
101
+
102
+ def create_index(self, *a: Any, **k: Any) -> Any:
103
+ return self._submit("create_index", *a, **k)
104
+
105
+ def put_file(self, *a: Any, **k: Any) -> Any:
106
+ return self._submit("put_file", *a, **k)
107
+
108
+ def checkpoint(self) -> Any:
109
+ return self._submit("checkpoint")
110
+
111
+ # ── read API: concurrent, snapshot-isolated at committed_seq ───────────────
112
+ def query(self, nql: str) -> List[dict]:
113
+ plan = parse_nql(nql)
114
+ if plan.get("as_of") is None:
115
+ plan["as_of"] = self._committed_seq
116
+ return self.db.execute(plan)
117
+
118
+ def get(self, coll: str, id: str, as_of: Optional[int] = None) -> Optional[dict]:
119
+ return self.db.get(coll, id, self._committed_seq if as_of is None else as_of)
120
+
121
+ def neighbors(self, frm: str, rel: str, as_of: Optional[int] = None) -> List[str]:
122
+ return self.db.neighbors(frm, rel, self._committed_seq if as_of is None else as_of)
123
+
124
+ def inbound(self, to: str, rel: str, as_of: Optional[int] = None) -> List[str]:
125
+ return self.db.inbound(to, rel, self._committed_seq if as_of is None else as_of)
126
+
127
+ def verify(self) -> bool:
128
+ return self.db.verify()
129
+
130
+ def get_file(self, *a: Any, **k: Any) -> Any:
131
+ return self.db.get_file(*a, **k)
132
+
133
+ @property
134
+ def seq(self) -> int:
135
+ return self.db.seq
136
+
137
+ @property
138
+ def head(self) -> str:
139
+ return self.db.head
140
+
141
+ @property
142
+ def committed_seq(self) -> int:
143
+ return self._committed_seq
144
+
145
+ # Everything else (log, store, indexes, relations, blobs, path, _dek, flush,
146
+ # close-of-engine, etc.) delegates to the wrapped engine.
147
+ def __getattr__(self, name: str) -> Any:
148
+ # __getattr__ only fires for attrs not found normally, so self.db is safe.
149
+ return getattr(self.db, name)
150
+
151
+ # ── the single writer ──────────────────────────────────────────────────────
152
+ def _run(self) -> None:
153
+ db = self.db
154
+ db._defer_sync = True # group commit: we fsync once per batch
155
+ while True:
156
+ first = self._q.get()
157
+ if first is _STOP:
158
+ return
159
+ batch: List[Any] = [first]
160
+ while len(batch) < self.max_batch:
161
+ try:
162
+ nxt = self._q.get_nowait()
163
+ except queue.Empty:
164
+ break
165
+ batch.append(nxt)
166
+ if self._commit_batch(batch):
167
+ return # saw _STOP
168
+
169
+ def _commit_batch(self, batch: List[Any]) -> bool:
170
+ db = self.db
171
+ saw_stop = False
172
+ # 1) chain + apply every op in order (in-memory + buffered AOF write).
173
+ # No fsync here; readers (pinned to the OLD committed_seq) are isolated.
174
+ for intent in batch:
175
+ if intent is _STOP:
176
+ saw_stop = True
177
+ continue
178
+ try:
179
+ intent.result = self._apply_one(intent)
180
+ except BaseException as e: # capture per-intent; never kill the committer
181
+ intent.error = e
182
+ # 2) ONE durable fsync for the whole batch (group commit).
183
+ try:
184
+ db.flush()
185
+ except Exception:
186
+ pass
187
+ # 3) publish the new snapshot, THEN wake writers (read-your-writes holds).
188
+ self._committed_seq = db.seq
189
+ for intent in batch:
190
+ if intent is not _STOP:
191
+ intent.done.set()
192
+ return saw_stop
193
+
194
+ def _apply_one(self, intent: _Intent) -> Any:
195
+ db, k = self.db, intent.kind
196
+ if k == "put":
197
+ return db.put(*intent.args, **intent.kwargs)
198
+ if k == "delete":
199
+ return db.delete(*intent.args, **intent.kwargs)
200
+ if k == "link":
201
+ return db.link(*intent.args, **intent.kwargs)
202
+ if k == "unlink":
203
+ return db.unlink(*intent.args, **intent.kwargs)
204
+ if k == "create_index":
205
+ return db.create_index(*intent.args, **intent.kwargs)
206
+ if k == "put_file":
207
+ return db.put_file(*intent.args, **intent.kwargs)
208
+ if k == "checkpoint":
209
+ return db.checkpoint()
210
+ raise ValueError(f"unknown write kind: {k}")
211
+
212
+ def close(self) -> None:
213
+ if self._closed:
214
+ return
215
+ self._closed = True
216
+ self._q.put(_STOP)
217
+ self._committer.join(timeout=5)
218
+ self.db.close()
nedb/crypto.py ADDED
@@ -0,0 +1,294 @@
1
+ """
2
+ nedb.crypto — AES-256-GCM encryption at rest with a double-envelope key structure.
3
+
4
+ Architecture
5
+ ────────────
6
+ External TMK (Table Master Key) ← provided by operator (env / arg / key file)
7
+ ↓ AES-256-GCM wrap
8
+ DEK (Data Encryption Key) ← random, per database, stored in key.enc
9
+ ↓ AES-256-GCM encrypt
10
+ Data (AOF lines, snapshot.json, blob chunks)
11
+
12
+ Key rotation: supply a new TMK and call rewrap_dek(). The DEK (and therefore
13
+ all data) stays untouched — only key.enc is rewritten.
14
+
15
+ Toggle: if no TMK is configured (no arg, no env, no key file), every function
16
+ is a zero-overhead pass-through. Existing unencrypted databases work unchanged.
17
+
18
+ TMK sources (priority order):
19
+ 1. NEDB(path, tmk=<bytes>) — programmatic
20
+ 2. NEDB_TMK=<64-char hex> — environment variable
21
+ 3. NEDB_TMK_FILE=/path/to/keyfile — raw bytes from a file
22
+ 4. (none) — encryption disabled
23
+
24
+ HKDF normalization: the TMK should be ≥ 16 bytes (this module does not check); it is always
25
+ stretched / compressed to exactly 32 bytes via HKDF-SHA256 before use, so
26
+ passphrases and key files of any size are accepted safely.
27
+
28
+ Backend: pycryptodome (primary, cross-platform, pre-built wheels for all OSes
29
+ including Windows MinGW — no cffi / C compiler required). Falls back to
30
+ cryptography if pycryptodome is not available (backwards compatibility for
31
+ existing installations that already have cryptography).
32
+
33
+ Install:
34
+ pip install nedb-engine[encryption] # installs pycryptodome
35
+ """
36
+ from __future__ import annotations
37
+
38
+ import base64
39
+ import json
40
+ import os
41
+ from typing import Optional
42
+
43
# ── Backend detection ────────────────────────────────────────────────────────
# pycryptodome is the primary backend: pre-built binary wheels for all
# platforms (Linux / macOS / Windows x86 / Windows arm64 / Windows MinGW)
# with no cffi dependency — installs everywhere without a C compiler.
#
# After this section runs, _BACKEND is "pycryptodome", "cryptography", or None,
# and _HAVE_CRYPTO is True exactly when one of the two imported successfully.
_BACKEND: Optional[str] = None
_HAVE_CRYPTO = False

try:
    from Crypto.Cipher import AES as _PCD_AES  # type: ignore[import]
    from Crypto.Protocol.KDF import HKDF as _PCD_HKDF  # type: ignore[import]
    from Crypto.Hash import SHA256 as _PCD_SHA256  # type: ignore[import]
    _BACKEND = "pycryptodome"
    _HAVE_CRYPTO = True
except ImportError:
    # Not fatal: fall through to the cryptography fallback below.
    pass

if not _HAVE_CRYPTO:
    # Fallback: cryptography (installations that already have it and lack
    # pycryptodome). Only tried when pycryptodome could not be imported.
    try:
        from cryptography.hazmat.primitives.ciphers.aead import AESGCM as _CG_AESGCM  # type: ignore[import]
        from cryptography.hazmat.primitives.kdf.hkdf import HKDF as _CG_HKDF  # type: ignore[import]
        from cryptography.hazmat.primitives import hashes as _CG_hashes  # type: ignore[import]
        _BACKEND = "cryptography"
        _HAVE_CRYPTO = True
    except ImportError:
        # Neither backend is available; _require_crypto() raises on first use.
        pass
69
+
70
# Sizes, in bytes, of the pieces of the on-disk layout nonce‖ciphertext‖tag.
KEY_LEN = 32  # 256-bit
NONCE_LEN = 12  # 96-bit GCM nonce (standard recommendation)
TAG_LEN = 16  # 128-bit GCM authentication tag

# Additional Authenticated Data tags — bind ciphertext to its purpose.
# _AAD_DEK is used when wrapping/unwrapping the DEK under the TMK;
# _AAD_DATA is the default for data encrypted under the DEK.
_AAD_DEK = b"NEDB-DEK-v1"
_AAD_DATA = b"NEDB-data-v1"
77
+
78
+
79
def _require_crypto() -> None:
    """Raise ImportError unless an AES-GCM backend was imported at module load."""
    if _HAVE_CRYPTO:
        return
    message = (
        "NEDB encryption at rest requires pycryptodome or cryptography.\n"
        "Install with: pip install 'nedb-engine[encryption]'\n"
        " (or: pip install pycryptodome)"
    )
    raise ImportError(message)
86
+
87
+
88
+ # ── Key derivation ────────────────────────────────────────────────────────────
89
+
90
def derive_key(material: bytes) -> bytes:
    """Derive a fixed-size key from *material* with HKDF-SHA256.

    The salt and context/info strings are fixed, so the same material yields
    the same ``KEY_LEN``-byte key under either backend's HKDF.

    Raises:
        ImportError: If no crypto backend is installed.
    """
    _require_crypto()
    salt = b"NEDB-hkdf-v1"
    info = b"nedb-key"
    if _BACKEND != "pycryptodome":
        kdf = _CG_HKDF(
            algorithm=_CG_hashes.SHA256(),
            length=KEY_LEN,
            salt=salt,
            info=info,
        )
        return kdf.derive(material)
    return _PCD_HKDF(
        master=material,
        key_len=KEY_LEN,
        salt=salt,
        hashmod=_PCD_SHA256,
        context=info,
    )
106
+
107
+
108
def resolve_tmk(tmk_arg: Optional[bytes] = None) -> Optional[bytes]:
    """Return the derived TMK to use, or None if encryption is not configured.

    Priority: explicit arg > NEDB_TMK env (hex) > NEDB_TMK_FILE env (raw file
    bytes, surrounding whitespace stripped).

    Raises:
        ValueError: If NEDB_TMK is not valid hex, or if the selected source
            yields no key material at all. An empty TMK would otherwise be
            stretched into a fixed key that anyone can recompute.
        OSError: If the NEDB_TMK_FILE path cannot be read.
        ImportError: If a TMK is configured but no crypto backend is installed.
    """
    material: Optional[bytes] = None
    source = ""
    if tmk_arg is not None:
        material, source = tmk_arg, "tmk argument"
    elif os.environ.get("NEDB_TMK"):
        source = "NEDB_TMK"
        try:
            # fromhex ignores ASCII whitespace, so a blank value decodes to b"".
            material = bytes.fromhex(os.environ["NEDB_TMK"])
        except ValueError as e:
            raise ValueError(f"NEDB_TMK is not valid hex: {e}") from e
    elif os.environ.get("NEDB_TMK_FILE"):
        source = "NEDB_TMK_FILE"
        with open(os.environ["NEDB_TMK_FILE"], "rb") as fh:
            material = fh.read().strip()
    if material is None:
        return None
    if not material:
        raise ValueError(
            f"{source} provides no key material; refusing to derive an "
            "encryption key from an empty TMK"
        )
    return derive_key(material)
127
+
128
+
129
+ # ── Low-level primitives ──────────────────────────────────────────────────────
130
+ # On-disk format: nonce‖ciphertext‖tag (12 + len + 16 bytes)
131
+ # Both backends produce and consume the same byte layout for full compatibility
132
+ # with databases created by either backend.
133
+
134
def encrypt_bytes(plaintext: bytes, dek: bytes, aad: bytes = _AAD_DATA) -> bytes:
    """Encrypt *plaintext* with AES-GCM under *dek*, authenticating *aad*.

    A fresh random nonce is drawn for every call.

    Returns:
        nonce‖ciphertext‖tag (12 + len(plaintext) + 16 bytes).

    Raises:
        ImportError: If no crypto backend is installed.
    """
    _require_crypto()
    nonce = os.urandom(NONCE_LEN)
    if _BACKEND != "pycryptodome":
        # cryptography's AESGCM already returns ciphertext with the tag appended.
        return nonce + _CG_AESGCM(dek).encrypt(nonce, plaintext, aad)
    gcm = _PCD_AES.new(dek, _PCD_AES.MODE_GCM, nonce=nonce)
    gcm.update(aad)
    body, tag = gcm.encrypt_and_digest(plaintext)
    return nonce + body + tag
146
+
147
+
148
def decrypt_bytes(data: bytes, dek: bytes, aad: bytes = _AAD_DATA) -> bytes:
    """Decrypt a nonce‖ciphertext‖tag blob produced by ``encrypt_bytes``.

    Raises:
        ImportError: If no crypto backend is installed.
        ValueError / InvalidTag: Raised by the backend when authentication
            fails (tampered data, wrong key, or wrong *aad*).
    """
    _require_crypto()
    # Split the fixed-size nonce prefix and tag suffix off the ciphertext.
    nonce = data[:NONCE_LEN]
    tag = data[-TAG_LEN:]
    body = data[NONCE_LEN:-TAG_LEN]
    if _BACKEND != "pycryptodome":
        # cryptography expects the tag appended to the ciphertext.
        return _CG_AESGCM(dek).decrypt(nonce, body + tag, aad)
    gcm = _PCD_AES.new(dek, _PCD_AES.MODE_GCM, nonce=nonce)
    gcm.update(aad)
    return gcm.decrypt_and_verify(body, tag)
160
+
161
+
162
+ # ── DEK management ────────────────────────────────────────────────────────────
163
+
164
# Name of the file, inside the database directory, that stores the TMK-wrapped
# DEK as a JSON document.
KEY_ENC_FILE = "key.enc"
165
+
166
+
167
def _key_enc_path(data_dir: str) -> str:
    """Return the path of the wrapped-DEK file inside *data_dir*."""
    key_file = os.path.join(data_dir, KEY_ENC_FILE)
    return key_file
169
+
170
+
171
def generate_dek() -> bytes:
    """Return ``KEY_LEN`` fresh random bytes from the OS for use as a new DEK."""
    fresh_key = os.urandom(KEY_LEN)
    return fresh_key
174
+
175
+
176
def wrap_dek(dek: bytes, tmk: bytes) -> dict:
    """Encrypt *dek* under *tmk* (AES-GCM, DEK-specific AAD).

    Returns:
        A JSON-serialisable dict with keys ``v`` (format version), ``alg``,
        ``n`` (hex nonce) and ``ct`` (hex ciphertext with the tag appended).

    Raises:
        ImportError: If no crypto backend is installed.
    """
    _require_crypto()
    nonce = os.urandom(NONCE_LEN)
    if _BACKEND != "pycryptodome":
        sealed = _CG_AESGCM(tmk).encrypt(nonce, dek, _AAD_DEK)
    else:
        gcm = _PCD_AES.new(tmk, _PCD_AES.MODE_GCM, nonce=nonce)
        gcm.update(_AAD_DEK)
        body, tag = gcm.encrypt_and_digest(dek)
        sealed = body + tag
    envelope = {
        "v": 1,
        "alg": "AES-256-GCM",
        "n": nonce.hex(),
        "ct": sealed.hex(),
    }
    return envelope
188
+
189
+
190
def unwrap_dek(wrapped: dict, tmk: bytes) -> bytes:
    """Recover the DEK from a dict produced by ``wrap_dek``.

    Raises:
        ImportError: If no crypto backend is installed.
        KeyError / ValueError: If *wrapped* lacks ``n``/``ct`` or they are not hex.
        ValueError / InvalidTag: Raised by the backend when the TMK is wrong
            or the wrapped data was tampered with.
    """
    _require_crypto()
    nonce = bytes.fromhex(wrapped["n"])
    sealed = bytes.fromhex(wrapped["ct"])
    if _BACKEND != "pycryptodome":
        # cryptography takes ciphertext and tag as one buffer.
        return _CG_AESGCM(tmk).decrypt(nonce, sealed, _AAD_DEK)
    body, tag = sealed[:-TAG_LEN], sealed[-TAG_LEN:]
    gcm = _PCD_AES.new(tmk, _PCD_AES.MODE_GCM, nonce=nonce)
    gcm.update(_AAD_DEK)
    return gcm.decrypt_and_verify(body, tag)
203
+
204
+
205
def load_or_create_dek(data_dir: str, tmk: bytes) -> bytes:
    """Return the database's DEK, creating and persisting one if none exists.

    If the wrapped-DEK file is present in *data_dir* it is read and unwrapped
    with *tmk*; otherwise a new random DEK is generated, wrapped under *tmk*,
    written to that file, and returned.
    """
    key_file = _key_enc_path(data_dir)
    if not os.path.exists(key_file):
        # New encrypted database: mint a key and persist its wrapped form.
        new_dek = generate_dek()
        _save_wrapped_dek(data_dir, new_dek, tmk)
        return new_dek
    with open(key_file, encoding="utf-8") as fh:
        envelope = json.load(fh)
    return unwrap_dek(envelope, tmk)
218
+
219
+
220
def _save_wrapped_dek(data_dir: str, dek: bytes, tmk: bytes) -> None:
    """Wrap *dek* under *tmk* and write it to the key file in *data_dir*.

    The JSON is written to a ``.tmp`` sibling, flushed and fsynced, and then
    moved over the real file with ``os.replace``.
    """
    final_path = _key_enc_path(data_dir)
    scratch_path = final_path + ".tmp"
    with open(scratch_path, "w", encoding="utf-8") as out:
        out.write(json.dumps(wrap_dek(dek, tmk)))
        out.flush()
        os.fsync(out.fileno())
    os.replace(scratch_path, final_path)
228
+
229
+
230
def rewrap_dek(data_dir: str, old_tmk: bytes, new_tmk: bytes) -> None:
    """Rotate the TMK: re-encrypt the existing DEK under *new_tmk*.

    Only the wrapped-DEK file is rewritten; data encrypted under the DEK is
    untouched. Afterwards the DEK can only be unwrapped with *new_tmk*.

    NOTE(review): the DEK is obtained via ``load_or_create_dek``, so if no key
    file exists yet a brand-new DEK is generated rather than an error raised —
    confirm that is intended for rotation.
    """
    current_dek = load_or_create_dek(data_dir, old_tmk)
    _save_wrapped_dek(data_dir, current_dek, new_tmk)
237
+
238
+
239
+ # ── AOF line helpers ──────────────────────────────────────────────────────────
240
+
241
def aof_encode(op_json: str, dek: Optional[bytes]) -> str:
    """Return the AOF line for *op_json*.

    Without a DEK the text is returned unchanged. With one, it is encrypted
    and wrapped in a compact JSON envelope ``{"enc":1,"ct":<base64>}``.
    """
    if dek is not None:
        sealed = encrypt_bytes(op_json.encode(), dek)
        envelope = {"enc": 1, "ct": base64.b64encode(sealed).decode()}
        return json.dumps(envelope, separators=(",", ":"))
    return op_json
247
+
248
+
249
def aof_decode(line: str, dek: Optional[bytes]) -> str:
    """Return the plaintext op JSON for one AOF line.

    The line is stripped first. It is returned as-is when it is blank, when no
    DEK is configured, or when it is not an encryption envelope (so plaintext
    lines in an otherwise encrypted log still load). A line that *is* an
    envelope (``{"enc": 1, ...}``) is decrypted with *dek*.

    Raises:
        ValueError: If an envelope has no usable ``ct`` field or invalid base64.
        ValueError / InvalidTag: From the backend when an envelope fails
            authentication (tampering or wrong key). These are deliberately
            not swallowed: returning the envelope as if it were an op would
            hide corruption from the caller.
    """
    stripped = line.strip()
    if not stripped or dek is None:
        return stripped
    try:
        env = json.loads(stripped)
    except (ValueError, RecursionError):
        # Not JSON we can parse: hand it back untouched, as for plaintext lines.
        return stripped
    if not (isinstance(env, dict) and env.get("enc") == 1):
        return stripped
    ct_b64 = env.get("ct")
    if not isinstance(ct_b64, str):
        raise ValueError("malformed encrypted AOF line: missing 'ct'")
    return decrypt_bytes(base64.b64decode(ct_b64), dek).decode()
262
+
263
+
264
+ # ── Snapshot helpers ──────────────────────────────────────────────────────────
265
+
266
def snapshot_encode(content: bytes, dek: Optional[bytes]) -> bytes:
    """Return the bytes to store for a snapshot.

    Without a DEK *content* is returned unchanged. With one, it is encrypted
    and wrapped in a compact JSON envelope ``{"enc":1,"ct":<base64>}``.
    """
    if dek is not None:
        sealed = encrypt_bytes(content, dek)
        envelope = {"enc": 1, "ct": base64.b64encode(sealed).decode()}
        return json.dumps(envelope, separators=(",", ":")).encode()
    return content
272
+
273
+
274
def snapshot_decode(raw: bytes, dek: Optional[bytes]) -> bytes:
    """Return the plaintext snapshot bytes for *raw*.

    *raw* is returned unchanged when no DEK is configured or when it is not an
    encryption envelope (so an unencrypted snapshot still loads). Bytes that
    *are* an envelope (``{"enc": 1, ...}``) are decrypted with *dek*.

    Raises:
        ValueError: If an envelope has no usable ``ct`` field or invalid base64.
        ValueError / InvalidTag: From the backend when an envelope fails
            authentication (tampering or wrong key). These are deliberately
            not swallowed: returning the envelope as if it were the snapshot
            would hide corruption from the caller.
    """
    if dek is None:
        return raw
    try:
        env = json.loads(raw)
    except (ValueError, RecursionError):
        # Not parseable JSON (includes undecodable bytes): treat as plaintext.
        return raw
    if not (isinstance(env, dict) and env.get("enc") == 1):
        return raw
    ct_b64 = env.get("ct")
    if not isinstance(ct_b64, str):
        raise ValueError("malformed encrypted snapshot: missing 'ct'")
    return decrypt_bytes(base64.b64decode(ct_b64), dek)
285
+
286
+
287
+ # ── BlobStore chunk helpers ───────────────────────────────────────────────────
288
+
289
def chunk_encode(compressed_bytes: bytes, dek: Optional[bytes]) -> bytes:
    """Encrypt a blob chunk with *dek*, or return it unchanged when *dek* is None."""
    if dek is None:
        return compressed_bytes
    return encrypt_bytes(compressed_bytes, dek)
291
+
292
+
293
def chunk_decode(stored_bytes: bytes, dek: Optional[bytes]) -> bytes:
    """Decrypt a stored blob chunk with *dek*, or return it unchanged when *dek* is None."""
    if dek is None:
        return stored_bytes
    return decrypt_bytes(stored_bytes, dek)