cryptodb 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptodb-2.4.3.dist-info/METADATA +61 -0
- cryptodb-2.4.3.dist-info/RECORD +27 -0
- cryptodb-2.4.3.dist-info/WHEEL +4 -0
- cryptodb-2.4.3.dist-info/entry_points.txt +2 -0
- cryptodb-2.4.3.dist-info/licenses/LICENSE +65 -0
- nedb/__init__.py +92 -0
- nedb/autoindex.py +142 -0
- nedb/backends/__init__.py +0 -0
- nedb/backends/redis_backend.py +115 -0
- nedb/cascade.py +130 -0
- nedb/concurrent.py +218 -0
- nedb/crypto.py +294 -0
- nedb/engine.py +783 -0
- nedb/index.py +98 -0
- nedb/log.py +216 -0
- nedb/merkle.py +62 -0
- nedb/mongo.py +824 -0
- nedb/proof.py +126 -0
- nedb/query.py +305 -0
- nedb/redis_compat.py +516 -0
- nedb/relations.py +51 -0
- nedb/resp2.py +250 -0
- nedb/server.py +1011 -0
- nedb/snapshot.py +216 -0
- nedb/sql.py +430 -0
- nedb/store.py +68 -0
- nedb/wrap_redis.py +725 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cryptodb
|
|
3
|
+
Version: 2.4.3
|
|
4
|
+
Summary: NEDB — a versioned, self-compressing, time-traveling embedded database (replay-protected, idempotent, relational, searchable) with durable AOF persistence and a server daemon (nedbd).
|
|
5
|
+
Project-URL: Homepage, https://github.com/aiassistsecure/nedb
|
|
6
|
+
Project-URL: Repository, https://github.com/aiassistsecure/nedb
|
|
7
|
+
Author: Eth-Interchained
|
|
8
|
+
License: BUSL-1.1
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: compression,database,dedup,embedded,git,graph,mvcc,persistence,redis,search,server,time-travel,versioning
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: Other/Proprietary License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Database :: Database Engines/Servers
|
|
16
|
+
Requires-Python: >=3.8
|
|
17
|
+
Requires-Dist: pycryptodome>=3.19
|
|
18
|
+
Provides-Extra: encryption
|
|
19
|
+
Requires-Dist: cryptography>=41; extra == 'encryption'
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
<h1 align="center">CryptoDB</h1>
|
|
23
|
+
<p align="center"><b>The database that can prove it never lied.</b></p>
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
CryptoDB is a **content-addressed, hash-chained, time-traveling** datastore. Every version of every record is an immutable, **BLAKE2b-verified** object in a Merkle DAG — nothing is ever overwritten, and the store can prove its own integrity on demand.
|
|
28
|
+
|
|
29
|
+
If your data is evidence — ledgers, audit trails, provenance, anything you may one day have to *defend* — CryptoDB makes the history itself tamper-evident and replayable.
|
|
30
|
+
|
|
31
|
+
## Why CryptoDB
|
|
32
|
+
|
|
33
|
+
- 🔒 **Tamper-evident by construction.** `verify()` re-hashes every object against its content address. Flip a single byte on disk and it's caught — history cannot be forged silently.
|
|
34
|
+
- ⏪ **Time-travel is a query.** `AS OF <seq>` returns the exact state at any point. `VALID AS OF <time>` adds bi-temporal validity — *what was true, as of when.*
|
|
35
|
+
- 🧬 **Causal provenance.** `caused_by` links every record to the facts that produced it; `TRACE` walks the graph. Audit **why**, not just **what**.
|
|
36
|
+
- 🔐 **Encrypted at rest** (AES-256-GCM), RESP2 wire protocol, SQL / Redis / Mongo adapters, a `nedbd-v2` server daemon.
|
|
37
|
+
|
|
38
|
+
## Install
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
npm install cryptodb # Node (native addon)
|
|
42
|
+
pip install cryptodb # Python
|
|
43
|
+
cargo add cryptodb # Rust
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Reach for it when
|
|
47
|
+
|
|
48
|
+
Audit logs · financial & token ledgers · compliance trails · supply-chain provenance · anything where **provable, replayable history is the product**, not a nice-to-have.
|
|
49
|
+
|
|
50
|
+
```js
|
|
51
|
+
import { NedbCore } from "cryptodb";
|
|
52
|
+
const db = new NedbCore();
|
|
53
|
+
db.put("ledger", "acct:alice", JSON.stringify({ balance: 100 }));
|
|
54
|
+
db.put("ledger", "acct:alice", JSON.stringify({ balance: 250 }));
|
|
55
|
+
db.getAsOf("ledger", "acct:alice", 0n); // → balance 100, the past, intact
|
|
56
|
+
db.verify(); // → true: nothing was tampered with
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
<sub>CryptoDB is a distribution of the **NEDB** engine, tuned for verifiability. Engine development happens upstream at [Eth-Interchained/nedb](https://github.com/Eth-Interchained/nedb). © Interchained LLC.</sub>
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
nedb/__init__.py,sha256=F0EzI9xFniLDwrMqzyYn_JRfi2S2YcsOVkv8AL7I8eM,4498
|
|
2
|
+
nedb/autoindex.py,sha256=vzaPYisirBAW2gjdqu-b7xg6d4NledE17f5dOqRtcSM,5711
|
|
3
|
+
nedb/cascade.py,sha256=qHvBivx4xbrAesGz3X27aJPo5bppgvukEZhw-qumHgo,4436
|
|
4
|
+
nedb/concurrent.py,sha256=97NmtkipEagk9vdriSwiNhLFqIGbv6CzRLQjSTZJBf8,8453
|
|
5
|
+
nedb/crypto.py,sha256=vA1TI48EvdyWBEnpMm0oX8mQ-EopeBbtnhm3kCpUxiA,11540
|
|
6
|
+
nedb/engine.py,sha256=dD6Pz3zQYepWlLBFWDHF5ASM5Jp00dSVTjVUXvQ-jM8,35581
|
|
7
|
+
nedb/index.py,sha256=vrBPWlcXN25AhvuQsqX-0Y3x3o-FwvZYKQmvSjXGD4Q,4020
|
|
8
|
+
nedb/log.py,sha256=PlEPpv70P1MCYyWPozxLSIOLHcN6Q6qR0TLeQ-AyvQQ,9246
|
|
9
|
+
nedb/merkle.py,sha256=dVZZTXlcXiAj-9WfFzmfnCsAl_9Bq-p7dWL2_sMNkbA,1947
|
|
10
|
+
nedb/mongo.py,sha256=ANN9WcW3TvTyfkkUyRZzRu_6zibMkxe2sfJCg_tGdFo,33542
|
|
11
|
+
nedb/proof.py,sha256=NKiS64H0Z7d5JR9YvKB9M3L4OijdVYz3N4T78N46T0Y,5135
|
|
12
|
+
nedb/query.py,sha256=h6549FohvQF4mBI0ZAdGbaoRkXUrxJo8WEnnJwUeCgg,9222
|
|
13
|
+
nedb/redis_compat.py,sha256=Gx8XENApoUaW2WoATDYH0W3oVvVdW1sZa4upwP0EXIs,19783
|
|
14
|
+
nedb/relations.py,sha256=howNwh8q2mLv6jFc_jhyXXlJY--e_4eJAGRIFf6OBko,2050
|
|
15
|
+
nedb/resp2.py,sha256=AATG29fGJoax_1zDSiVo-tOgiw2zPt-RiKtzP1bxUYE,10087
|
|
16
|
+
nedb/server.py,sha256=G_czcSIOn5HuBLimdCivlMcIjXa8hvkz5EtjUcQK7MI,50951
|
|
17
|
+
nedb/snapshot.py,sha256=4pMfFfp6cIS3zGmCXV1ccO2UuhUq67F_lAnRxNJ5jys,8867
|
|
18
|
+
nedb/sql.py,sha256=0gHL9vibLJVHN1OV04cB5NPDJvZEPid5aXPzYn1dGBI,13539
|
|
19
|
+
nedb/store.py,sha256=mxuyEeQAF-BCW0xVHiJz9iDR1UjzWCPQPX8fa1Hbo0o,2648
|
|
20
|
+
nedb/wrap_redis.py,sha256=wd13lLJLp6GAkIyi5dDsOdzrFDUUe1siKoK9i02LQGE,30208
|
|
21
|
+
nedb/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
|
+
nedb/backends/redis_backend.py,sha256=FrRkJ5_H-qLt8AH7mYMmTQaG1Sf-5LqLIOtSmyYVR_I,4950
|
|
23
|
+
cryptodb-2.4.3.dist-info/METADATA,sha256=JPNNeG7Zrb5eozbCApmzkGwM7KwUisLZLWWhGLB6LMs,3117
|
|
24
|
+
cryptodb-2.4.3.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
25
|
+
cryptodb-2.4.3.dist-info/entry_points.txt,sha256=KcR6tni6CeAIvIuv7HPWBTaxEP4jT6e7I_5a7jCIXew,43
|
|
26
|
+
cryptodb-2.4.3.dist-info/licenses/LICENSE,sha256=A8BP4sCOVNtv_n14Vf3obmmdNkM9vrxbyZmjS9wnVug,6607
|
|
27
|
+
cryptodb-2.4.3.dist-info/RECORD,,
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Business Source License 1.1
|
|
2
|
+
|
|
3
|
+
Licensor: Interchained LLC, VibeCode 101, and/or the applicable copyright holders of NEDB
|
|
4
|
+
|
|
5
|
+
Licensed Work: NEDB, including all source code, documentation, examples, tests, build scripts, specifications, APIs, SDKs, packages, binaries, and derivative works distributed from or based on the NEDB repository.
|
|
6
|
+
|
|
7
|
+
Copyright: Copyright (c) 2026 Interchained LLC, VibeCode 101, and contributors. All rights reserved.
|
|
8
|
+
|
|
9
|
+
Change Date: Four years after the first publicly available distribution of each specific version of the Licensed Work under this License.
|
|
10
|
+
|
|
11
|
+
Change License: GNU Affero General Public License v3.0 or later.
|
|
12
|
+
|
|
13
|
+
Additional Use Grant:
|
|
14
|
+
|
|
15
|
+
You may use the Licensed Work only for non-production purposes, including evaluation, development, testing, research, local experimentation, security review, benchmarking, and personal learning.
|
|
16
|
+
|
|
17
|
+
No production use is granted automatically.
|
|
18
|
+
|
|
19
|
+
For clarity, the following uses are not permitted unless you have prior written permission from the Licensor:
|
|
20
|
+
|
|
21
|
+
1. Using the Licensed Work in production.
|
|
22
|
+
2. Using the Licensed Work to store, process, serve, index, replicate, query, or manage production data.
|
|
23
|
+
3. Offering the Licensed Work, or any derivative work, as a hosted, managed, embedded, bundled, white-labeled, commercial, paid, revenue-generating, or customer-facing product or service.
|
|
24
|
+
4. Offering database-as-a-service, storage-as-a-service, cache-as-a-service, search-as-a-service, analytics-as-a-service, AI-memory-as-a-service, agent-memory-as-a-service, blockchain-indexing-as-a-service, or any substantially similar service using the Licensed Work.
|
|
25
|
+
5. Using the Licensed Work to compete with NEDB, Interchained LLC, VibeCode 101, AiAssist Secure, or any affiliated product, service, infrastructure platform, database engine, AI runtime, agent platform, or hosted developer infrastructure.
|
|
26
|
+
6. Embedding the Licensed Work into commercial software, SaaS products, internal business systems, enterprise systems, hosted infrastructure, blockchain infrastructure, AI-agent infrastructure, or customer deliverables.
|
|
27
|
+
7. Removing, hiding, modifying, or misrepresenting this License, copyright notices, attribution notices, authorship notices, repository references, or licensing notices.
|
|
28
|
+
8. Circumventing license checks, access controls, paid licensing requirements, commercial-use restrictions, attribution requirements, or technical protections included with the Licensed Work.
|
|
29
|
+
9. Using the Licensed Work in a way that implies endorsement, partnership, sponsorship, certification, or approval by the Licensor without written permission.
|
|
30
|
+
10. Reselling, relicensing, sublicensing, renting, leasing, or otherwise commercially exploiting the Licensed Work except as expressly authorized in writing.
|
|
31
|
+
|
|
32
|
+
Additional production, commercial, hosted, enterprise, embedded, OEM, resale, white-label, managed-service, competitive, or otherwise restricted use grants are available only by separate written permission from the Licensor.
|
|
33
|
+
|
|
34
|
+
Written permission must be obtained from at least one of the following authorized licensing contacts:
|
|
35
|
+
|
|
36
|
+
[founders@vibecode-101.com](mailto:founders@vibecode-101.com)
|
|
37
|
+
[dev@interchained.org](mailto:dev@interchained.org)
|
|
38
|
+
|
|
39
|
+
Permission is valid only if it is expressly granted in writing by an authorized representative of the Licensor and specifically identifies the permitted use, scope, duration, parties, and any applicable commercial terms.
|
|
40
|
+
|
|
41
|
+
A general email conversation, informal message, pull request, issue comment, social media message, verbal discussion, repository access, package download, or contribution acceptance does not create a production, commercial, hosted, enterprise, resale, or competitive use license.
|
|
42
|
+
|
|
43
|
+
Terms:
|
|
44
|
+
|
|
45
|
+
The Licensor hereby grants you the right to copy, modify, create derivative works, redistribute, and make non-production use of the Licensed Work. The Licensor may make an Additional Use Grant, above, permitting limited production use.
|
|
46
|
+
|
|
47
|
+
Effective on the Change Date, or the fourth anniversary of the first publicly available distribution of a specific version of the Licensed Work under this License, whichever comes first, the Licensor hereby grants you rights under the terms of the Change License, and the rights granted in the paragraph above terminate.
|
|
48
|
+
|
|
49
|
+
If your use of the Licensed Work does not comply with the requirements currently in effect as described in this License, you must purchase or obtain a separate commercial license from the Licensor, its affiliated entities, or authorized resellers, or you must refrain from using the Licensed Work.
|
|
50
|
+
|
|
51
|
+
All copies of the original and modified Licensed Work, and derivative works of the Licensed Work, are subject to this License. This License applies separately for each version of the Licensed Work, and the Change Date may vary for each version of the Licensed Work released by Licensor.
|
|
52
|
+
|
|
53
|
+
You must conspicuously display this License on each original or modified copy of the Licensed Work. If you receive the Licensed Work in original or modified form from a third party, the terms and conditions set forth in this License apply to your use of that work.
|
|
54
|
+
|
|
55
|
+
Any use of the Licensed Work in violation of this License will automatically terminate your rights under this License for the current and all other versions of the Licensed Work.
|
|
56
|
+
|
|
57
|
+
This License does not grant you any right in any trademark, service mark, trade name, logo, domain name, brand identity, product name, project name, or other identifier of Licensor or its affiliates, except as expressly required to preserve legally required notices.
|
|
58
|
+
|
|
59
|
+
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, TITLE, SECURITY, ACCURACY, AVAILABILITY, DATA INTEGRITY, PERFORMANCE, AND FITNESS FOR PRODUCTION USE.
|
|
60
|
+
|
|
61
|
+
TO THE EXTENT PERMITTED BY APPLICABLE LAW, LICENSOR SHALL NOT BE LIABLE FOR ANY CLAIM, DAMAGES, LOSSES, COSTS, EXPENSES, BUSINESS INTERRUPTION, LOST PROFITS, LOST REVENUE, LOST DATA, SECURITY INCIDENTS, SERVICE OUTAGES, OR OTHER LIABILITY ARISING FROM OR RELATED TO THE LICENSED WORK OR YOUR USE OF THE LICENSED WORK.
|
|
62
|
+
|
|
63
|
+
Notice:
|
|
64
|
+
|
|
65
|
+
The Business Source License is not an Open Source license. However, each version of the Licensed Work will become available under the Change License on the applicable Change Date, or on the fourth anniversary of the first publicly available distribution of that version under this License, whichever comes first.
|
nedb/__init__.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
NEDB — a versioned, self-compressing, time-traveling embedded database.
|
|
3
|
+
|
|
4
|
+
* Replay-protected & idempotent: every write carries a monotonic nonce and an
|
|
5
|
+
optional idempotency key, enforced by a hash-chained append-only log.
|
|
6
|
+
* Time-travel: read the database AS OF any past sequence number.
|
|
7
|
+
* Relational: first-class, time-travel-aware relations with O(1) traversal.
|
|
8
|
+
* Filterable / sortable / searchable: equality, ordered, and full-text indexes.
|
|
9
|
+
* Queryable: NQL text queries and a fluent builder that share one plan.
|
|
10
|
+
* git-style files with Cascade compression: content-defined chunking + dedup +
|
|
11
|
+
temperature tiers, with a Merkle root per version anchorable on-chain.
|
|
12
|
+
|
|
13
|
+
The pure-Python package is the reference implementation and the always-works
|
|
14
|
+
fallback. When installed from a platform wheel, the compiled Rust core is available
|
|
15
|
+
as ``nedb._native`` (``nedb.__has_native__`` reports whether it loaded).
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from .engine import NEDB
|
|
20
|
+
from .log import Op, OpLog, ReplayError
|
|
21
|
+
from .query import Query, parse_nql
|
|
22
|
+
from .snapshot import save_snapshot, load_snapshot
|
|
23
|
+
from .crypto import resolve_tmk, rewrap_dek
|
|
24
|
+
from .sql import sql_exec, sql_to_nql, SQLError, SQLUnsupportedError
|
|
25
|
+
from .redis_compat import RedisCompat, RedisError, RedisUnsupportedError
|
|
26
|
+
from .mongo import (
|
|
27
|
+
MongoCompat, MongoClient, MongoError, MongoUnsupportedError, ObjectId,
|
|
28
|
+
)
|
|
29
|
+
from .autoindex import AutoIndexDB
|
|
30
|
+
from .concurrent import Sequencer
|
|
31
|
+
from .wrap_redis import wrap_redis, WrappedRedis
|
|
32
|
+
from .proof import verify_proof, fold_head
|
|
33
|
+
|
|
34
|
+
try:  # compiled Rust core, present in platform wheels (PyO3 via maturin)
    from . import _native  # type: ignore
    __has_native__ = True
except ImportError:  # pure-Python install (sdist / unsupported platform)
    # Register a stub module under "nedb._native" so that touching the missing
    # core (e.g. `from nedb._native import NedbCore`) raises an ImportError that
    # carries install guidance instead of a bare ImportError.
    import os as _os
    import sys as _sys
    import types as _types

    # sys.executable may be None or "" when the interpreter path is unknown,
    # so fall back to an empty string before lower-casing it.
    _is_msys2 = bool(_os.environ.get("MSYSTEM")) or "mingw" in (_sys.executable or "").lower()

    class _NativeStub(_types.ModuleType):
        """Placeholder for the missing native core; attribute access explains the fix."""

        # Primary fix: install the Rust crate -> get the nedbd server -> use HTTP mode.
        # Secondary fix (CPython only): pip reinstall to get the platform wheel with _native embedded.
        _MSG_MSYS2 = (
            "\n\n"
            " nedb._native (embedded v2 DAG core) is not available on MSYS2/MinGW Python.\n\n"
            " To use NEDB v2 features, install the server binary and use HTTP mode:\n\n"
            " cargo install nedb-engine # install nedbd v2 server\n"
            " nedbd --dag ./data # start DAG server\n"
            " NEDB_URL=http://localhost:7070 python3 your_script.py\n\n"
            " Run 'nedbd --doctor' for a full diagnosis.\n"
        )
        _MSG_OTHER = (
            "\n\n"
            " nedb._native (embedded v2 DAG core) is not available.\n"
            " You have the universal wheel — reinstall to get the platform wheel:\n\n"
            " pip install --force-reinstall --no-cache-dir nedb-engine\n\n"
            " Or install the server binary and use HTTP mode (works everywhere):\n\n"
            " cargo install nedb-engine # install nedbd v2 server\n"
            " nedbd --dag ./data # start DAG server\n"
            " NEDB_URL=http://localhost:7070 python3 your_script.py\n\n"
            " Run 'nedbd --doctor' for a full diagnosis.\n"
        )
        _MSG = _MSG_MSYS2 if _is_msys2 else _MSG_OTHER

        def __getattr__(self, name: str):
            """Raise ImportError with guidance for any missing public attribute.

            Dunder probes (``__file__``, ``__path__``, ...) raise AttributeError
            instead, because ``hasattr()``-style introspection only tolerates
            AttributeError and would otherwise blow up on this stub.
            """
            if name.startswith("__") and name.endswith("__"):
                raise AttributeError(name)
            raise ImportError(f"nedb._native.{name} is not available.{self._MSG}")

    _native_stub = _NativeStub("nedb._native")
    _native_stub.__package__ = "nedb"
    _sys.modules["nedb._native"] = _native_stub  # type: ignore
    _native = _native_stub  # type: ignore
    __has_native__ = False
    del _os, _sys, _types, _NativeStub, _native_stub
|
|
80
|
+
|
|
81
|
+
__all__ = [
|
|
82
|
+
"NEDB", "OpLog", "Op", "ReplayError", "Query", "parse_nql",
|
|
83
|
+
"save_snapshot", "load_snapshot",
|
|
84
|
+
"sql_exec", "sql_to_nql", "SQLError", "SQLUnsupportedError",
|
|
85
|
+
"RedisCompat", "RedisError", "RedisUnsupportedError",
|
|
86
|
+
"MongoCompat", "MongoClient", "MongoError", "MongoUnsupportedError", "ObjectId",
|
|
87
|
+
"AutoIndexDB", "Sequencer",
|
|
88
|
+
"wrap_redis", "WrappedRedis",
|
|
89
|
+
"verify_proof", "fold_head",
|
|
90
|
+
"_native", "__has_native__",
|
|
91
|
+
]
|
|
92
|
+
__version__ = "2.4.3"
|
nedb/autoindex.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nedb.autoindex — automatic index management.
|
|
3
|
+
|
|
4
|
+
Wraps a NEDB instance and intercepts query() calls. It tracks which fields are
|
|
5
|
+
used in WHERE and ORDER BY clauses per collection. Once a field reaches the
|
|
6
|
+
usage threshold it auto-creates the appropriate index:
|
|
7
|
+
|
|
8
|
+
- Equality conditions (= / !=) → "eq" index
|
|
9
|
+
- Ordered comparisons (< > ≤ ≥) → "ordered" index
|
|
10
|
+
- ORDER BY field → "ordered" index
|
|
11
|
+
- SEARCH clause on a field → deferred (no per-field signal in NQL)
|
|
12
|
+
|
|
13
|
+
Usage::
|
|
14
|
+
|
|
15
|
+
from nedb import NEDB
|
|
16
|
+
from nedb.autoindex import AutoIndexDB
|
|
17
|
+
|
|
18
|
+
db = AutoIndexDB(NEDB("./data"), threshold=3)
|
|
19
|
+
db.query('FROM users WHERE status = "active"') # tallied
|
|
20
|
+
db.query('FROM users WHERE status = "active"')
|
|
21
|
+
db.query('FROM users WHERE status = "active"') # threshold reached → index created
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import re
|
|
26
|
+
from collections import defaultdict
|
|
27
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
_WHERE_RE = re.compile(r"\bWHERE\b([\s\S]*?)(?:\bSEARCH\b|\bORDER\b|\bTRAVERSE\b|\bLIMIT\b|$)", re.IGNORECASE)
_ORDER_RE = re.compile(r"\bORDER\s+BY\s+(\w+)", re.IGNORECASE)
_FROM_RE = re.compile(r"\bFROM\s+(\w+)", re.IGNORECASE)
_COND_RE = re.compile(r"(\w+)\s*(=|!=|<>|<=|>=|<|>)", re.IGNORECASE)
# Quoted literals (with backslash escapes). Their contents are data, not NQL
# syntax, so they are blanked out before any keyword/operator scanning.
# NOTE(review): single-quoted literals are masked too — confirm NQL accepts them.
_STRING_RE = re.compile(r'"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\'')


def _parse_signals(nql: str) -> List[Tuple[str, str, str]]:
    """Return [(collection, field, 'eq'|'ordered')] from a NQL query string.

    The collection is the word after the first FROM; without one the result is
    empty. Each comparison in the WHERE clause yields one signal ('eq' for
    ``=``, ``!=``, ``<>``; 'ordered' for the rest) and an ORDER BY field yields
    an 'ordered' signal. String literals are masked first so that operators or
    keywords inside a value (``note = "a=b"``, ``title = "work ORDER"``) are
    not mistaken for conditions or clause boundaries. Each distinct signal is
    reported once per query, in first-seen order.
    """
    text = _STRING_RE.sub('""', nql)

    from_match = _FROM_RE.search(text)
    if not from_match:
        return []
    coll = from_match.group(1)

    found: List[Tuple[str, str, str]] = []

    where_match = _WHERE_RE.search(text)
    if where_match:
        for cond in _COND_RE.finditer(where_match.group(1)):
            field, op = cond.group(1), cond.group(2)
            kind = "eq" if op in ("=", "!=", "<>") else "ordered"
            found.append((coll, field, kind))

    order_match = _ORDER_RE.search(text)
    if order_match:
        found.append((coll, order_match.group(1), "ordered"))

    # dict.fromkeys keeps insertion order while dropping repeats, so a range
    # test such as `age >= 18 AND age <= 65` counts as one observation.
    return list(dict.fromkeys(found))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class AutoIndexDB:
    """
    NEDB wrapper that creates indexes automatically based on query usage.

    Parameters
    ----------
    db : NEDB
        A NEDB database instance (embedded or opened with a path).
    threshold : int
        Number of times a (collection, field, kind) combination must be
        observed before the index is created. Default: 5.
    verbose : bool
        Print a message when an index is auto-created. Default: False.
    """

    def __init__(self, db: Any, threshold: int = 5, verbose: bool = False):
        self._db = db
        self.threshold = threshold
        self.verbose = verbose
        # counts[(coll, field, kind)] = n
        self._counts: Dict[Tuple[str, str, str], int] = defaultdict(int)
        # indexes already created so we don't re-create
        self._created: set = set()
        # Seed from existing index config if available.
        # NOTE(review): assumes db.indexes.config iterates as
        # (collection, field, kind) triples — confirm against nedb.index.
        if hasattr(db, "indexes") and hasattr(db.indexes, "config"):
            for coll, field, kind in db.indexes.config:
                self._created.add((coll, field, kind))

    # ── Proxy every NEDB attribute ─────────────────────────────────────────

    def __getattr__(self, name: str) -> Any:
        """Forward any attribute not defined on the wrapper to the wrapped db."""
        # __getattr__ only runs when normal lookup fails. If `_db` itself is
        # missing (instance built without __init__, e.g. by copy/pickle),
        # reading self._db below would re-enter this method forever.
        if name == "_db":
            raise AttributeError(name)
        return getattr(self._db, name)

    # ── Instrumented query ─────────────────────────────────────────────────

    def query(self, nql: str) -> List[dict]:
        """Execute a NQL query, tally field usage, and auto-create indexes.

        Every signal parsed from the query whose index does not exist yet has
        its tally incremented; once a tally reaches ``threshold`` the index is
        created before the query is forwarded to the wrapped database.
        """
        # The earlier eq/ordered three-way branch incremented the tally in
        # every arm, so it is collapsed here without changing behaviour.
        for key in _parse_signals(nql):
            if key in self._created:
                continue
            self._counts[key] += 1
            if self._counts[key] >= self.threshold:
                self._auto_create(*key)

        return self._db.query(nql)

    def _auto_create(self, coll: str, field: str, kind: str) -> None:
        """Create the index once; skip underscore-prefixed fields except ``_id``."""
        key = (coll, field, kind)
        if key in self._created:
            return
        # Don't index internal NEDB fields
        if field.startswith("_") and field not in ("_id",):
            return
        self._db.create_index(coll, field, kind)
        self._created.add(key)
        if self.verbose:
            print(f"[autoindex] created {kind} index on {coll}.{field} (threshold={self.threshold})")

    # ── Manual analysis ────────────────────────────────────────────────────

    def analyze(self) -> Dict[str, Any]:
        """Return current tallies and the indexes already created."""
        return {
            "tallies": {f"{c}.{f} ({k})": n for (c, f, k), n in self._counts.items()},
            "indexes_created": [f"{c}.{f} ({k})" for (c, f, k) in sorted(self._created)],
            "threshold": self.threshold,
        }

    def suggest(self) -> List[str]:
        """Return not-yet-created index candidates, highest tally first."""
        ranked = sorted(self._counts.items(), key=lambda item: -item[1])
        return [
            f"{coll}.{field} ({kind}) — {count}/{self.threshold} queries"
            for (coll, field, kind), count in ranked
            if (coll, field, kind) not in self._created
        ]
|
|
File without changes
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nedb.backends.redis_backend — Redis Streams as the NEDB append-only log.
|
|
3
|
+
|
|
4
|
+
Alice's existing Redis keys are NEVER touched. NEDB operates in a strictly
|
|
5
|
+
isolated namespace:
|
|
6
|
+
|
|
7
|
+
nedb:{db_name}:oplog Redis Stream — hash-chained op log
|
|
8
|
+
nedb:{db_name}:snapshot Redis Hash — checkpoint for fast restart
|
|
9
|
+
nedb:{db_name}:events Pub/Sub chan — live subscriptions (future)
|
|
10
|
+
nedb:{db_name}:meta Redis Hash — version, index config
|
|
11
|
+
|
|
12
|
+
On startup NEDB replays the stream to rebuild its in-memory MVCC store.
|
|
13
|
+
On every write a new entry is XADD'd. One Redis connection, zero impact on
|
|
14
|
+
the user's existing keys.
|
|
15
|
+
|
|
16
|
+
© INTERCHAINED LLC × Claude Sonnet 4.6
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
from typing import Any, Dict, List, Optional
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class RedisBackend:
    """
    Redis-Streams-backed persistence for NEDB.

    Pass an instance to NEDB as the `backend` parameter::

        import redis
        from nedb.backends.redis_backend import RedisBackend
        from nedb import NEDB

        r = redis.Redis("localhost", 6379)
        db = NEDB(backend=RedisBackend(r, "rideshare"))

    Replies are accepted both as bytes (default client) and as str (a client
    created with ``decode_responses=True``).
    """

    def __init__(self, r: Any, db_name: str):
        self._r = r
        self.db_name = db_name
        # All keys live under the "nedb:{db_name}:" prefix.
        self.stream = f"nedb:{db_name}:oplog"
        self.snap_key = f"nedb:{db_name}:snapshot"
        self.meta_key = f"nedb:{db_name}:meta"
        self.events_ch = f"nedb:{db_name}:events"

    # ── Reply decoding helpers ─────────────────────────────────────────────

    @staticmethod
    def _as_text(value: Any) -> Any:
        """Decode a bytes reply to str; pass already-decoded values through."""
        if isinstance(value, (bytes, bytearray)):
            return value.decode()
        return value

    @classmethod
    def _op_of(cls, entry: Any) -> str:
        """Extract the "op" field from one (id, fields) stream entry."""
        fields = entry[1]
        # The field name is b"op" on a bytes client and "op" on a decoding one.
        raw = fields[b"op"] if b"op" in fields else fields["op"]
        return cls._as_text(raw)

    @classmethod
    def _load_hash(cls, raw: Dict[Any, Any]) -> Dict[str, Any]:
        """Turn an HGETALL reply into {str key: JSON-decoded value}."""
        return {cls._as_text(k): json.loads(v) for k, v in raw.items()}

    # ── Op log ─────────────────────────────────────────────────────────────

    def append(self, op_json: str) -> None:
        """Append one JSON-serialised op to the stream."""
        self._r.xadd(self.stream, {"op": op_json})

    def append_batch(self, ops: List[str]) -> None:
        """Append multiple ops in a single pipeline (one round-trip)."""
        pipe = self._r.pipeline(transaction=False)
        for op_json in ops:
            pipe.xadd(self.stream, {"op": op_json})
        pipe.execute()

    def read_all(self) -> List[str]:
        """Return all ops from the stream in insertion order."""
        entries = self._r.xrange(self.stream, "-", "+")
        return [self._op_of(e) for e in entries]

    def read_after(self, last_id: str = "0") -> List[str]:
        """Return ops appended after `last_id` (for incremental replay)."""
        # The "(" prefix makes the lower bound exclusive.
        entries = self._r.xrange(self.stream, f"({last_id}", "+")
        return [self._op_of(e) for e in entries]

    # ── Snapshot / checkpoint ──────────────────────────────────────────────

    def save_snapshot(self, data: Dict[str, Any]) -> None:
        """Persist a checkpoint so restart replay only needs the delta."""
        # Each value is stored as compact JSON; non-JSON types fall back to str().
        self._r.hset(self.snap_key, mapping={
            k: json.dumps(v, separators=(",", ":"), default=str)
            for k, v in data.items()
        })

    def load_snapshot(self) -> Optional[Dict[str, Any]]:
        """Load the last checkpoint, or None if none exists."""
        raw = self._r.hgetall(self.snap_key)
        if not raw:
            return None
        return self._load_hash(raw)

    # ── Pub/sub live events ────────────────────────────────────────────────

    def publish_ops(self, ops: List[str]) -> None:
        """Publish committed ops to the events channel for live subscribers."""
        if ops:
            payload = json.dumps(ops, separators=(",", ":"))
            self._r.publish(self.events_ch, payload)

    # ── Meta ───────────────────────────────────────────────────────────────

    def save_meta(self, meta: Dict[str, Any]) -> None:
        """Store each meta entry as compact JSON in the meta hash."""
        self._r.hset(self.meta_key, mapping={
            k: json.dumps(v, separators=(",", ":"), default=str)
            for k, v in meta.items()
        })

    def load_meta(self) -> Dict[str, Any]:
        """Return the stored meta entries, or an empty dict if none exist."""
        raw = self._r.hgetall(self.meta_key)
        if not raw:
            return {}
        return self._load_hash(raw)

    # ── Utility ────────────────────────────────────────────────────────────

    def stream_len(self) -> int:
        """Return the number of entries in the op-log stream."""
        return self._r.xlen(self.stream)

    def flush(self) -> None:
        """Delete all NEDB shadow keys for this database (non-destructive to user keys)."""
        for key in [self.stream, self.snap_key, self.meta_key]:
            self._r.delete(key)
|
nedb/cascade.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nedb.cascade — the Cascade compression pipeline + content-addressed blob store.
|
|
3
|
+
|
|
4
|
+
This is what makes NEDB double as a git-style file manager with maximum compression
|
|
5
|
+
WITHOUT inventing a new entropy coder. The novelty is the pipeline composition:
|
|
6
|
+
|
|
7
|
+
1. Content-defined chunking (Gear rolling hash) — boundaries follow content, so a
|
|
8
|
+
one-byte insert only changes the chunk(s) around it, not everything after it.
|
|
9
|
+
2. Content-addressed dedup (BLAKE2b) — identical chunks across all files and all
|
|
10
|
+
versions are stored exactly once.
|
|
11
|
+
3. Temperature tiers — warm data uses a fast codec (zstd in prod; zlib in this
|
|
12
|
+
reference), cold/archival history uses a maximum-ratio codec (LZMA).
|
|
13
|
+
|
|
14
|
+
The production pipeline adds similarity-picked binary deltas (zstd --patch-from) and
|
|
15
|
+
schema-aware columnar transforms before the entropy stage; both are documented in
|
|
16
|
+
docs/SPEC.md and stubbed for the reference engine.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import hashlib
|
|
21
|
+
import lzma
|
|
22
|
+
import random
|
|
23
|
+
import zlib
|
|
24
|
+
from typing import Dict, List
|
|
25
|
+
|
|
26
|
+
from .merkle import merkle_root
|
|
27
|
+
|
|
28
|
+
# --- Gear-hash content-defined chunking -------------------------------------
|
|
29
|
+
# A cut point fires when the low 13 bits of the rolling hash are all zero.
_MASK = (1 << 13) - 1  # ~8 KiB average chunk
_MIN = 2 * 1024  # a hash boundary is only honoured once a chunk is this long
_MAX = 64 * 1024  # hard cap: cut here even if no hash boundary was found
_M64 = 0xFFFFFFFFFFFFFFFF  # keeps the rolling hash within 64 bits
# One 64-bit value per possible byte. Each entry comes from its own seeded
# generator, so the table (and with it every chunk boundary) is reproducible.
_GEAR = [random.Random(0x12345678 + i).getrandbits(64) for i in range(256)]


def chunk(data: bytes) -> List[bytes]:
    """Split ``data`` into content-defined chunks with a Gear rolling hash.

    The hash restarts at every chunk start. A chunk ends at the first
    position, at least ``_MIN`` bytes in, where the hash has its ``_MASK``
    bits clear; failing that it ends after ``_MAX`` bytes or at the end of
    the input.

    Returns:
        The consecutive slices of ``data`` in order (their concatenation
        equals ``data``); an empty list for empty input.
    """
    pieces: List[bytes] = []
    total = len(data)
    start = 0
    while start < total:
        ceiling = min(start + _MAX, total)
        boundary = ceiling  # fallback when no hash boundary shows up
        rolling = 0
        for idx in range(start, ceiling):
            rolling = ((rolling << 1) + _GEAR[data[idx]]) & _M64
            consumed = idx + 1 - start
            if consumed >= _MIN and not (rolling & _MASK):
                boundary = idx + 1
                break
        pieces.append(data[start:boundary])
        start = boundary
    return pieces
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _blake(b: bytes) -> str:
    """Return the BLAKE2b digest of ``b`` (32-byte digest) as 64 hex characters."""
    hasher = hashlib.blake2b(digest_size=32)
    hasher.update(b)
    return hasher.hexdigest()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# --- temperature tiers ------------------------------------------------------
|
|
61
|
+
def warm_compress(b: bytes) -> bytes:  # zstd stand-in in the reference
    """Compress ``b`` for the warm tier using zlib at compression level 6."""
    packed = zlib.compress(b, level=6)
    return packed
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def warm_decompress(b: bytes) -> bytes:
    """Inflate a zlib stream ``b`` and return the original bytes."""
    restored = zlib.decompress(b)
    return restored
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def cold_compress(b: bytes) -> bytes:  # real LZMA — the maximum-ratio archival tier
    """Compress ``b`` for the cold tier with LZMA at preset 9 plus the extreme flag."""
    archival_preset = lzma.PRESET_EXTREME | 9
    return lzma.compress(b, preset=archival_preset)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def cold_decompress(b: bytes) -> bytes:
    """Decode an LZMA-compressed buffer ``b`` and return the original bytes."""
    restored = lzma.decompress(b)
    return restored
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class BlobStore:
    """Content-addressed, deduplicated, tiered blob store with versioned files.

    Files are split with ``chunk``; each chunk is stored once under its
    ``_blake`` hash, compressed with the codec of the store's current tier.
    Every ``put_file`` call appends a new version (an ordered list of chunk
    hashes, the "recipe") together with the ``merkle_root`` of that recipe.

    Reads pick the codec per chunk from the chunk's own header rather than
    from ``self.tier``, so chunks written under one tier stay readable after
    ``tier`` is changed.
    """

    # First six bytes of every .xz stream, which is what lzma.compress emits
    # by default and therefore what cold_compress produces.
    _XZ_MAGIC = b"\xfd7zXZ\x00"

    def __init__(self, tier: str = "warm") -> None:
        """Create an empty store.

        Args:
            tier: ``"cold"`` selects the cold codec for new chunks; any
                other value selects the warm codec.
        """
        self.tier = tier
        self.chunks: Dict[str, bytes] = {}  # hash -> compressed bytes
        self.files: Dict[str, Dict[str, list]] = {}  # name -> {versions, roots}
        self.logical_bytes = 0  # total size of all data passed to put_file
        self.dedup_hits = 0  # chunks whose hash was already stored

    def _compress(self, b: bytes) -> bytes:
        """Compress ``b`` with the codec of the current tier."""
        return cold_compress(b) if self.tier == "cold" else warm_compress(b)

    def _decompress(self, b: bytes) -> bytes:
        """Decompress a stored chunk using the codec it was written with.

        The codec is detected from the chunk header instead of ``self.tier``;
        otherwise changing ``tier`` after data was stored would send
        warm-compressed chunks to the cold decoder (or vice versa).
        """
        if b.startswith(self._XZ_MAGIC):
            return cold_decompress(b)
        return warm_decompress(b)

    def put_file(self, name: str, data: bytes) -> int:
        """Store ``data`` as a new version of file ``name``.

        Chunks whose hash is already present are not stored again; each such
        chunk (including repeats within ``data`` itself) bumps ``dedup_hits``.

        Returns:
            Zero-based index of the version just appended.
        """
        recipe: List[str] = []
        for c in chunk(data):
            hh = _blake(c)
            recipe.append(hh)
            if hh in self.chunks:
                self.dedup_hits += 1
            else:
                self.chunks[hh] = self._compress(c)
        self.logical_bytes += len(data)
        f = self.files.setdefault(name, {"versions": [], "roots": []})
        f["versions"].append(recipe)
        f["roots"].append(merkle_root(recipe))
        return len(f["versions"]) - 1

    def get_file(self, name: str, version: int = -1) -> bytes:
        """Reassemble and return one version of file ``name``.

        Args:
            name: File name previously passed to ``put_file``.
            version: List index into the file's versions; the default ``-1``
                is the most recent one.

        Raises:
            KeyError: ``name`` is unknown or a chunk of the recipe is missing.
            IndexError: ``version`` is out of range.
        """
        recipe = self.files[name]["versions"][version]
        return b"".join(self._decompress(self.chunks[hh]) for hh in recipe)

    def root(self, name: str, version: int = -1) -> str:
        """Return the root recorded for the given version of ``name``."""
        return self.files[name]["roots"][version]

    def stored_bytes(self) -> int:
        """Return the total size of all compressed chunks held by the store."""
        return sum(len(v) for v in self.chunks.values())

    def stats(self) -> dict:
        """Return a summary of the store.

        ``ratio`` is ``logical_bytes / stored_bytes`` rounded to two decimals,
        or ``0.0`` while nothing is stored.
        """
        stored = self.stored_bytes()
        return {
            "tier": self.tier,
            "unique_chunks": len(self.chunks),
            "dedup_hits": self.dedup_hits,
            "logical_bytes": self.logical_bytes,
            "stored_bytes": stored,
            "ratio": round(self.logical_bytes / stored, 2) if stored else 0.0,
        }
|