dataply 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +5965 -0
- package/dist/types/core/Dataply.d.ts +75 -0
- package/dist/types/core/DataplyAPI.d.ts +121 -0
- package/dist/types/core/KeyManager.d.ts +42 -0
- package/dist/types/core/LogManager.d.ts +43 -0
- package/dist/types/core/Page.d.ts +690 -0
- package/dist/types/core/PageFileSystem.d.ts +98 -0
- package/dist/types/core/Row.d.ts +77 -0
- package/dist/types/core/RowIndexStategy.d.ts +19 -0
- package/dist/types/core/RowIndexStrategy.d.ts +20 -0
- package/dist/types/core/RowTableEngine.d.ts +121 -0
- package/dist/types/core/Shard.d.ts +75 -0
- package/dist/types/core/ShareAPI.d.ts +121 -0
- package/dist/types/core/VirtualFileSystem.d.ts +96 -0
- package/dist/types/core/transaction/LockManager.d.ts +28 -0
- package/dist/types/core/transaction/Transaction.d.ts +112 -0
- package/dist/types/core/transaction/TxContext.d.ts +5 -0
- package/dist/types/index.d.ts +6 -0
- package/dist/types/types/index.d.ts +49 -0
- package/dist/types/utils/TextCodec.d.ts +10 -0
- package/dist/types/utils/bitwise.d.ts +17 -0
- package/dist/types/utils/bytesToNumber.d.ts +1 -0
- package/dist/types/utils/catchPromise.d.ts +12 -0
- package/dist/types/utils/crc32.d.ts +6 -0
- package/dist/types/utils/index.d.ts +4 -0
- package/dist/types/utils/numberToBytes.d.ts +1 -0
- package/package.json +51 -0
- package/readme.md +248 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { LockManager } from './LockManager';
|
|
2
|
+
import { VirtualFileSystem } from '../VirtualFileSystem';
|
|
3
|
+
/**
|
|
4
|
+
* Transaction class.
|
|
5
|
+
* Manages the lifecycle and resources of a database transaction.
|
|
6
|
+
*/
|
|
7
|
+
export declare class Transaction {
|
|
8
|
+
private vfs;
|
|
9
|
+
private lockManager;
|
|
10
|
+
/** Transaction ID */
|
|
11
|
+
readonly id: number;
|
|
12
|
+
/** List of held lock IDs (LOCK_ID) */
|
|
13
|
+
private heldLocks;
|
|
14
|
+
/** Held page locks (PageID -> LockID) */
|
|
15
|
+
private pageLocks;
|
|
16
|
+
/** Undo Logs: PageID -> Original Page Buffer (Snapshot) */
|
|
17
|
+
private undoPages;
|
|
18
|
+
/** List of Dirty Pages modified by the transaction */
|
|
19
|
+
private dirtyPages;
|
|
20
|
+
/** Pending Index Updates: PK -> { newRid, oldRid } */
|
|
21
|
+
private pendingIndexUpdates;
|
|
22
|
+
/** List of callbacks to execute on commit */
|
|
23
|
+
private commitHooks;
|
|
24
|
+
/**
|
|
25
|
+
* @param id Transaction ID
|
|
26
|
+
* @param vfs VFS instance
|
|
27
|
+
* @param lockManager LockManager instance
|
|
28
|
+
*/
|
|
29
|
+
constructor(id: number, vfs: VirtualFileSystem, lockManager: LockManager);
|
|
30
|
+
/**
|
|
31
|
+
* Registers a commit hook.
|
|
32
|
+
* @param hook Function to execute
|
|
33
|
+
*/
|
|
34
|
+
onCommit(hook: () => Promise<void>): void;
|
|
35
|
+
/**
|
|
36
|
+
* Stores an Undo page.
|
|
37
|
+
* Does not overwrite if the page is already stored (maintains the original snapshot).
|
|
38
|
+
* Do not call this method directly. It is called by the `VirtualFileSystem` instance.
|
|
39
|
+
* @param pageId Page ID
|
|
40
|
+
* @param buffer Page buffer
|
|
41
|
+
*/
|
|
42
|
+
__setUndoPage(pageId: number, buffer: Uint8Array): void;
|
|
43
|
+
/**
|
|
44
|
+
* Returns an Undo page.
|
|
45
|
+
* Do not call this method directly. It is called by the `VirtualFileSystem` instance.
|
|
46
|
+
* @param pageId Page ID
|
|
47
|
+
* @returns Undo page
|
|
48
|
+
*/
|
|
49
|
+
__getUndoPage(pageId: number): Uint8Array | undefined;
|
|
50
|
+
/**
|
|
51
|
+
* Returns true if the transaction has an Undo page for the given page ID.
|
|
52
|
+
* Do not call this method directly. It is called by the `VirtualFileSystem` instance.
|
|
53
|
+
* @param pageId Page ID
|
|
54
|
+
* @returns True if the transaction has an Undo page for the given page ID
|
|
55
|
+
*/
|
|
56
|
+
__hasUndoPage(pageId: number): boolean;
|
|
57
|
+
/**
|
|
58
|
+
* Adds a Pending Index Update.
|
|
59
|
+
* Do not call this method directly. It is called by the `VirtualFileSystem` instance.
|
|
60
|
+
* @param pk PK
|
|
61
|
+
* @param newRid New RID
|
|
62
|
+
* @param oldRid Old RID
|
|
63
|
+
*/
|
|
64
|
+
__addPendingIndexUpdate(pk: number, newRid: number, oldRid: number): void;
|
|
65
|
+
/**
|
|
66
|
+
* Returns a Pending Index Update.
|
|
67
|
+
* Do not call this method directly. It is called by the `VirtualFileSystem` instance.
|
|
68
|
+
* @param pk PK
|
|
69
|
+
* @returns Pending Index Update
|
|
70
|
+
*/
|
|
71
|
+
__getPendingIndexUpdate(pk: number): {
|
|
72
|
+
newRid: number;
|
|
73
|
+
oldRid: number;
|
|
74
|
+
} | undefined;
|
|
75
|
+
/**
|
|
76
|
+
* Returns all Pending Index Updates.
|
|
77
|
+
* Do not call this method directly. It is called by the `VirtualFileSystem` instance.
|
|
78
|
+
*/
|
|
79
|
+
__getPendingIndexUpdates(): Map<number, {
|
|
80
|
+
newRid: number;
|
|
81
|
+
oldRid: number;
|
|
82
|
+
}>;
|
|
83
|
+
/**
|
|
84
|
+
* Acquires a write lock.
|
|
85
|
+
* Do not call this method directly. It is called by the `VirtualFileSystem` instance.
|
|
86
|
+
* @param pageId Page ID
|
|
87
|
+
*/
|
|
88
|
+
__acquireWriteLock(pageId: number): Promise<void>;
|
|
89
|
+
/**
|
|
90
|
+
* Commits the transaction.
|
|
91
|
+
*/
|
|
92
|
+
commit(): Promise<void>;
|
|
93
|
+
/**
|
|
94
|
+
* Rolls back the transaction.
|
|
95
|
+
*/
|
|
96
|
+
rollback(): Promise<void>;
|
|
97
|
+
/**
|
|
98
|
+
* Adds a Dirty Page.
|
|
99
|
+
* Do not call this method directly. It is called by the `VirtualFileSystem` instance.
|
|
100
|
+
* @param pageId Page ID
|
|
101
|
+
*/
|
|
102
|
+
__addDirtyPage(pageId: number): void;
|
|
103
|
+
/**
|
|
104
|
+
* Returns the list of Dirty Pages.
|
|
105
|
+
* Do not call this method directly. It is called by the `VirtualFileSystem` instance.
|
|
106
|
+
*/
|
|
107
|
+
__getDirtyPages(): Set<number>;
|
|
108
|
+
/**
|
|
109
|
+
* Releases all locks.
|
|
110
|
+
*/
|
|
111
|
+
private releaseAllLocks;
|
|
112
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
export interface DataplyOptions {
|
|
2
|
+
/**
|
|
3
|
+
* The size of a page in bytes.
|
|
4
|
+
*/
|
|
5
|
+
pageSize?: number;
|
|
6
|
+
/**
|
|
7
|
+
* Write-Ahead Logging file path
|
|
8
|
+
*/
|
|
9
|
+
wal?: string | undefined | null;
|
|
10
|
+
/**
|
|
11
|
+
* The maximum number of pages to cache in memory.
|
|
12
|
+
*/
|
|
13
|
+
pageCacheCapacity?: number;
|
|
14
|
+
}
|
|
15
|
+
export interface DataplyMetadata {
|
|
16
|
+
/**
|
|
17
|
+
* The size of a page in bytes.
|
|
18
|
+
*/
|
|
19
|
+
pageSize: number;
|
|
20
|
+
/**
|
|
21
|
+
* The total number of pages in the dataply.
|
|
22
|
+
*/
|
|
23
|
+
pageCount: number;
|
|
24
|
+
/**
|
|
25
|
+
* The total number of data rows in the dataply.
|
|
26
|
+
*/
|
|
27
|
+
rowCount: number;
|
|
28
|
+
}
|
|
29
|
+
export type DataPage = Uint8Array & {
|
|
30
|
+
__pageType: 'data';
|
|
31
|
+
};
|
|
32
|
+
export type IndexPage = Uint8Array & {
|
|
33
|
+
__pageType: 'index';
|
|
34
|
+
};
|
|
35
|
+
export type BitmapPage = Uint8Array & {
|
|
36
|
+
__pageType: 'bitmap';
|
|
37
|
+
};
|
|
38
|
+
export type OverflowPage = Uint8Array & {
|
|
39
|
+
__pageType: 'overflow';
|
|
40
|
+
};
|
|
41
|
+
export type MetadataPage = Uint8Array & {
|
|
42
|
+
__pageType: 'metadata';
|
|
43
|
+
};
|
|
44
|
+
export type EmptyPage = Uint8Array & {
|
|
45
|
+
__pageType: 'empty';
|
|
46
|
+
};
|
|
47
|
+
export type UnknownPage = Uint8Array & {
|
|
48
|
+
__pageType: 'unknown';
|
|
49
|
+
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export declare class TextCodec {
|
|
2
|
+
protected static readonly TextEncoder: TextEncoder;
|
|
3
|
+
protected static readonly TextDecoder: TextDecoder;
|
|
4
|
+
/**
 * Static accessor that returns a `TextEncoder`.
 * NOTE(review): this member is spelled `GetEncoder` (capital G), while the sibling static
 * accessor is `getDecoder` and the instance accessors are `getEncoder`/`getDecoder`.
 * This looks like a casing typo, but renaming it would change the published API —
 * confirm against the implementation before changing it.
 */
static GetEncoder(): TextEncoder;
|
|
5
|
+
static getDecoder(): TextDecoder;
|
|
6
|
+
protected getEncoder(): TextEncoder;
|
|
7
|
+
protected getDecoder(): TextDecoder;
|
|
8
|
+
encode(text: string): Uint8Array;
|
|
9
|
+
decode(source: Uint8Array): string;
|
|
10
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sets or clears a specific bit in a 1-byte number and returns it.
|
|
3
|
+
* (Uses index 0-7 from the right, LSB-first)
|
|
4
|
+
* @param value Target number (1 byte)
|
|
5
|
+
* @param bitPos Bit position (0-7, 0 is the rightmost/least significant bit)
|
|
6
|
+
* @param flag Value to set (true: 1, false: 0)
|
|
7
|
+
* @returns Modified number
|
|
8
|
+
*/
|
|
9
|
+
export declare function setBit(value: number, bitPos: number, flag: boolean): number;
|
|
10
|
+
/**
|
|
11
|
+
* Returns the value of a specific bit in a 1-byte number.
|
|
12
|
+
* (Uses index 0-7 from the right, LSB-first)
|
|
13
|
+
* @param value Target number (1 byte)
|
|
14
|
+
* @param bitPos Bit position (0-7, 0 is the rightmost/least significant bit)
|
|
15
|
+
* @returns Bit value (true: 1, false: 0)
|
|
16
|
+
*/
|
|
17
|
+
export declare function getBit(value: number, bitPos: number): boolean;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Produces a number from the contents of a byte array.
 * NOTE(review): only the signature is visible here. Presumably `offset` is the index at which
 * reading starts and `length` is the number of bytes consumed; the byte order and the defaults
 * of both optional parameters must be confirmed against the implementation.
 * @param bytes Source byte array
 * @param offset Optional position within `bytes` (assumed start index — confirm)
 * @param length Optional byte count (assumed — confirm)
 * @returns The resulting number
 */
export declare function bytesToNumber(bytes: Uint8Array, offset?: number, length?: number): number;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/** Tuple for the resolved case: the error slot is `undefined` and the second element is the resolved value. */
type CatchPromiseSuccess<T> = [undefined, T];
|
|
2
|
+
/** Tuple for the rejected case: a single element holding the error; there is no result element. */
type CatchPromiseError = [Error];
|
|
3
|
+
/** Either outcome tuple; inspect the first element (`undefined` vs. an `Error`) to tell them apart. */
type CatchPromiseResult<T> = CatchPromiseSuccess<T> | CatchPromiseError;
|
|
4
|
+
/**
|
|
5
|
+
* Catches a promise and returns a tuple of [error, result]
|
|
6
|
+
* If the promise is resolved, the first element of the tuple is `undefined`
|
|
7
|
+
* If the promise is rejected, the first element of the tuple is the `error`
|
|
8
|
+
* @param promise Promise to catch
|
|
9
|
+
* @returns Tuple of [error, result]
|
|
10
|
+
*/
|
|
11
|
+
export declare function catchPromise<T>(promise: Promise<T>): Promise<CatchPromiseResult<T>>;
|
|
12
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Counterpart of number-to-byte conversion that takes a caller-supplied byte array.
 * NOTE(review): only the signature is visible here. Presumably `value` is encoded into `buffer`
 * starting at `offset` and using `length` bytes, and the returned array is that same `buffer`;
 * the byte order, the defaults, and whether the return value aliases `buffer` must be confirmed
 * against the implementation.
 * @param value Number to convert
 * @param buffer Byte array passed in by the caller (assumed to be the write target — confirm)
 * @param offset Optional position within `buffer` (assumed start index — confirm)
 * @param length Optional byte count (assumed — confirm)
 * @returns A `Uint8Array`
 */
export declare function numberToBytes(value: number, buffer: Uint8Array, offset?: number, length?: number): Uint8Array;
|
package/package.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "dataply",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "A lightweight storage engine for Node.js with support for MVCC, WAL.",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "izure <admin@izure.org>",
|
|
7
|
+
"type": "commonjs",
|
|
8
|
+
"main": "./dist/cjs/index.js",
|
|
9
|
+
"types": "./dist/types/index.d.ts",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"types": "./dist/types/index.d.ts",
|
|
13
|
+
"require": "./dist/cjs/index.js"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist/**/*"
|
|
18
|
+
],
|
|
19
|
+
"scripts": {
|
|
20
|
+
"test": "jest --runInBand --forceExit",
|
|
21
|
+
"benchmark": "npx tsx benchmark/benchmark.ts",
|
|
22
|
+
"build": "node build/index.js && tsc"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"storage-engine",
|
|
26
|
+
"mvcc",
|
|
27
|
+
"wal",
|
|
28
|
+
"sharding",
|
|
29
|
+
"database",
|
|
30
|
+
"bplus-tree",
|
|
31
|
+
"typescript",
|
|
32
|
+
"record-storage"
|
|
33
|
+
],
|
|
34
|
+
"repository": {
|
|
35
|
+
"type": "git",
|
|
36
|
+
"url": "https://github.com/izure1/dataply.git"
|
|
37
|
+
},
|
|
38
|
+
"devDependencies": {
|
|
39
|
+
"@types/jest": "^30.0.0",
|
|
40
|
+
"@types/node": "^25.0.3",
|
|
41
|
+
"esbuild": "^0.27.2",
|
|
42
|
+
"jest": "^30.2.0",
|
|
43
|
+
"ts-jest": "^29.4.6",
|
|
44
|
+
"typescript": "^5.9.3"
|
|
45
|
+
},
|
|
46
|
+
"dependencies": {
|
|
47
|
+
"cache-entanglement": "^1.7.1",
|
|
48
|
+
"ryoiki": "^1.2.0",
|
|
49
|
+
"serializable-bptree": "^5.2.1"
|
|
50
|
+
}
|
|
51
|
+
}
|
package/readme.md
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+

|
|
2
|
+
|
|
3
|
+
# Dataply
|
|
4
|
+
|
|
5
|
+
> [!WARNING]
|
|
6
|
+
> **Dataply is currently in Alpha.** It is experimental and not yet suitable for production use.
|
|
7
|
+
|
|
8
|
+
**Dataply** is a lightweight, high-performance **Record Store** designed for Node.js. It focuses on storing arbitrary data and providing an auto-generated Primary Key (PK) for ultra-fast retrieval, while supporting core enterprise features like MVCC, WAL, and atomic transactions.
|
|
9
|
+
|
|
10
|
+
## Key Features
|
|
11
|
+
|
|
12
|
+
- **🚀 Identity-Based Access**: Specialized in storing records and managing them via auto-generated Primary Keys.
|
|
13
|
+
- **⚡ High-Performance B+Tree**: Optimizes data lookup and insertion through an asynchronous B+Tree structure.
|
|
14
|
+
- **🛡️ MVCC Support**: Enables non-blocking read operations and guarantees data isolation between transactions.
|
|
15
|
+
- **📝 WAL (Write-Ahead Logging)**: Ensures data integrity and provides recovery capabilities in case of system failures.
|
|
16
|
+
- **💼 Transaction Mechanism**: Supports Commit and Rollback for atomic operations.
|
|
17
|
+
- **📦 Page-Based Storage**: Efficient page caching and disk I/O optimization through Virtual File System (VFS).
|
|
18
|
+
- **⌨️ TypeScript Support**: Provides comprehensive type definitions for all APIs.
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
### Prerequisites
|
|
23
|
+
|
|
24
|
+
- **Node.js**: v18.0.0 or higher
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
npm install dataply
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
import { Dataply } from 'dataply'
|
|
34
|
+
|
|
35
|
+
// Open Dataply instance
|
|
36
|
+
const dataply = new Dataply('./data.db', {
|
|
37
|
+
wal: './data.db.wal'
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
async function main() {
|
|
41
|
+
// Initialization (Required)
|
|
42
|
+
await dataply.init()
|
|
43
|
+
|
|
44
|
+
// Insert data
|
|
45
|
+
const pk = await dataply.insert('Hello, Dataply!')
|
|
46
|
+
console.log(`Inserted row with PK: ${pk}`)
|
|
47
|
+
|
|
48
|
+
// Update data
|
|
49
|
+
await dataply.update(pk, 'Updated Data')
|
|
50
|
+
console.log(`Updated row with PK: ${pk}`)
|
|
51
|
+
|
|
52
|
+
// Select data
|
|
53
|
+
const data = await dataply.select(pk)
|
|
54
|
+
console.log(`Read data: ${data}`)
|
|
55
|
+
|
|
56
|
+
// Delete data
|
|
57
|
+
await dataply.delete(pk)
|
|
58
|
+
console.log(`Deleted row with PK: ${pk}`)
|
|
59
|
+
|
|
60
|
+
// Close dataply
|
|
61
|
+
await dataply.close()
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
main()
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Transaction Management
|
|
68
|
+
|
|
69
|
+
### Explicit Transactions
|
|
70
|
+
You can group multiple operations into a single unit of work to ensure atomicity.
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
const tx = dataply.createTransaction()
|
|
74
|
+
|
|
75
|
+
try {
|
|
76
|
+
const pk = await dataply.insert('Data 1', tx)
|
|
77
|
+
await dataply.update(pk, 'Updated Data', tx)
|
|
78
|
+
|
|
79
|
+
await tx.commit() // Persist changes to disk and clear WAL on success
|
|
80
|
+
} catch (error) {
|
|
81
|
+
await tx.rollback() // Revert all changes on failure (Undo)
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Auto-Transaction
|
|
86
|
+
If you omit the `tx` argument when calling methods like `insert`, `update`, or `delete`, Dataply internally **creates an individual transaction automatically**.
|
|
87
|
+
|
|
88
|
+
- **Guaranteed Atomicity**: Even single operations are processed within an internal transaction, ensuring they are only finalized on success and rolled back on failure.
|
|
89
|
+
- **Performance Note**: For batch processing or multiple related operations, wrapping them in a single explicit transaction is significantly faster than relying on auto-transactions due to reduced I/O overhead.
|
|
90
|
+
|
|
91
|
+
## API Reference
|
|
92
|
+
|
|
93
|
+
### Dataply Class
|
|
94
|
+
|
|
95
|
+
#### `constructor(file: string, options?: DataplyOptions): Dataply`
|
|
96
|
+
Opens a database file. If the file does not exist, it creates and initializes a new one.
|
|
97
|
+
- `options.pageSize`: Size of a page (Default: 8192, must be a power of 2)
|
|
98
|
+
- `options.pageCacheCapacity`: Maximum number of pages to keep in memory (Default: 10000)
|
|
99
|
+
- `options.wal`: Path to the WAL file. If omitted, WAL is disabled.
|
|
100
|
+
|
|
101
|
+
#### `async init(): Promise<void>`
|
|
102
|
+
Initializes the instance. Must be called before performing any CRUD operations.
|
|
103
|
+
|
|
104
|
+
#### `async insert(data: string | Uint8Array, tx?: Transaction): Promise<number>`
|
|
105
|
+
Inserts new data. Returns the Primary Key (PK) of the created row.
|
|
106
|
+
|
|
107
|
+
#### `async insertBatch(dataList: (string | Uint8Array)[], tx?: Transaction): Promise<number[]>`
|
|
108
|
+
Inserts multiple rows at once. This is significantly faster than multiple individual inserts as it minimizes internal transaction overhead.
|
|
109
|
+
|
|
110
|
+
#### `async select(pk: number, asRaw?: boolean, tx?: Transaction): Promise<string | Uint8Array | null>`
|
|
111
|
+
Retrieves data based on the PK. Returns `Uint8Array` if `asRaw` is true.
|
|
112
|
+
|
|
113
|
+
#### `async update(pk: number, data: string | Uint8Array, tx?: Transaction): Promise<void>`
|
|
114
|
+
Updates existing data.
|
|
115
|
+
|
|
116
|
+
#### `async delete(pk: number, tx?: Transaction): Promise<void>`
|
|
117
|
+
Marks data as deleted.
|
|
118
|
+
|
|
119
|
+
#### `async getMetadata(): Promise<DataplyMetadata>`
|
|
120
|
+
Returns the current metadata of the dataply, including `pageSize`, `pageCount`, and `rowCount`.
|
|
121
|
+
|
|
122
|
+
#### `createTransaction(): Transaction`
|
|
123
|
+
Creates a new transaction instance.
|
|
124
|
+
|
|
125
|
+
#### `async close(): Promise<void>`
|
|
126
|
+
Closes the file handles and shuts down safely.
|
|
127
|
+
|
|
128
|
+
### Transaction Class
|
|
129
|
+
|
|
130
|
+
#### `async commit(): Promise<void>`
|
|
131
|
+
Permanently reflects all changes made during the transaction to disk and releases locks.
|
|
132
|
+
|
|
133
|
+
#### `async rollback(): Promise<void>`
|
|
134
|
+
Cancels all changes made during the transaction and restores the original state.
|
|
135
|
+
|
|
136
|
+
## Extending Dataply
|
|
137
|
+
|
|
138
|
+
If you want to extend Dataply's functionality, use the `DataplyAPI` class. Unlike the standard `Dataply` class, `DataplyAPI` provides direct access to internal components like `PageFileSystem` or `RowTableEngine`, offering much more flexibility for custom implementations.
|
|
139
|
+
|
|
140
|
+
### Using DataplyAPI
|
|
141
|
+
|
|
142
|
+
```typescript
|
|
143
|
+
import { DataplyAPI } from 'dataply'
|
|
144
|
+
|
|
145
|
+
class CustomDataply extends DataplyAPI {
|
|
146
|
+
// Leverage internal protected members (pfs, rowTableEngine, etc.)
|
|
147
|
+
async getInternalStats() {
|
|
148
|
+
return {
|
|
149
|
+
pageSize: this.options.pageSize,
|
|
150
|
+
// Custom internal logic here
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
const custom = CustomDataply.Use('./data.db')
|
|
156
|
+
await custom.init()
|
|
157
|
+
|
|
158
|
+
const stats = await custom.getInternalStats()
|
|
159
|
+
console.log(stats)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Internal Architecture
|
|
163
|
+
|
|
164
|
+
Dataply implements the core principles of high-performance storage systems in a lightweight and efficient manner.
|
|
165
|
+
|
|
166
|
+
### 1. Layered Architecture
|
|
167
|
+
```mermaid
|
|
168
|
+
graph TD
|
|
169
|
+
API[Dataply API] --> RTE[Row Table Engine]
|
|
170
|
+
RTE --> PFS[Page File System]
|
|
171
|
+
PFS --> VFS[Virtual File System / Cache]
|
|
172
|
+
VFS --> WAL[Write Ahead Log]
|
|
173
|
+
VFS --> DISK[(Database File)]
|
|
174
|
+
|
|
175
|
+
TX[Transaction Manager] -.-> VFS
|
|
176
|
+
TX -.-> LM[Lock Manager]
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### 2. Page-Based Storage and VFS Caching
|
|
180
|
+
- **Fixed-size Pages**: All data is managed in fixed-size units (default 8KB) called pages.
|
|
181
|
+
- **VFS Cache**: Minimizes disk I/O by caching frequently accessed pages in memory.
|
|
182
|
+
- **Dirty Page Tracking**: Tracks modified pages (Dirty) to synchronize them with disk efficiently only at the time of commit.
|
|
183
|
+
|
|
184
|
+
### 3. MVCC and Snapshot Isolation
|
|
185
|
+
- **Non-blocking Reads**: Read operations are not blocked by write operations.
|
|
186
|
+
- **Undo Log**: When a transaction modifies a page, it keeps the original data in an **Undo Buffer**. Other transactions trying to read the same page are served this snapshot to ensure consistent reads.
|
|
187
|
+
- **Rollback Mechanism**: Upon transaction failure, the Undo Buffer is used to instantly restore pages to their original state.
|
|
188
|
+
|
|
189
|
+
### 4. WAL (Write-Ahead Logging) and Crash Recovery
|
|
190
|
+
- **Performance and Reliability**: All changes are recorded in a sequential log file (WAL) before being written to the actual data file. This converts random writes into sequential writes for better performance and ensures data integrity.
|
|
191
|
+
- **Crash Recovery**: When restarting after an unexpected shutdown, Dataply reads the WAL to automatically replay (Redo) any changes that weren't yet reflected in the data file.
|
|
192
|
+
|
|
193
|
+
### 5. Concurrency Control and Indexing
|
|
194
|
+
- **Page-level Locking**: Prevents data contention by controlling sequential access to pages through the `LockManager`.
|
|
195
|
+
- **B+Tree Index**: Uses a B+Tree structure guaranteeing $O(\log N)$ performance for maximized PK lookup efficiency.
|
|
196
|
+
|
|
197
|
+
## Performance
|
|
198
|
+
|
|
199
|
+
Dataply is optimized for high-speed data processing. Below are the results of basic benchmark tests conducted in a local environment.
|
|
200
|
+
|
|
201
|
+
| Test Case | Count | Total Time | OPS (Operations Per Second) |
|
|
202
|
+
| :--- | :--- | :--- | :--- |
|
|
203
|
+
| **Bulk Insert (Batch)** | 10,000 | ~1,207ms | **~8,281 OPS** |
|
|
204
|
+
| **Bulk Insert (Individual)** | 100 | ~47ms | **~2,121 OPS** |
|
|
205
|
+
| **Bulk Insert with WAL** | 100 | ~946ms | **~105 OPS** |
|
|
206
|
+
| **Medium Row Insert (1KB)** | 100 | ~52ms | **~1,892 OPS** |
|
|
207
|
+
|
|
208
|
+
### Benchmark Analysis
|
|
209
|
+
- **Batching Efficiency**: Grouping operations into a single transaction is approximately **3.9x faster** than individual inserts by minimizing internal transaction management overhead.
|
|
210
|
+
- **WAL Trade-off**: Enabling Write-Ahead Logging ensures data durability but results in a significant performance decrease (approximately **20x slower** for individual inserts) due to synchronous I/O operations.
|
|
211
|
+
- **Node.js Optimization**: Dataply is designed to provide competitive performance (over **8,000 OPS** in batch mode) for a pure TypeScript Record Store without native dependencies.
|
|
212
|
+
|
|
213
|
+
> [!NOTE]
|
|
214
|
+
> Tests were conducted in a standard local environment (Node.js v25+). Performance may vary depending on hardware specifications (especially SSD/HDD) and system load.
|
|
215
|
+
|
|
216
|
+
## Limitations
|
|
217
|
+
|
|
218
|
+
As **Dataply** is currently in Alpha, there are several limitations to keep in mind:
|
|
219
|
+
- **PK-Only Access**: Data can only be retrieved or modified using the Primary Key. No secondary indexes or complex query logic are available yet.
|
|
220
|
+
- **No SQL Support**: This is a low-level **Record Store**. It does not support SQL or any higher-level query language.
|
|
221
|
+
- **Memory Usage**: The VFS cache size is controlled by `pageCacheCapacity`, but excessive use of large records should be handled with care.
|
|
222
|
+
|
|
223
|
+
## Q&A
|
|
224
|
+
|
|
225
|
+
### Q: What can I build with Dataply?
|
|
226
|
+
Dataply is a low-level record store that provides the essential building blocks for storage engines. You can use it to build custom document databases, specialized caching layers, or any application requiring high-performance, ACID-compliant data persistence.
|
|
227
|
+
|
|
228
|
+
### Q: Can I extend Dataply to implement a full-featured database?
|
|
229
|
+
Absolutely! By leveraging `DataplyAPI`, you can implement custom indexing (like secondary indexes), query parsers, and complex data schemas. Dataply handles the difficult aspects of transaction management, crash recovery (WAL), and concurrency control, letting you focus on your database's unique features.
|
|
230
|
+
|
|
231
|
+
### Q: How many rows can be inserted per page?
|
|
232
|
+
Dataply uses 2-byte slots for data positioning within a page. This allows for a theoretical maximum of **65,536 ($2^{16}$)** rows per page.
|
|
233
|
+
|
|
234
|
+
### Q: What is the total maximum number of rows a database can hold?
|
|
235
|
+
With $2^{32}$ possible pages and $2^{16}$ rows per page, the theoretical limit is **281 trillion ($2^{48}$)** rows. In practice, the limit is governed by available storage and the maximum database file size (approx. 32TB with the default 8KB page size).
|
|
236
|
+
|
|
237
|
+
### Q: Is there a maximum database file size limit?
|
|
238
|
+
Using 4-byte (unsigned int) Page IDs and the default 8KB page size, Dataply can manage up to **32TB** of data ($2^{32} \times 8\,\text{KB}$).
|
|
239
|
+
|
|
240
|
+
### Q: Is WAL (Write-Ahead Logging) mandatory?
|
|
241
|
+
It is optional. Disabling WAL can improve write performance by reducing synchronous I/O, but enabling it is highly recommended for any production-like environment to ensure data integrity and automatic recovery after a system crash.
|
|
242
|
+
|
|
243
|
+
### Q: How does Dataply ensure data consistency during concurrent access?
|
|
244
|
+
Dataply utilizes a combination of page-level locking and MVCC (Multi-Version Concurrency Control). This allows for Snapshot Isolation, meaning readers can access a consistent state of the data without being blocked by ongoing write operations.
|
|
245
|
+
|
|
246
|
+
## License
|
|
247
|
+
|
|
248
|
+
MIT
|