document-dataply 0.0.10-alpha.7 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,15 @@ import type { AnalysisProvider } from './AnalysisProvider';
4
4
  import { Transaction } from 'dataply';
5
5
  export declare class AnalysisManager<T extends DocumentJSON> {
6
6
  private api;
7
+ readonly sampleSize: number;
7
8
  private providers;
8
- constructor(api: DocumentDataplyAPI<T>);
9
+ private cron;
10
+ private flushing;
11
+ constructor(api: DocumentDataplyAPI<T>, schedule: string, sampleSize: number);
12
+ /**
13
+ * Stop the background analysis cron job.
14
+ */
15
+ close(): void;
9
16
  /**
10
17
  * Register all built-in analysis providers.
11
18
  * Each provider class is instantiated with the API reference and registered.
@@ -28,6 +35,10 @@ export declare class AnalysisManager<T extends DocumentJSON> {
28
35
  * @param tx The transaction to use
29
36
  */
30
37
  initializeProviders(tx: Transaction): Promise<void>;
38
+ /**
39
+ * Trigger the background analysis cron job.
40
+ */
41
+ triggerCron(): void;
31
42
  /**
32
43
  * Notify all realtime providers that documents were inserted.
33
44
  * Data is persisted immediately after each provider processes the mutation.
@@ -168,5 +168,15 @@ export type CreateIndexFTSOption<T extends DocumentJSON> = {
168
168
  };
169
169
  export type CreateIndexOption<T extends DocumentJSON> = CreateIndexBTreeOption<T> | CreateIndexFTSOption<T>;
170
170
  export interface DocumentDataplyOptions extends DataplyOptions {
171
+ /**
172
+ * The cron expression for the analysis schedule.
173
+ * If not provided, the default is '* *\/1 * * *'. NOTE(review): described as "every 1 hour", but a 5-field cron pattern of '* *\/1 * * *' fires every minute; an hourly schedule would be '0 *\/1 * * *' — verify the intended pattern.
174
+ */
175
+ analysisSchedule?: string;
176
+ /**
177
+ * The sample size for the analysis.
178
+ * If not provided, default is 1000
179
+ */
180
+ analysisSampleSize?: number;
171
181
  }
172
182
  export {};
@@ -0,0 +1,34 @@
1
+ /**
2
+ * 이벤트 루프를 막지 않고 대량의 데이터를 처리하기 위한 유틸리티 클래스
3
+ */
4
+ export declare class DeadlineChunker {
5
+ /**
6
+ * 이벤트 루프를 막을 최대 허용 시간
7
+ */
8
+ private targetMs;
9
+ /**
10
+ * 현재 chunk size
11
+ */
12
+ private chunkSize;
13
+ /**
14
+ * Exponential Weighted Moving Average
15
+ */
16
+ private ewmaMs;
17
+ /**
18
+ * EWMA 평활화 계수
19
+ */
20
+ private alpha;
21
+ constructor(targetMs?: number);
22
+ /**
23
+ * EWMA 평활화 계수를 사용하여 평균 처리 시간을 업데이트합니다.
24
+ */
25
+ private updateEstimate;
26
+ /**
27
+ * 현재 chunk size를 업데이트합니다.
28
+ */
29
+ private nextChunkSize;
30
+ /**
31
+ * 주어진 items를 chunk로 분할하여 처리합니다.
32
+ */
33
+ processInChunks<T>(items: T[], processFn: (chunk: T[]) => Promise<void>): Promise<void>;
34
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "document-dataply",
3
- "version": "0.0.10-alpha.7",
3
+ "version": "0.0.10",
4
4
  "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",
@@ -42,12 +42,13 @@
42
42
  "dataply"
43
43
  ],
44
44
  "dependencies": {
45
- "dataply": "^0.0.25-alpha.1"
45
+ "croner": "^10.0.1",
46
+ "dataply": "^0.0.25"
46
47
  },
47
48
  "devDependencies": {
48
49
  "@types/jest": "^30.0.0",
49
50
  "esbuild": "^0.27.3",
50
- "jest": "^30.2.0",
51
+ "jest": "^30.3.0",
51
52
  "ts-jest": "^29.4.6",
52
53
  "typescript": "^5.9.3"
53
54
  }
package/readme.md CHANGED
@@ -1,269 +1,137 @@
1
- ![node.js workflow](https://github.com/izure1/document-dataply/actions/workflows/node.js.yml/badge.svg)
2
- ![Performance Benchmark](https://github.com/izure1/document-dataply/actions/workflows/benchmark.yml/badge.svg)
3
- ![License](https://img.shields.io/badge/license-MIT-blue.svg)
4
-
5
- # Document-Dataply
6
-
7
- > [!WARNING]
8
- > **This project is currently in the Alpha stage.**
9
- > APIs and internal structures may change significantly between versions. Use with caution in production environments.
10
-
11
- `document-dataply` is a **pure JavaScript** high-performance document-oriented database library built on top of the [`dataply`](https://github.com/izure1/dataply) record storage engine. It is designed to handle at least millions of documents with high stability, providing a structured way to store, index, and query JSON-style documents.
12
-
13
- ## Key Features
14
-
15
- - **Document-Oriented**: Store and retrieve JSON-style documents.
16
- - **B+Tree Indexing**: Supports high-performance lookups using a B+Tree indexing engine.
17
- - **Deep Indexing**: Index nested object fields and specific array elements (e.g., `user.profile.name` or `tags.0`).
18
- - **Flexible Indexing Policies**: Supports full re-indexing for existing data or incremental indexing for future data.
19
- - **ACID Transactions**: Reliable atomic operations with **WAL (Write-Ahead Logging)** and **MVCC (Multi-Version Concurrency Control)** support.
20
- - **Modern Architecture**: Fully supports **Async/Await** and **Streaming**, making it ideal for modern high-concurrency server environments.
21
- - **Rich Querying**: Supports comparison operators (`lt`, `gt`, `equal`, etc.) and pattern matching (`like`).
22
-
23
- ## Platform Compatibility
24
-
25
- Built with pure JavaScript, `document-dataply` can be used in various environments:
26
- - **Official Support**: Node.js, Electron, NW.js
27
- - **Experimental Support**: Deno, Bun
28
-
29
- ## Data Types
30
-
31
- Supports standard JSON data types:
32
- - `string`, `number`, `boolean`, `null`
33
- - Nested `object` and `array`
34
-
35
- ## Installation
36
-
37
- ```bash
38
- npm install document-dataply
39
- ```
40
-
41
- ## Quick Start
42
-
43
- ```typescript
44
- import { DocumentDataply } from 'document-dataply';
45
-
46
- type MyDocument = {
47
- name: string;
48
- age: number;
49
- tags: string[];
50
- }
51
-
52
- async function main() {
53
- const db = DocumentDataply.Define<MyDocument>()
54
- .Options({ wal: 'my-database.wal' })
55
- .Open('my-database.db');
56
-
57
- // Initialize database
58
- await db.init();
59
-
60
- // Register indices
61
- // use transaction to ensure atomicity
62
- await db.migration(1, async (tx) => {
63
- await db.createIndex('name', { type: 'btree', fields: ['name'] }, tx);
64
- await db.createIndex('tags_0', { type: 'btree', fields: ['tags.0'] }, tx);
65
-
66
- // Composite Index support
67
- await db.createIndex('idx_name_age', { type: 'btree', fields: ['name', 'age'] }, tx);
68
-
69
- console.log('Migration completed successfully');
70
- });
71
-
72
- // Insert document
73
- const id = await db.insert({
74
- name: 'John Doe',
75
- age: 30,
76
- tags: ['admin', 'developer']
77
- });
78
-
79
- // Query document
80
- const query = db.select({
81
- name: 'John Doe', // Shortcut for { name: { equal: 'John Doe' } }
82
- age: { gte: 25 }
83
- })
84
-
85
- // Get all results
86
- const allResults = await query.drain();
87
- // Or iterate through results
88
- for await (const doc of query.stream()) {
89
- console.log(doc);
90
- }
91
-
92
- console.log(allResults);
93
-
94
- // Close database
95
- await db.close();
96
- }
97
-
98
- main();
99
- ```
100
-
101
- ## Advanced Usage
102
-
103
- ### Dynamic Index Management
104
-
105
- `document-dataply` supports creating indices at any time—whether before or after the database is initialized.
106
-
107
- - **Pre-Init**: Creating an index before `db.init()` ensures that the database is ready with all necessary structures from the start.
108
- - **Post-Init**: You can call `db.createIndex()` even after the database is already running. The library will automatically create the index and perform **backfilling** (populating the index with existing data) in the background.
109
-
110
- ```typescript
111
- // Create a new index on an existing database
112
- await db.createIndex('idx_new_field', { type: 'btree', fields: ['newField'] });
113
- ```
114
-
115
- ### Composite Indexing
116
-
117
- You can create an index on multiple fields. This is useful for optimizing queries that filter or sort by multiple criteria.
118
-
119
- ```typescript
120
- await db.createIndex('idx_composite', {
121
- type: 'btree',
122
- fields: ['category', 'price', 'status']
123
- });
124
- ```
125
-
126
- The sorting is performed element-by-element in the order defined in the `fields` array. If all values are equal, the system uses the internal `_id` as a fallback to ensure stable sorting.
127
-
128
- ### Batch Insertion
129
-
130
- To efficiently insert multiple documents, use the following:
131
-
132
- ```typescript
133
- const ids = await db.insertBatch([
134
- { name: 'Alice', age: 25, tags: ['user'] },
135
- { name: 'Bob', age: 28, tags: ['moderator'] }
136
- ]);
137
- ```
138
-
139
- ### Querying
140
-
141
- `document-dataply` supports powerful search capabilities based on B+Tree indexing.
142
-
143
- | Operator | Description |
144
- | :--- | :--- |
145
- | `lt`, `lte`, `gt`, `gte` | Comparison operations |
146
- | `equal`, `notEqual` | Equality check |
147
- | `like` | Pattern matching |
148
- | `or` | Matching within an array |
149
- | `match` | Full-text search (Requires FTS Index) |
150
-
151
- For detailed operator usage, index constraints (including full scans), and sorting methods, see the [Query Guide (QUERY.md)](./docs/QUERY.md).
152
-
153
- > [!IMPORTANT]
154
- > **Full-Text Search (match)**: To use the `match` operator, you must configure the field as an FTS index (e.g., `{ type: 'fts', tokenizer: 'whitespace' }`). Standard boolean indices do not support `match`. See [QUERY.md](./docs/QUERY.md#4-full-text-search-fts-indexing) for details.
155
-
156
- ### Transactions
157
-
158
- Ensure data integrity with ACID-compliant transactions. Use `commit()` and `rollback()` to process multiple operations atomically.
159
-
160
- For detailed usage and error handling patterns, see the [Transaction Guide (TRANSACTION.md)](./docs/TRANSACTION.md).
161
-
162
- ### Updating and Deleting
163
-
164
- `document-dataply` provides flexible ways to update or delete documents based on query results. All these operations are **Stream-based**, allowing you to handle at least millions of records without memory concerns.
165
-
166
- - **Partial Update**: Modify only specific fields or use a function for dynamic updates.
167
- - **Full Update**: Replace the entire document while preserving the original `_id`.
168
- - **Delete**: Permanently remove matching documents from both storage and indices.
169
-
170
- For details on streaming mechanisms and bandwidth optimization tips, see the [Stream Guide (STREAM.md)](./docs/STREAM.md).
171
-
172
- ### Schema Migration
173
-
174
- As your document structure evolves, you can use the `migration()` method to safely update your database. This method uses a `schemeVersion` to track which migrations have been applied.
175
-
176
- ```typescript
177
- await db.migration(1, async (tx) => {
178
- // Add a new index for an existing database
179
- await db.createIndex('age', { type: 'btree', fields: ['age'] }, tx);
180
- });
181
- ```
182
-
183
- For more details on handling database evolution, see the [Migration Guide (MIGRATION.md)](./docs/MIGRATION.md).
184
-
185
- ## Tips and Advanced Features
186
-
187
- For more information on performance optimization and advanced features, see [TIPS.md](./docs/TIPS.md).
188
-
189
- - **Query Optimization**: Automatic index selection for maximum performance.
190
- - **Sorting and Pagination**: Detailed usage of `limit`, `orderBy`, and `sortOrder`.
191
- - **Memory Management**: When to use `stream()` vs `drain()`.
192
- - **Performance**: Optimizing bulk data insertion using `insertBatch`.
193
- - **Indexing Policies**: Dynamic index creation and automatic backfilling.
194
- - **Composite Indexes**: Indexing multiple fields for complex queries.
195
-
196
- ## API Reference
197
-
198
- ### `db.createIndex(name, options, tx?)`
199
- Registers or creates a named index. Can be called at any time.
200
- - `options`: `{ type: 'btree', fields: string[] }` or `{ type: 'fts', fields: string, tokenizer: ... }`.
201
- - `tx`: Optional transaction.
202
- - Returns `Promise<this>` for chaining.
203
-
204
- ### `db.dropIndex(name, tx?)`
205
- Removes a named index from the database.
206
- - `name`: The name of the index to drop.
207
- - `tx`: Optional transaction.
208
- - Returns `Promise<this>` for chaining.
209
- - Note: The internal `_id` index cannot be dropped.
210
-
211
- ### `db.init()`
212
- Initializes the database and sets up system-managed indices. It also triggers backfilling for indices registered before `init()`.
213
-
214
- ### `db.migration(version, callback, tx?)`
215
- Runs a migration callback if the current `schemeVersion` is lower than the target `version`.
216
- - `version`: The target scheme version (number).
217
- - `callback`: An async function `(tx: Transaction) => Promise<void>`.
218
- - `tx`: Optional transaction.
219
-
220
- ### `db.insert(document, tx?)`
221
- Inserts a single document. Each document is automatically assigned a unique, immutable `_id` field. The method returns this `_id` (`number`).
222
-
223
- ### `db.insertBatch(documents, tx?)`
224
- Inserts multiple documents efficiently. Returns an array of `_ids` (`number[]`).
225
-
226
- ### `db.select(query, options?, tx?)`
227
- Searches for documents matching the query. Passing an empty object (`{}`) as the `query` retrieves all documents.
228
- Returns an object `{ stream, drain }`.
229
- - `stream()`: An async iterator to traverse results one by one.
230
- - `drain()`: A promise that resolves to an array of all matching documents.
231
-
232
- ### `db.partialUpdate(query, newFields, tx?)`
233
- Partially updates documents matching the query. `newFields` can be a partial object or a function that returns a partial object. Returns the number of updated documents.
234
-
235
- ### `db.fullUpdate(query, newDocument, tx?)`
236
- Fully replaces documents matching the query while preserving their `_id`. Returns the number of updated documents.
237
-
238
- ### `db.delete(query, tx?)`
239
- Deletes documents matching the query. Returns the number of deleted documents.
240
-
241
- ### `db.getMetadata(tx?)`
242
- Returns physical storage information and index metadata.
243
- - Returns `Promise<{ pageSize, pageCount, rowCount, indices, schemeVersion }>`
244
- - `indices`: List of user-defined index names.
245
- - `schemeVersion`: The current schema version of the database.
246
-
247
- ### `db.createTransaction()`
248
- Returns a new `Transaction` object.
249
-
250
- ### `db.close()`
251
- Flushes changes and closes the database files.
252
-
253
- ## Benchmark
254
-
255
- Automated benchmarks are executed on every push to the `main` branch and for every pull request. This ensures that performance regressions are detected early.
256
-
257
- - **Dataset**: 10,000 documents
258
- - **Operations**: Batch Insert, Indexed Select, Partial Update, Full Update, Delete
259
-
260
- ### Performance Trend
261
-
262
- You can view the real-time performance trend and detailed metrics on our [Performance Dashboard](https://izure1.github.io/document-dataply/dev/bench/).
263
-
264
- > [!TIP]
265
- > **Continuous Monitoring**: We use `github-action-benchmark` to monitor performance changes. For every PR, a summary of the performance impact is automatically commented to help maintain high efficiency.
266
-
267
- ## License
268
-
269
- MIT
1
+ ![node.js workflow](https://github.com/izure1/document-dataply/actions/workflows/node.js.yml/badge.svg)
2
+ ![Performance Benchmark](https://github.com/izure1/document-dataply/actions/workflows/benchmark.yml/badge.svg)
3
+ ![License](https://img.shields.io/badge/license-MIT-blue.svg)
4
+
5
+ # Document-Dataply
6
+
7
+ > [!WARNING]
8
+ > **This project is currently in the Alpha stage.**
9
+ > APIs and internal structures may change in future updates. Test thoroughly before deploying to production environments.
10
+
11
+ ## 📖 Introduction
12
+
13
+ `document-dataply` is a high-performance **Document Database** implemented in pure JavaScript.
14
+ It prevents server memory (RAM) exhaustion even when handling millions of records, and supports ultra-fast search and batch processing. It can be used intuitively, without the complex tuning an RDBMS requires.
15
+
16
+ ### Key Features
17
+
18
+ - **JSON Document Based**: Reads and writes data in raw JavaScript Object (JSON) format. Easily supports querying deeply nested object arrays (`a.b.c`).
19
+ - **B+Tree Indexing**: Built-in indexing system to drastically improve query speed (O(log N)).
20
+ - **Cost-Based Optimizer**: The engine automatically analyzes data distribution during query requests to find the optimal execution path.
21
+ - **Full-Text Search (FTS)**: Provides a real-time search engine to find specific keywords within long text, such as article contents.
22
+ - **Safe Transactions**: If an error occurs during an operation, all modifications are fully rolled back, completely preventing data inconsistency.
23
+
24
+ ---
25
+
26
+ ## 💻 Installation
27
+
28
+ ```bash
29
+ npm install document-dataply
30
+ ```
31
+
32
+ - **Supported Environments**: Node.js, Electron, NW.js (Experimental support for Bun, Deno)
33
+
34
+ ---
35
+
36
+ ## 🚀 Quick Start
37
+
38
+ Here is the most basic guide for creating a database, registering an index, and querying data.
39
+
40
+ ```typescript
41
+ import { DocumentDataply } from 'document-dataply';
42
+
43
+ // 1. Define Document Type (Schema)
44
+ type UserProfile = {
45
+ name: string;
46
+ age: number;
47
+ tags: string[];
48
+ }
49
+
50
+ async function main() {
51
+ // 2. Create instance and connect to file
52
+ const db = DocumentDataply.Define<UserProfile>()
53
+ .Options({ wal: 'my-database.wal' })
54
+ .Open('my-database.db');
55
+
56
+ await db.init();
57
+
58
+ // 3. Create Index (Specify 'name' field to improve search speed)
59
+ await db.migration(1, async (tx) => {
60
+ await db.createIndex('idx_name', { type: 'btree', fields: ['name'] }, tx);
61
+ console.log('Index created successfully');
62
+ });
63
+
64
+ // 4. Insert Single Document
65
+ await db.insert({
66
+ name: 'IU',
67
+ age: 30,
68
+ tags: ['Singer', 'Actor']
69
+ });
70
+
71
+ // 5. Query Data
72
+ const query = db.select({
73
+ name: 'IU'
74
+ });
75
+
76
+ // Fetch results as an array at once
77
+ const results = await query.drain();
78
+ console.log(results);
79
+ // Output: [{ _id: 1, name: 'IU', age: 30, tags: ['Singer', 'Actor'] }]
80
+
81
+ // 6. Close the connection
82
+ await db.close();
83
+ }
84
+
85
+ main();
86
+ ```
87
+
88
+ ---
89
+
90
+ ## 💡 Common Patterns
91
+
92
+ ### Batch Processing for Performance
93
+ ```typescript
94
+ const largeData = Array.from({ length: 10000 }, (_, i) => ({
95
+ name: `User_${i}`,
96
+ age: Math.floor(Math.random() * 50),
97
+ tags: ['imported']
98
+ }));
99
+
100
+ // Much faster than individual inserts
101
+ await db.insertBatch(largeData);
102
+ ```
103
+
104
+ ### Complex Queries with Operators
105
+ ```typescript
106
+ const activeSeniors = await db.select({
107
+ age: { gte: 65 },
108
+ status: { equal: 'active' },
109
+ name: { like: 'John%' }
110
+ }).drain();
111
+ ```
112
+
113
+ ---
114
+
115
+ ## 📚 Detailed Manual
116
+
117
+ Please refer to the following documents for detailed usage of each feature and the library's core architecture.
118
+
119
+ 1. [**Query & Index Guide**](./docs/QUERY_AND_INDEX.md)
120
+ - Explains various search operators (`lt`, `gt`, etc.) and how to configure composite indices.
121
+ 2. [**Mutation & Transaction**](./docs/MUTATION_AND_TRANSACTION.md)
122
+ - Covers batch insertion techniques and transaction rollback principles.
123
+ 3. [**Stream Architecture (Memory Optimization)**](./docs/STREAM.md)
124
+ - Analyzes the secrets of the stream-based architecture that prevents OOM (Out Of Memory) crashes.
125
+ 4. [**Architecture Overview**](./docs/ARCHITECTURE.md)
126
+ - Deeply explores the internal workings of the database core, such as the optimizer.
127
+
128
+ ---
129
+
130
+ ## 📊 Library Benchmark
131
+
132
+ `document-dataply` runs strict performance benchmarks on every code merge to guard against performance regressions.
133
+
134
+ [Check Real-time Performance on Main Branch](https://izure1.github.io/document-dataply/dev/bench/)
135
+
136
+ ## 📜 License
137
+ MIT License