jexidb 2.0.3 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +13 -0
- package/.gitattributes +2 -0
- package/CHANGELOG.md +132 -101
- package/LICENSE +21 -21
- package/README.md +301 -639
- package/babel.config.json +5 -0
- package/dist/Database.cjs +5204 -0
- package/docs/API.md +908 -241
- package/docs/EXAMPLES.md +701 -177
- package/docs/README.md +194 -184
- package/examples/iterate-usage-example.js +157 -0
- package/examples/simple-iterate-example.js +115 -0
- package/jest.config.js +24 -0
- package/package.json +63 -54
- package/scripts/README.md +47 -0
- package/scripts/benchmark-array-serialization.js +108 -0
- package/scripts/clean-test-files.js +75 -0
- package/scripts/prepare.js +31 -0
- package/scripts/run-tests.js +80 -0
- package/scripts/score-mode-demo.js +45 -0
- package/src/Database.mjs +5325 -0
- package/src/FileHandler.mjs +1140 -0
- package/src/OperationQueue.mjs +279 -0
- package/src/SchemaManager.mjs +268 -0
- package/src/Serializer.mjs +702 -0
- package/src/managers/ConcurrencyManager.mjs +257 -0
- package/src/managers/IndexManager.mjs +2094 -0
- package/src/managers/QueryManager.mjs +1490 -0
- package/src/managers/StatisticsManager.mjs +262 -0
- package/src/managers/StreamingProcessor.mjs +429 -0
- package/src/managers/TermManager.mjs +278 -0
- package/src/utils/operatorNormalizer.mjs +116 -0
- package/test/$not-operator-with-and.test.js +282 -0
- package/test/README.md +8 -0
- package/test/close-init-cycle.test.js +256 -0
- package/test/coverage-method.test.js +93 -0
- package/test/critical-bugs-fixes.test.js +1069 -0
- package/test/deserialize-corruption-fixes.test.js +296 -0
- package/test/exists-method.test.js +318 -0
- package/test/explicit-indexes-comparison.test.js +219 -0
- package/test/filehandler-non-adjacent-ranges-bug.test.js +175 -0
- package/test/index-line-number-regression.test.js +100 -0
- package/test/index-missing-index-data.test.js +91 -0
- package/test/index-persistence.test.js +491 -0
- package/test/index-serialization.test.js +314 -0
- package/test/indexed-query-mode.test.js +360 -0
- package/test/insert-session-auto-flush.test.js +353 -0
- package/test/iterate-method.test.js +272 -0
- package/test/legacy-operator-compat.test.js +154 -0
- package/test/query-operators.test.js +238 -0
- package/test/regex-array-fields.test.js +129 -0
- package/test/score-method.test.js +298 -0
- package/test/setup.js +17 -0
- package/test/term-mapping-minimal.test.js +154 -0
- package/test/term-mapping-simple.test.js +257 -0
- package/test/term-mapping.test.js +514 -0
- package/test/writebuffer-flush-resilience.test.js +204 -0
- package/dist/FileHandler.js +0 -688
- package/dist/IndexManager.js +0 -353
- package/dist/IntegrityChecker.js +0 -364
- package/dist/JSONLDatabase.js +0 -1333
- package/dist/index.js +0 -617
- package/docs/MIGRATION.md +0 -295
- package/examples/auto-save-example.js +0 -158
- package/examples/cjs-usage.cjs +0 -82
- package/examples/close-vs-delete-example.js +0 -71
- package/examples/esm-usage.js +0 -113
- package/examples/example-columns.idx.jdb +0 -0
- package/examples/example-columns.jdb +0 -9
- package/examples/example-options.idx.jdb +0 -0
- package/examples/example-options.jdb +0 -0
- package/examples/example-users.idx.jdb +0 -0
- package/examples/example-users.jdb +0 -5
- package/examples/simple-test.js +0 -55
- package/src/FileHandler.js +0 -674
- package/src/IndexManager.js +0 -363
- package/src/IntegrityChecker.js +0 -379
- package/src/JSONLDatabase.js +0 -1391
- package/src/index.js +0 -608
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
/**
 * Build a zeroed usage-statistics record (query counters and per-type
 * running averages).
 * @returns {Object} - Fresh usage statistics
 */
function createEmptyUsageStats() {
  return {
    totalQueries: 0,
    streamingQueries: 0,
    indexedQueries: 0,
    streamingAverageTime: 0,
    indexedAverageTime: 0
  }
}

/**
 * Build a zeroed performance-metrics record whose start/reset timestamps are
 * both set to the same "now" reading.
 * @returns {Object} - Fresh performance metrics
 */
function createEmptyPerformanceMetrics() {
  const now = Date.now()
  return {
    startTime: now,
    lastResetTime: now,
    totalOperations: 0,
    totalErrors: 0,
    averageOperationTime: 0,
    peakMemoryUsage: 0,
    cacheHits: 0,
    cacheMisses: 0
  }
}

/**
 * StatisticsManager - Handles all statistics and metrics collection
 *
 * Responsibilities:
 * - getJournalStats()
 * - Performance metrics
 * - Usage statistics
 */
export class StatisticsManager {
  /**
   * @param {Object} database - Owning database instance. The fields read here
   *   are `opts`, `usageStats`, `offsets`, `indexOffset`, `writeBuffer`,
   *   `indexManager`, `initialized` and `destroyed`.
   */
  constructor(database) {
    this.database = database
    // Fall back to an empty object so the debugMode/collectStatistics lookups
    // below never dereference undefined.
    this.opts = database.opts || {}
    // Reuses the database's own usageStats object (by reference) when present.
    this.usageStats = database.usageStats || createEmptyUsageStats()
    this.performanceMetrics = createEmptyPerformanceMetrics()
  }

  /**
   * Get journal statistics
   * @returns {Object} - Journal statistics (always reports the journal as disabled)
   */
  getJournalStats() {
    return {
      enabled: false,
      message: 'Journal mode has been removed'
    }
  }

  /**
   * Get performance metrics
   * @returns {Object} - Performance metrics, including derived rates
   */
  getPerformanceMetrics() {
    const metrics = this.performanceMetrics
    const uptime = Date.now() - metrics.startTime
    const uptimeSeconds = uptime / 1000
    const cacheLookups = metrics.cacheHits + metrics.cacheMisses

    return {
      uptime: uptime,
      totalOperations: metrics.totalOperations,
      totalErrors: metrics.totalErrors,
      averageOperationTime: metrics.averageOperationTime,
      // Guard the zero-uptime case: dividing by 0 would yield NaN/Infinity,
      // which JSON.stringify (see exportStats) turns into null.
      operationsPerSecond: uptimeSeconds > 0 ? metrics.totalOperations / uptimeSeconds : 0,
      errorRate: metrics.totalErrors / Math.max(1, metrics.totalOperations),
      peakMemoryUsage: metrics.peakMemoryUsage,
      cacheHitRate: metrics.cacheHits / Math.max(1, cacheLookups),
      lastResetTime: metrics.lastResetTime
    }
  }

  /**
   * Get usage statistics
   * @returns {Object} - Usage statistics plus the streaming/indexed query split
   */
  getUsageStats() {
    const usage = this.usageStats
    const queryCount = Math.max(1, usage.totalQueries)

    return {
      totalQueries: usage.totalQueries,
      streamingQueries: usage.streamingQueries,
      indexedQueries: usage.indexedQueries,
      streamingAverageTime: usage.streamingAverageTime,
      indexedAverageTime: usage.indexedAverageTime,
      queryDistribution: {
        streaming: usage.streamingQueries / queryCount,
        indexed: usage.indexedQueries / queryCount
      }
    }
  }

  /**
   * Get database statistics
   * @returns {Object} - Database statistics read from the owning database
   */
  getDatabaseStats() {
    const db = this.database
    const indexedFields = Object.keys(db.indexManager?.index?.data || {})

    return {
      totalRecords: db.offsets?.length || 0,
      indexOffset: db.indexOffset || 0,
      writeBufferSize: db.writeBuffer?.length || 0,
      indexedFields,
      totalIndexedFields: indexedFields.length,
      isInitialized: db.initialized || false,
      isDestroyed: db.destroyed || false
    }
  }

  /**
   * Get comprehensive statistics
   * @returns {Object} - All statistics combined, stamped with the current time
   */
  getComprehensiveStats() {
    return {
      database: this.getDatabaseStats(),
      performance: this.getPerformanceMetrics(),
      usage: this.getUsageStats(),
      journal: this.getJournalStats(),
      timestamp: Date.now()
    }
  }

  /**
   * Record operation performance
   * @param {string} operation - Operation name (currently not used by this method)
   * @param {number} duration - Duration in milliseconds
   * @param {boolean} success - Whether operation was successful
   */
  recordOperation(operation, duration, success = true) {
    const metrics = this.performanceMetrics
    metrics.totalOperations++

    if (!success) {
      metrics.totalErrors++
    }

    // Incremental mean: fold the new duration into the previous average.
    const totalOps = metrics.totalOperations
    metrics.averageOperationTime =
      (metrics.averageOperationTime * (totalOps - 1) + duration) / totalOps

    // Update peak memory usage (only where process.memoryUsage is available)
    if (typeof process !== 'undefined' && process.memoryUsage) {
      metrics.peakMemoryUsage = Math.max(
        metrics.peakMemoryUsage,
        process.memoryUsage().heapUsed
      )
    }
  }

  /**
   * Record cache hit
   */
  recordCacheHit() {
    this.performanceMetrics.cacheHits++
  }

  /**
   * Record cache miss
   */
  recordCacheMiss() {
    this.performanceMetrics.cacheMisses++
  }

  /**
   * Update query statistics
   * @param {string} type - Query type ('streaming' or 'indexed'); any other
   *   value only increments the total query counter
   * @param {number} duration - Query duration in milliseconds
   */
  updateQueryStats(type, duration) {
    this.usageStats.totalQueries++

    if (type === 'streaming') {
      this.usageStats.streamingQueries++
      this.updateAverageTime('streaming', duration)
    } else if (type === 'indexed') {
      this.usageStats.indexedQueries++
      this.updateAverageTime('indexed', duration)
    }
  }

  /**
   * Update average time for a query type
   *
   * Expects the matching `${type}Queries` counter to have been incremented
   * already (as updateQueryStats does), so that it includes this sample.
   * @param {string} type - Query type
   * @param {number} time - Time taken
   */
  updateAverageTime(type, time) {
    const key = `${type}AverageTime`
    if (!this.usageStats[key]) {
      this.usageStats[key] = 0
    }

    const currentAverage = this.usageStats[key]
    const count = this.usageStats[`${type}Queries`] || 1

    // Calculate running average
    this.usageStats[key] = (currentAverage * (count - 1) + time) / count
  }

  /**
   * Reset all statistics
   *
   * NOTE(review): this replaces `this.usageStats` with a new object. If the
   * constructor picked up `database.usageStats` by reference, the two no
   * longer share state after a reset - confirm that is intended.
   */
  resetStats() {
    this.usageStats = createEmptyUsageStats()
    this.performanceMetrics = createEmptyPerformanceMetrics()

    if (this.opts.debugMode) {
      console.log('📊 Statistics reset')
    }
  }

  /**
   * Export statistics to JSON
   * @returns {string} - JSON string of statistics (2-space indented)
   */
  exportStats() {
    return JSON.stringify(this.getComprehensiveStats(), null, 2)
  }

  /**
   * Get statistics summary for logging
   * @returns {string} - Summary string, one metric per line
   */
  getStatsSummary() {
    const { database, usage, performance } = this.getComprehensiveStats()
    const streamingPct = Math.round(usage.queryDistribution.streaming * 100)
    const indexedPct = Math.round(usage.queryDistribution.indexed * 100)

    return [
      '📊 Database Statistics Summary:',
      `Records: ${database.totalRecords}`,
      `Queries: ${usage.totalQueries} (${streamingPct}% streaming, ${indexedPct}% indexed)`,
      `Operations: ${performance.totalOperations}`,
      `Errors: ${performance.totalErrors}`,
      `Uptime: ${Math.round(performance.uptime / 1000)}s`,
      `Cache Hit Rate: ${Math.round(performance.cacheHitRate * 100)}%`
    ].join('\n')
  }

  /**
   * Check if statistics collection is enabled
   * @returns {boolean} - True unless `opts.collectStatistics` is exactly false
   */
  isEnabled() {
    return this.opts.collectStatistics !== false
  }

  /**
   * Enable or disable statistics collection
   * @param {boolean} enabled - Whether to enable statistics
   */
  setEnabled(enabled) {
    this.opts.collectStatistics = enabled

    if (this.opts.debugMode) {
      console.log(`📊 Statistics collection ${enabled ? 'enabled' : 'disabled'}`)
    }
  }
}
|
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
import { EventEmitter } from 'events'
|
|
2
|
+
import fs from 'fs'
|
|
3
|
+
import readline from 'readline'
|
|
4
|
+
|
|
5
|
+
/**
 * StreamingProcessor - Efficient streaming processing for large datasets
 *
 * Features:
 * - Memory-efficient processing of large files
 * - Configurable batch sizes
 * - Progress tracking
 * - Error handling and recovery
 * - Transform pipelines
 * - Backpressure control
 *
 * Events emitted: 'batchComplete', 'batchError', 'progress', 'complete',
 * 'error' and 'stopped'.
 */
export class StreamingProcessor extends EventEmitter {
  /**
   * @param {Object} [opts] - Processor options
   * @param {number} [opts.batchSize=1000] - Items handed to the processor per batch
   * @param {number} [opts.maxConcurrency=5] - Stored on `opts`; not referenced by this class
   * @param {number} [opts.bufferSize=65536] - Read-stream highWaterMark in bytes
   * @param {boolean} [opts.enableProgress=true] - Emit periodic 'progress' events
   * @param {number} [opts.progressInterval=1000] - Milliseconds between 'progress' events
   * @param {boolean} [opts.enableBackpressure=true] - Wait while too many batches are pending
   * @param {number} [opts.maxPendingBatches=10] - Pending-batch threshold for backpressure
   */
  constructor(opts = {}) {
    super()

    // Caller options are spread FIRST so extra keys are preserved while the
    // normalised values below always win. (Spreading last let an explicit
    // `batchSize: 0` or `batchSize: undefined` replace the default, which made
    // processArray loop forever or drop items.)
    this.opts = {
      ...opts,
      batchSize: opts.batchSize || 1000,
      maxConcurrency: opts.maxConcurrency || 5,
      bufferSize: opts.bufferSize || 64 * 1024, // 64KB
      enableProgress: opts.enableProgress === undefined ? true : Boolean(opts.enableProgress),
      progressInterval: opts.progressInterval || 1000, // 1 second
      enableBackpressure: opts.enableBackpressure === undefined ? true : Boolean(opts.enableBackpressure),
      maxPendingBatches: opts.maxPendingBatches || 10
    }

    this.isProcessing = false
    this.currentBatch = 0
    this.totalBatches = 0
    this.processedItems = 0
    this.totalItems = 0
    this.pendingBatches = 0
    this.stats = this._createEmptyStats()

    this.progressTimer = null
    this.transformPipeline = []

    // Identity of the run currently allowed to make progress (null when idle
    // or after stop()); lets a stopped run notice it has been cancelled.
    this._runToken = null
    // Batches finished in the current run; denominator of averageBatchTime.
    this._completedBatches = 0
  }

  /**
   * Add a transform function to the pipeline
   * @param {Function} transformFn - Receives a batch and returns (or resolves to) the transformed batch
   * @returns {StreamingProcessor} - This instance, for chaining
   */
  addTransform(transformFn) {
    this.transformPipeline.push(transformFn)
    return this
  }

  /**
   * Process a file stream
   *
   * Reads the file line by line, skips blank lines and hands the remaining
   * (untrimmed) lines to `processorFn` in batches of `opts.batchSize`.
   * `totalItems` is an estimate expressed in read chunks
   * (file size / bufferSize), not in lines.
   * @param {string} filePath - Path of the file to read (as UTF-8)
   * @param {Function} processorFn - Called as processorFn(batch, batchNumber)
   * @returns {Promise<void>}
   */
  async processFileStream(filePath, processorFn) {
    const token = this._beginRun()
    this.totalBatches = 0 // Unknown for streams

    let fileStream = null
    let rl = null

    try {
      // Start progress tracking
      if (this.opts.enableProgress) {
        this._startProgressTracking()
      }

      // Get file size for progress tracking
      const fileStats = await fs.promises.stat(filePath)
      this.totalItems = Math.ceil(fileStats.size / this.opts.bufferSize)

      fileStream = fs.createReadStream(filePath, {
        encoding: 'utf8',
        highWaterMark: this.opts.bufferSize
      })

      rl = readline.createInterface({
        input: fileStream,
        crlfDelay: Infinity
      })

      let batch = []

      // Process lines in batches
      for await (const line of rl) {
        if (!this._isActive(token)) {
          break
        }
        if (!line.trim()) {
          continue
        }

        batch.push(line)

        // Process batch when it reaches the configured size
        if (batch.length >= this.opts.batchSize) {
          await this._processBatch(batch, processorFn)
          batch = []
        }
      }

      // Process remaining items in the last batch (skipped once stopped)
      if (batch.length > 0 && this._isActive(token)) {
        await this._processBatch(batch, processorFn)
      }

      this._completeRun(token)
    } catch (error) {
      this.emit('error', error)
      throw error
    } finally {
      // Release the file descriptor even when the loop exits early or throws.
      if (rl) {
        rl.close()
      }
      if (fileStream) {
        fileStream.destroy()
      }
      this._endRun(token)
    }
  }

  /**
   * Process an array of items in streaming fashion
   * @param {Array} items - Items to process; sliced into batches of `opts.batchSize`
   * @param {Function} processorFn - Called as processorFn(batch, batchNumber)
   * @returns {Promise<void>}
   */
  async processArray(items, processorFn) {
    const token = this._beginRun()
    const { batchSize } = this.opts
    this.totalItems = items.length
    this.totalBatches = Math.ceil(items.length / batchSize)

    try {
      // Start progress tracking
      if (this.opts.enableProgress) {
        this._startProgressTracking()
      }

      // Process items in batches
      for (let i = 0; i < items.length; i += batchSize) {
        if (!this._isActive(token)) {
          break
        }
        await this._processBatch(items.slice(i, i + batchSize), processorFn)
      }

      this._completeRun(token)
    } catch (error) {
      this.emit('error', error)
      throw error
    } finally {
      this._endRun(token)
    }
  }

  /**
   * Process a generator function
   * @param {Function} generatorFn - Returns a sync or async iterable of items
   * @param {Function} processorFn - Called as processorFn(batch, batchNumber)
   * @returns {Promise<void>}
   */
  async processGenerator(generatorFn, processorFn) {
    const token = this._beginRun()
    this.totalItems = 0 // Unknown for generators; counted as items arrive
    this.totalBatches = 0 // Unknown for generators

    try {
      // Start progress tracking
      if (this.opts.enableProgress) {
        this._startProgressTracking()
      }

      let batch = []

      for await (const item of generatorFn()) {
        if (!this._isActive(token)) {
          break
        }

        batch.push(item)
        this.totalItems++

        // Process batch when it reaches the configured size
        if (batch.length >= this.opts.batchSize) {
          await this._processBatch(batch, processorFn)
          batch = []
        }
      }

      // Process remaining items in the last batch (skipped once stopped)
      if (batch.length > 0 && this._isActive(token)) {
        await this._processBatch(batch, processorFn)
      }

      this._completeRun(token)
    } catch (error) {
      this.emit('error', error)
      throw error
    } finally {
      this._endRun(token)
    }
  }

  /**
   * Process a single batch
   *
   * Runs the batch through the transform pipeline, calls `processorFn` and
   * emits 'batchComplete' (or 'batchError' before rethrowing).
   * @param {Array} batch - Items of this batch
   * @param {Function} processorFn - Called as processorFn(transformedBatch, batchNumber)
   * @returns {Promise<void>}
   */
  async _processBatch(batch, processorFn) {
    if (this.opts.enableBackpressure && this.pendingBatches >= this.opts.maxPendingBatches) {
      // Wait for backpressure to reduce
      await this._waitForBackpressure()
    }

    this.pendingBatches++
    this.currentBatch++

    try {
      const startTime = Date.now()

      // Apply transform pipeline
      let transformedBatch = batch
      for (const transform of this.transformPipeline) {
        transformedBatch = await transform(transformedBatch)
      }

      // Process the batch
      const result = await processorFn(transformedBatch, this.currentBatch)

      const batchTime = Date.now() - startTime

      // True running mean over the batches completed in this run.
      const completed = ++this._completedBatches
      this.stats.averageBatchTime += (batchTime - this.stats.averageBatchTime) / completed

      // Counts the items received, i.e. before any transform filtered them.
      this.processedItems += batch.length

      this.emit('batchComplete', {
        batchNumber: this.currentBatch,
        batchSize: batch.length,
        processingTime: batchTime,
        result
      })
    } catch (error) {
      this.emit('batchError', {
        batchNumber: this.currentBatch,
        batchSize: batch.length,
        error
      })
      throw error
    } finally {
      this.pendingBatches--
    }
  }

  /**
   * Wait for backpressure to reduce
   *
   * Polls every 10 ms until fewer than `opts.maxPendingBatches` are pending.
   * @returns {Promise<void>}
   */
  async _waitForBackpressure() {
    return new Promise((resolve) => {
      const checkBackpressure = () => {
        if (this.pendingBatches < this.opts.maxPendingBatches) {
          resolve()
        } else {
          setTimeout(checkBackpressure, 10)
        }
      }
      checkBackpressure()
    })
  }

  /**
   * Start progress tracking
   *
   * Emits a 'progress' event every `opts.progressInterval` milliseconds.
   */
  _startProgressTracking() {
    // Never leave a previous interval running.
    this._stopProgressTracking()

    this.progressTimer = setInterval(() => {
      // Throughput so far; stats.itemsPerSecond is only final after a run ends.
      const elapsedSeconds = (Date.now() - this.stats.startTime) / 1000

      this.emit('progress', {
        currentBatch: this.currentBatch,
        totalBatches: this.totalBatches,
        processedItems: this.processedItems,
        totalItems: this.totalItems,
        percentage: this.totalItems > 0 ? (this.processedItems / this.totalItems) * 100 : 0,
        itemsPerSecond: elapsedSeconds > 0 ? this.processedItems / elapsedSeconds : 0,
        averageBatchTime: this.stats.averageBatchTime,
        memoryUsage: process.memoryUsage().heapUsed / 1024 / 1024 // MB
      })
    }, this.opts.progressInterval)
    this.progressTimer.unref() // Allow process to exit without waiting for this timer
  }

  /**
   * Stop progress tracking
   */
  _stopProgressTracking() {
    if (this.progressTimer) {
      clearInterval(this.progressTimer)
      this.progressTimer = null
    }
  }

  /**
   * Get current statistics
   * @returns {Object} - Copy of the timing stats plus the live counters
   */
  getStats() {
    return {
      ...this.stats,
      isProcessing: this.isProcessing,
      currentBatch: this.currentBatch,
      totalBatches: this.totalBatches,
      processedItems: this.processedItems,
      totalItems: this.totalItems,
      pendingBatches: this.pendingBatches,
      transformPipelineLength: this.transformPipeline.length
    }
  }

  /**
   * Stop processing
   *
   * Cancels the active run: the batch currently in flight is allowed to
   * finish, no further batches are started and no 'complete' event is
   * emitted for that run. Always emits 'stopped'.
   */
  stop() {
    this._runToken = null
    this.isProcessing = false
    this._stopProgressTracking()
    this.emit('stopped')
  }

  /**
   * Reset the processor
   *
   * Stops any active run, zeroes all counters and stats and clears the
   * transform pipeline.
   */
  reset() {
    this.stop()
    this.currentBatch = 0
    this.totalBatches = 0
    this.processedItems = 0
    this.totalItems = 0
    this.pendingBatches = 0
    this._completedBatches = 0
    this.stats = this._createEmptyStats()
    this.transformPipeline = []
  }

  /**
   * Build a zeroed timing-stats record.
   * @returns {Object} - Fresh stats object
   */
  _createEmptyStats() {
    return {
      startTime: 0,
      endTime: 0,
      totalProcessingTime: 0,
      averageBatchTime: 0,
      itemsPerSecond: 0,
      memoryUsage: 0
    }
  }

  /**
   * Mark the start of a run and reset the per-run counters.
   * @returns {symbol} - Token identifying this run
   * @throws {Error} If a run is already in progress
   */
  _beginRun() {
    if (this.isProcessing) {
      throw new Error('Streaming processor is already running')
    }

    const token = Symbol('run')
    this._runToken = token
    this._completedBatches = 0
    this.isProcessing = true
    this.stats.startTime = Date.now()
    this.stats.averageBatchTime = 0
    this.currentBatch = 0
    this.processedItems = 0
    return token
  }

  /**
   * Tell whether the run identified by `token` is still the active one
   * (false after stop() or reset()).
   * @param {symbol} token - Token returned by _beginRun
   * @returns {boolean}
   */
  _isActive(token) {
    return this._runToken === token
  }

  /**
   * Finalise the timing stats and emit 'complete'; does nothing when the run
   * was stopped.
   * @param {symbol} token - Token returned by _beginRun
   */
  _completeRun(token) {
    if (!this._isActive(token)) {
      return
    }

    const stats = this.stats
    stats.endTime = Date.now()
    stats.totalProcessingTime = stats.endTime - stats.startTime
    // A run that finishes within the same millisecond would divide by zero.
    const elapsedSeconds = stats.totalProcessingTime / 1000
    stats.itemsPerSecond = elapsedSeconds > 0 ? this.processedItems / elapsedSeconds : 0

    this.emit('complete', {
      totalItems: this.processedItems,
      totalBatches: this.currentBatch,
      processingTime: stats.totalProcessingTime,
      itemsPerSecond: stats.itemsPerSecond
    })
  }

  /**
   * Release the "processing" state at the end of a run. Skipped when the run
   * was already stopped, so a run started afterwards is not clobbered.
   * @param {symbol} token - Token returned by _beginRun
   */
  _endRun(token) {
    if (!this._isActive(token)) {
      return
    }

    this._runToken = null
    this.isProcessing = false
    this._stopProgressTracking()
  }
}
|
|
383
|
+
|
|
384
|
+
/**
 * Predefined transform functions
 *
 * Each entry is either a batch transform `(batch) => batch` or a factory
 * returning one.
 */
export const Transforms = {
  /**
   * Parse JSON lines. Lines that fail to parse are logged with console.warn
   * and dropped; lines that parse to a literal `null` are dropped as well.
   * @param {string[]} batch - Raw JSON lines
   * @returns {Array} - Parsed values
   */
  parseJSON: (batch) => {
    return batch.map(line => {
      try {
        return JSON.parse(line)
      } catch (error) {
        console.warn('Failed to parse JSON line:', line)
        return null
      }
    }).filter(item => item !== null)
  },

  /**
   * Filter out null/undefined values (other falsy values are kept)
   * @param {Array} batch - Items to filter
   * @returns {Array} - Items that are neither null nor undefined
   */
  filterNull: (batch) => {
    return batch.filter(item => item !== null && item !== undefined)
  },

  /**
   * Transform to specific format
   * @param {string} format - 'string', 'number' or 'object'; any other value
   *   returns the batch unchanged
   * @returns {Function} - Batch transform
   */
  toFormat: (format) => (batch) => {
    switch (format) {
      case 'string':
        return batch.map(item => String(item))
      case 'number':
        return batch.map(item => Number(item))
      case 'object':
        // typeof null === 'object', so null must be excluded explicitly or it
        // would slip through unwrapped.
        return batch.map(item => (item !== null && typeof item === 'object') ? item : { value: item })
      default:
        return batch
    }
  },

  /**
   * Add metadata
   *
   * Copies each item, merges `metadata` over it and stamps `processedAt`
   * with the current time.
   * @param {Object} metadata - Properties merged into every item
   * @returns {Function} - Batch transform
   */
  addMetadata: (metadata) => (batch) => {
    return batch.map(item => ({
      ...item,
      ...metadata,
      processedAt: Date.now()
    }))
  }
}
|
|
428
|
+
|
|
429
|
+
export default StreamingProcessor
|