@naturalcycles/nodejs-lib 15.70.0 → 15.71.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stream/index.d.ts +0 -1
- package/dist/stream/index.js +0 -1
- package/dist/stream/pipeline.d.ts +1 -4
- package/dist/stream/pipeline.js +3 -22
- package/dist/stream/transform/transformFilter.js +2 -2
- package/dist/stream/transform/transformMap.d.ts +13 -25
- package/dist/stream/transform/transformMap.js +134 -118
- package/dist/stream/transform/worker/transformMultiThreaded.js +59 -39
- package/package.json +1 -3
- package/src/stream/index.ts +0 -1
- package/src/stream/pipeline.ts +3 -34
- package/src/stream/transform/transformFilter.ts +2 -2
- package/src/stream/transform/transformMap.ts +168 -153
- package/src/stream/transform/worker/transformMultiThreaded.ts +57 -40
- package/src/stream/transform/worker/workerClassProxy.js +0 -4
- package/dist/stream/transform/transformMap2.d.ts +0 -66
- package/dist/stream/transform/transformMap2.js +0 -171
- package/src/stream/transform/transformMap2.ts +0 -283
package/src/stream/pipeline.ts
CHANGED
|
@@ -45,7 +45,6 @@ import {
|
|
|
45
45
|
type TransformLogProgressOptions,
|
|
46
46
|
} from './transform/transformLogProgress.js'
|
|
47
47
|
import { transformMap, type TransformMapOptions } from './transform/transformMap.js'
|
|
48
|
-
import { transformMap2, type TransformMap2Options } from './transform/transformMap2.js'
|
|
49
48
|
import {
|
|
50
49
|
transformMapSimple,
|
|
51
50
|
type TransformMapSimpleOptions,
|
|
@@ -185,7 +184,7 @@ export class Pipeline<T = unknown> {
|
|
|
185
184
|
return this
|
|
186
185
|
}
|
|
187
186
|
|
|
188
|
-
|
|
187
|
+
map<TO>(
|
|
189
188
|
mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>,
|
|
190
189
|
opt?: TransformMapOptions<T, TO>,
|
|
191
190
|
): Pipeline<TO> {
|
|
@@ -198,19 +197,6 @@ export class Pipeline<T = unknown> {
|
|
|
198
197
|
return this as any
|
|
199
198
|
}
|
|
200
199
|
|
|
201
|
-
map<TO>(
|
|
202
|
-
mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>,
|
|
203
|
-
opt?: TransformMap2Options<T, TO>,
|
|
204
|
-
): Pipeline<TO> {
|
|
205
|
-
this.transforms.push(
|
|
206
|
-
transformMap2(mapper, {
|
|
207
|
-
...opt,
|
|
208
|
-
signal: this.abortableSignal,
|
|
209
|
-
}),
|
|
210
|
-
)
|
|
211
|
-
return this as any
|
|
212
|
-
}
|
|
213
|
-
|
|
214
200
|
mapSync<TO>(
|
|
215
201
|
mapper: IndexedMapper<T, TO | typeof SKIP | typeof END>,
|
|
216
202
|
opt?: TransformMapSyncOptions,
|
|
@@ -231,7 +217,7 @@ export class Pipeline<T = unknown> {
|
|
|
231
217
|
|
|
232
218
|
filter(asyncPredicate: AsyncPredicate<T>, opt?: TransformMapOptions): this {
|
|
233
219
|
this.transforms.push(
|
|
234
|
-
|
|
220
|
+
transformMap(v => v, {
|
|
235
221
|
asyncPredicate,
|
|
236
222
|
...opt,
|
|
237
223
|
signal: this.abortableSignal,
|
|
@@ -430,24 +416,7 @@ export class Pipeline<T = unknown> {
|
|
|
430
416
|
opt: TransformMapOptions<T, void> & TransformLogProgressOptions<T> = {},
|
|
431
417
|
): Promise<void> {
|
|
432
418
|
this.transforms.push(
|
|
433
|
-
|
|
434
|
-
predicate: opt.logEvery ? _passthroughPredicate : undefined, // for the logger to work
|
|
435
|
-
...opt,
|
|
436
|
-
signal: this.abortableSignal,
|
|
437
|
-
}),
|
|
438
|
-
)
|
|
439
|
-
if (opt.logEvery) {
|
|
440
|
-
this.transforms.push(transformLogProgress(opt))
|
|
441
|
-
}
|
|
442
|
-
await this.run()
|
|
443
|
-
}
|
|
444
|
-
|
|
445
|
-
async forEach2(
|
|
446
|
-
fn: AsyncIndexedMapper<T, void>,
|
|
447
|
-
opt: TransformMap2Options<T, void> & TransformLogProgressOptions<T> = {},
|
|
448
|
-
): Promise<void> {
|
|
449
|
-
this.transforms.push(
|
|
450
|
-
transformMap2(fn, {
|
|
419
|
+
transformMap(fn, {
|
|
451
420
|
predicate: opt.logEvery ? _passthroughPredicate : undefined, // for the logger to work
|
|
452
421
|
...opt,
|
|
453
422
|
signal: this.abortableSignal,
|
|
@@ -2,7 +2,7 @@ import { Transform } from 'node:stream'
|
|
|
2
2
|
import type { AsyncPredicate, Predicate } from '@naturalcycles/js-lib/types'
|
|
3
3
|
import type { TransformOptions, TransformTyped } from '../stream.model.js'
|
|
4
4
|
import type { TransformMapOptions } from './transformMap.js'
|
|
5
|
-
import {
|
|
5
|
+
import { transformMap } from './transformMap.js'
|
|
6
6
|
|
|
7
7
|
/**
|
|
8
8
|
* Just a convenience wrapper around `transformMap` that has built-in predicate filtering support.
|
|
@@ -11,7 +11,7 @@ export function transformFilter<IN = any>(
|
|
|
11
11
|
asyncPredicate: AsyncPredicate<IN>,
|
|
12
12
|
opt: TransformMapOptions = {},
|
|
13
13
|
): TransformTyped<IN, IN> {
|
|
14
|
-
return
|
|
14
|
+
return transformMap(v => v, {
|
|
15
15
|
asyncPredicate,
|
|
16
16
|
...opt,
|
|
17
17
|
})
|
|
@@ -1,12 +1,16 @@
|
|
|
1
|
+
import { Transform } from 'node:stream'
|
|
1
2
|
import { _hc, type AbortableSignal } from '@naturalcycles/js-lib'
|
|
2
|
-
import { _since } from '@naturalcycles/js-lib/datetime
|
|
3
|
+
import { _since } from '@naturalcycles/js-lib/datetime'
|
|
3
4
|
import { _anyToError, _assert, ErrorMode } from '@naturalcycles/js-lib/error'
|
|
4
5
|
import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log'
|
|
5
|
-
import {
|
|
6
|
+
import type { DeferredPromise } from '@naturalcycles/js-lib/promise'
|
|
7
|
+
import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js'
|
|
8
|
+
import { _stringify } from '@naturalcycles/js-lib/string'
|
|
6
9
|
import {
|
|
7
10
|
type AbortableAsyncMapper,
|
|
8
11
|
type AsyncPredicate,
|
|
9
12
|
END,
|
|
13
|
+
type NumberOfSeconds,
|
|
10
14
|
type PositiveInteger,
|
|
11
15
|
type Predicate,
|
|
12
16
|
type Promisable,
|
|
@@ -14,7 +18,6 @@ import {
|
|
|
14
18
|
type StringMap,
|
|
15
19
|
type UnixTimestampMillis,
|
|
16
20
|
} from '@naturalcycles/js-lib/types'
|
|
17
|
-
import through2Concurrent from 'through2-concurrent'
|
|
18
21
|
import { yellow } from '../../colors/colors.js'
|
|
19
22
|
import type { TransformOptions, TransformTyped } from '../stream.model.js'
|
|
20
23
|
import { PIPELINE_GRACEFUL_ABORT } from '../stream.util.js'
|
|
@@ -34,23 +37,17 @@ export interface TransformMapOptions<IN = any, OUT = IN> extends TransformOption
|
|
|
34
37
|
/**
|
|
35
38
|
* Number of concurrently pending promises returned by `mapper`.
|
|
36
39
|
*
|
|
37
|
-
*
|
|
38
|
-
* It was recently changed up from 16, after some testing that shown that
|
|
39
|
-
* for simple low-cpu mapper functions 32 produces almost 2x throughput.
|
|
40
|
-
* For example, in scenarios like streaming a query from Datastore.
|
|
41
|
-
* UPD: changed back from 32 to 16, "to be on a safe side", as 32 sometimes
|
|
42
|
-
* causes "Datastore timeout errors".
|
|
40
|
+
* @default 16
|
|
43
41
|
*/
|
|
44
42
|
concurrency?: PositiveInteger
|
|
45
43
|
|
|
46
44
|
/**
|
|
47
|
-
*
|
|
48
|
-
*
|
|
45
|
+
* Time in seconds to gradually increase concurrency from 1 to `concurrency`.
|
|
46
|
+
* Useful for warming up connections to databases, APIs, etc.
|
|
49
47
|
*
|
|
50
|
-
*
|
|
51
|
-
* So, 64 means a total buffer of 128 (64 input and 64 output buffer).
|
|
48
|
+
* Set to 0 to disable warmup (default).
|
|
52
49
|
*/
|
|
53
|
-
|
|
50
|
+
warmupSeconds?: NumberOfSeconds
|
|
54
51
|
|
|
55
52
|
/**
|
|
56
53
|
* @default THROW_IMMEDIATELY
|
|
@@ -88,165 +85,106 @@ export interface TransformMapOptions<IN = any, OUT = IN> extends TransformOption
|
|
|
88
85
|
signal?: AbortableSignal
|
|
89
86
|
}
|
|
90
87
|
|
|
91
|
-
|
|
92
|
-
/**
|
|
93
|
-
* True if transform was successful (didn't throw Immediate or Aggregated error).
|
|
94
|
-
*/
|
|
95
|
-
ok: boolean
|
|
96
|
-
/**
|
|
97
|
-
* Only used (and returned) for ErrorMode.Aggregated
|
|
98
|
-
*/
|
|
99
|
-
collectedErrors: Error[]
|
|
100
|
-
countErrors: number
|
|
101
|
-
countIn: number
|
|
102
|
-
countOut: number
|
|
103
|
-
started: UnixTimestampMillis
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
export interface TransformMapStatsSummary extends TransformMapStats {
|
|
107
|
-
/**
|
|
108
|
-
* Name of the summary, defaults to `Transform`
|
|
109
|
-
*/
|
|
110
|
-
name?: string
|
|
111
|
-
|
|
112
|
-
/**
|
|
113
|
-
* Allows to pass extra key-value object, which will be rendered as:
|
|
114
|
-
* key: value
|
|
115
|
-
* key2: value2
|
|
116
|
-
*/
|
|
117
|
-
extra?: StringMap<any>
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
// doesn't work, cause here we don't construct our Transform instance ourselves
|
|
121
|
-
// export class TransformMap extends AbortableTransform {}
|
|
88
|
+
const WARMUP_CHECK_INTERVAL_MS = 1000
|
|
122
89
|
|
|
123
90
|
/**
|
|
124
|
-
* Like
|
|
125
|
-
*
|
|
126
|
-
* Main feature is concurrency control (implemented via `through2-concurrent`) and convenient options.
|
|
127
|
-
* Using this allows native stream .pipe() to work and use backpressure.
|
|
91
|
+
* Maps each chunk via `mapper`, with native concurrency control (no through2-concurrent dependency)
|
|
92
|
+
* and support for gradual warmup.
|
|
128
93
|
*
|
|
129
|
-
*
|
|
130
|
-
*
|
|
131
|
-
* Concurrency defaults to 16.
|
|
132
|
-
*
|
|
133
|
-
* If an Array is returned by `mapper` - it will be flattened and multiple results will be emitted from it. Tested by Array.isArray().
|
|
94
|
+
* @experimental
|
|
134
95
|
*/
|
|
135
96
|
export function transformMap<IN = any, OUT = IN>(
|
|
136
97
|
mapper: AbortableAsyncMapper<IN, OUT | typeof SKIP | typeof END>,
|
|
137
98
|
opt: TransformMapOptions<IN, OUT> = {},
|
|
138
99
|
): TransformTyped<IN, OUT> {
|
|
139
100
|
const {
|
|
140
|
-
concurrency = 16,
|
|
141
|
-
|
|
142
|
-
predicate,
|
|
101
|
+
concurrency: maxConcurrency = 16,
|
|
102
|
+
warmupSeconds = 0,
|
|
103
|
+
predicate,
|
|
143
104
|
asyncPredicate,
|
|
144
105
|
errorMode = ErrorMode.THROW_IMMEDIATELY,
|
|
145
106
|
onError,
|
|
146
107
|
onDone,
|
|
147
108
|
metric = 'stream',
|
|
148
109
|
signal,
|
|
110
|
+
objectMode = true,
|
|
111
|
+
highWaterMark = 64,
|
|
149
112
|
} = opt
|
|
150
113
|
|
|
151
|
-
const
|
|
114
|
+
const warmupMs = warmupSeconds * 1000
|
|
115
|
+
const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel)
|
|
116
|
+
|
|
117
|
+
// Stats
|
|
118
|
+
let started = 0 as UnixTimestampMillis
|
|
152
119
|
let index = -1
|
|
153
120
|
let countOut = 0
|
|
154
121
|
let isSettled = false
|
|
155
122
|
let ok = true
|
|
156
123
|
let errors = 0
|
|
157
|
-
const collectedErrors: Error[] = []
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
logger.error(err)
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
// emit Aggregated error
|
|
183
|
-
cb(
|
|
184
|
-
new AggregateError(
|
|
185
|
-
collectedErrors,
|
|
186
|
-
`transformMap resulted in ${collectedErrors.length} error(s)`,
|
|
187
|
-
),
|
|
188
|
-
)
|
|
189
|
-
} else {
|
|
190
|
-
// emit no error
|
|
124
|
+
const collectedErrors: Error[] = []
|
|
125
|
+
|
|
126
|
+
// Concurrency control - single counter, single callback for backpressure
|
|
127
|
+
let inFlight = 0
|
|
128
|
+
let blockedCallback: (() => void) | null = null
|
|
129
|
+
let flushBlocked: DeferredPromise | null = null
|
|
130
|
+
|
|
131
|
+
// Warmup - cached concurrency to reduce Date.now() syscalls
|
|
132
|
+
let warmupComplete = warmupSeconds <= 0 || maxConcurrency <= 1
|
|
133
|
+
let concurrency = warmupComplete ? maxConcurrency : 1
|
|
134
|
+
let lastWarmupCheck = 0
|
|
135
|
+
|
|
136
|
+
return new Transform({
|
|
137
|
+
objectMode,
|
|
138
|
+
readableHighWaterMark: highWaterMark,
|
|
139
|
+
writableHighWaterMark: highWaterMark,
|
|
140
|
+
async transform(this: Transform, chunk: IN, _, cb) {
|
|
141
|
+
// Initialize start time on first item
|
|
142
|
+
if (started === 0) {
|
|
143
|
+
started = Date.now() as UnixTimestampMillis
|
|
144
|
+
lastWarmupCheck = started
|
|
145
|
+
}
|
|
191
146
|
|
|
192
|
-
try {
|
|
193
|
-
await onDone?.({
|
|
194
|
-
ok,
|
|
195
|
-
collectedErrors,
|
|
196
|
-
countErrors: errors,
|
|
197
|
-
countIn: index + 1,
|
|
198
|
-
countOut,
|
|
199
|
-
started,
|
|
200
|
-
})
|
|
201
|
-
} catch (err) {
|
|
202
|
-
logger.error(err)
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
cb()
|
|
206
|
-
}
|
|
207
|
-
},
|
|
208
|
-
},
|
|
209
|
-
async function transformMapFn(chunk: IN, _, cb) {
|
|
210
|
-
// Stop processing if isSettled (either THROW_IMMEDIATELY was fired or END received)
|
|
211
147
|
if (isSettled) return cb()
|
|
212
148
|
|
|
213
149
|
const currentIndex = ++index
|
|
150
|
+
inFlight++
|
|
151
|
+
if (!warmupComplete) {
|
|
152
|
+
updateConcurrency()
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Apply backpressure if at capacity, otherwise request more input
|
|
156
|
+
if (inFlight < concurrency) {
|
|
157
|
+
cb()
|
|
158
|
+
} else {
|
|
159
|
+
blockedCallback = cb
|
|
160
|
+
}
|
|
214
161
|
|
|
215
162
|
try {
|
|
216
163
|
const res: OUT | typeof SKIP | typeof END = await mapper(chunk, currentIndex)
|
|
217
|
-
|
|
218
|
-
if (isSettled) return
|
|
164
|
+
|
|
165
|
+
if (isSettled) return
|
|
219
166
|
|
|
220
167
|
if (res === END) {
|
|
221
168
|
isSettled = true
|
|
222
|
-
logger.log(`
|
|
169
|
+
logger.log(`transformMap2 END received at index ${currentIndex}`)
|
|
223
170
|
_assert(signal, 'signal is required when using END')
|
|
224
171
|
signal.abort(new Error(PIPELINE_GRACEFUL_ABORT))
|
|
225
|
-
return
|
|
172
|
+
return
|
|
226
173
|
}
|
|
227
174
|
|
|
228
|
-
if (res === SKIP)
|
|
229
|
-
// do nothing, don't push
|
|
230
|
-
return cb()
|
|
231
|
-
}
|
|
175
|
+
if (res === SKIP) return
|
|
232
176
|
|
|
177
|
+
let shouldPush = true
|
|
233
178
|
if (predicate) {
|
|
234
|
-
|
|
235
|
-
countOut++
|
|
236
|
-
this.push(res)
|
|
237
|
-
}
|
|
179
|
+
shouldPush = predicate(res, currentIndex)
|
|
238
180
|
} else if (asyncPredicate) {
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
}
|
|
244
|
-
} else {
|
|
181
|
+
shouldPush = (await asyncPredicate(res, currentIndex)) && !isSettled
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
if (shouldPush) {
|
|
245
185
|
countOut++
|
|
246
186
|
this.push(res)
|
|
247
187
|
}
|
|
248
|
-
|
|
249
|
-
cb() // done processing
|
|
250
188
|
} catch (err) {
|
|
251
189
|
logger.error(err)
|
|
252
190
|
errors++
|
|
@@ -261,40 +199,117 @@ export function transformMap<IN = any, OUT = IN>(
|
|
|
261
199
|
if (errorMode === ErrorMode.THROW_IMMEDIATELY) {
|
|
262
200
|
isSettled = true
|
|
263
201
|
ok = false
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
// try {
|
|
269
|
-
// await onDone?.({
|
|
270
|
-
// ok: false,
|
|
271
|
-
// collectedErrors,
|
|
272
|
-
// countErrors: errors,
|
|
273
|
-
// countIn: index + 1,
|
|
274
|
-
// countOut,
|
|
275
|
-
// started,
|
|
276
|
-
// })
|
|
277
|
-
// } catch (err) {
|
|
278
|
-
// logger.error(err)
|
|
279
|
-
// }
|
|
280
|
-
|
|
281
|
-
return cb(err) // Emit error immediately
|
|
202
|
+
await callOnDone()
|
|
203
|
+
this.destroy(_anyToError(err))
|
|
204
|
+
return
|
|
282
205
|
}
|
|
283
|
-
|
|
284
206
|
if (errorMode === ErrorMode.THROW_AGGREGATED) {
|
|
285
|
-
collectedErrors.push(err
|
|
207
|
+
collectedErrors.push(_anyToError(err))
|
|
208
|
+
}
|
|
209
|
+
} finally {
|
|
210
|
+
inFlight--
|
|
211
|
+
|
|
212
|
+
// Release blocked callback if we now have capacity
|
|
213
|
+
if (blockedCallback && inFlight < concurrency) {
|
|
214
|
+
const pendingCb = blockedCallback
|
|
215
|
+
blockedCallback = null
|
|
216
|
+
pendingCb()
|
|
286
217
|
}
|
|
287
218
|
|
|
288
|
-
//
|
|
219
|
+
// Trigger flush completion if all done
|
|
220
|
+
if (inFlight === 0 && flushBlocked) {
|
|
221
|
+
flushBlocked.resolve()
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
},
|
|
225
|
+
async flush(cb) {
|
|
226
|
+
// Wait for all in-flight operations to complete
|
|
227
|
+
if (inFlight > 0) {
|
|
228
|
+
flushBlocked = pDefer()
|
|
229
|
+
await flushBlocked
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
logErrorStats(true)
|
|
233
|
+
await callOnDone()
|
|
234
|
+
|
|
235
|
+
if (collectedErrors.length) {
|
|
236
|
+
cb(
|
|
237
|
+
new AggregateError(
|
|
238
|
+
collectedErrors,
|
|
239
|
+
`transformMap2 resulted in ${collectedErrors.length} error(s)`,
|
|
240
|
+
),
|
|
241
|
+
)
|
|
242
|
+
} else {
|
|
289
243
|
cb()
|
|
290
244
|
}
|
|
291
245
|
},
|
|
292
|
-
)
|
|
246
|
+
})
|
|
247
|
+
|
|
248
|
+
function updateConcurrency(): void {
|
|
249
|
+
const now = Date.now()
|
|
250
|
+
if (now - lastWarmupCheck < WARMUP_CHECK_INTERVAL_MS) return
|
|
251
|
+
lastWarmupCheck = now
|
|
252
|
+
|
|
253
|
+
const elapsed = now - started
|
|
254
|
+
if (elapsed >= warmupMs) {
|
|
255
|
+
warmupComplete = true
|
|
256
|
+
concurrency = maxConcurrency
|
|
257
|
+
logger.log(`transformMap2: warmup complete in ${_since(started)}`)
|
|
258
|
+
return
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
const progress = elapsed / warmupMs
|
|
262
|
+
concurrency = Math.max(1, Math.floor(1 + (maxConcurrency - 1) * progress))
|
|
263
|
+
}
|
|
293
264
|
|
|
294
265
|
function logErrorStats(final = false): void {
|
|
295
266
|
if (!errors) return
|
|
296
267
|
logger.log(`${metric} ${final ? 'final ' : ''}errors: ${yellow(errors)}`)
|
|
297
268
|
}
|
|
269
|
+
|
|
270
|
+
async function callOnDone(): Promise<void> {
|
|
271
|
+
try {
|
|
272
|
+
await onDone?.({
|
|
273
|
+
ok: collectedErrors.length === 0 && ok,
|
|
274
|
+
collectedErrors,
|
|
275
|
+
countErrors: errors,
|
|
276
|
+
countIn: index + 1,
|
|
277
|
+
countOut,
|
|
278
|
+
started,
|
|
279
|
+
})
|
|
280
|
+
} catch (err) {
|
|
281
|
+
logger.error(err)
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
export interface TransformMapStats {
|
|
287
|
+
/**
|
|
288
|
+
* True if transform was successful (didn't throw Immediate or Aggregated error).
|
|
289
|
+
*/
|
|
290
|
+
ok: boolean
|
|
291
|
+
/**
|
|
292
|
+
* Only used (and returned) for ErrorMode.Aggregated
|
|
293
|
+
*/
|
|
294
|
+
collectedErrors: Error[]
|
|
295
|
+
countErrors: number
|
|
296
|
+
countIn: number
|
|
297
|
+
countOut: number
|
|
298
|
+
started: UnixTimestampMillis
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
export interface TransformMapStatsSummary extends TransformMapStats {
|
|
302
|
+
/**
|
|
303
|
+
* Name of the summary, defaults to `Transform`
|
|
304
|
+
*/
|
|
305
|
+
name?: string
|
|
306
|
+
|
|
307
|
+
/**
|
|
308
|
+
* Allows to pass extra key-value object, which will be rendered as:
|
|
309
|
+
* key: value
|
|
310
|
+
* key2: value2
|
|
311
|
+
*/
|
|
312
|
+
extra?: StringMap<any>
|
|
298
313
|
}
|
|
299
314
|
|
|
300
315
|
/**
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
+
import { Transform } from 'node:stream'
|
|
1
2
|
import { Worker } from 'node:worker_threads'
|
|
2
3
|
import { _range } from '@naturalcycles/js-lib/array/range.js'
|
|
3
4
|
import type { DeferredPromise } from '@naturalcycles/js-lib/promise'
|
|
4
5
|
import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js'
|
|
5
6
|
import type { AnyObject } from '@naturalcycles/js-lib/types'
|
|
6
|
-
import through2Concurrent from 'through2-concurrent'
|
|
7
7
|
import type { TransformTyped } from '../../stream.model.js'
|
|
8
8
|
import type { WorkerInput, WorkerOutput } from './transformMultiThreaded.model.js'
|
|
9
9
|
|
|
@@ -59,6 +59,11 @@ export function transformMultiThreaded<IN, OUT>(
|
|
|
59
59
|
const messageDonePromises: Record<number, DeferredPromise<OUT>> = {}
|
|
60
60
|
let index = -1 // input chunk index, will start from 0
|
|
61
61
|
|
|
62
|
+
// Concurrency control
|
|
63
|
+
let inFlight = 0
|
|
64
|
+
let blockedCallback: (() => void) | null = null
|
|
65
|
+
let flushBlocked: DeferredPromise | null = null
|
|
66
|
+
|
|
62
67
|
const workers = _range(0, poolSize).map(workerIndex => {
|
|
63
68
|
workerDonePromises.push(pDefer())
|
|
64
69
|
|
|
@@ -70,23 +75,16 @@ export function transformMultiThreaded<IN, OUT>(
|
|
|
70
75
|
},
|
|
71
76
|
})
|
|
72
77
|
|
|
73
|
-
// const {threadId} = worker
|
|
74
|
-
// console.log({threadId})
|
|
75
|
-
|
|
76
78
|
worker.on('error', err => {
|
|
77
79
|
console.error(`Worker ${workerIndex} error`, err)
|
|
78
80
|
workerDonePromises[workerIndex]!.reject(err as Error)
|
|
79
81
|
})
|
|
80
82
|
|
|
81
83
|
worker.on('exit', _exitCode => {
|
|
82
|
-
// console.log(`Worker ${index} exit: ${exitCode}`)
|
|
83
84
|
workerDonePromises[workerIndex]!.resolve(undefined)
|
|
84
85
|
})
|
|
85
86
|
|
|
86
87
|
worker.on('message', (out: WorkerOutput<OUT>) => {
|
|
87
|
-
// console.log(`Message from Worker ${workerIndex}:`, out)
|
|
88
|
-
// console.log(Object.keys(messageDonePromises))
|
|
89
|
-
// tr.push(out.payload)
|
|
90
88
|
if (out.error) {
|
|
91
89
|
messageDonePromises[out.index]!.reject(out.error)
|
|
92
90
|
} else {
|
|
@@ -97,32 +95,22 @@ export function transformMultiThreaded<IN, OUT>(
|
|
|
97
95
|
return worker
|
|
98
96
|
})
|
|
99
97
|
|
|
100
|
-
return
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
try {
|
|
106
|
-
// Push null (complete) to all sub-streams
|
|
107
|
-
for (const worker of workers) {
|
|
108
|
-
worker.postMessage(null)
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
console.log(`transformMultiThreaded.final is waiting for all chains to be done`)
|
|
112
|
-
await Promise.all(workerDonePromises)
|
|
113
|
-
console.log(`transformMultiThreaded.final all chains done`)
|
|
114
|
-
|
|
115
|
-
cb()
|
|
116
|
-
} catch (err) {
|
|
117
|
-
cb(err as Error)
|
|
118
|
-
}
|
|
119
|
-
},
|
|
120
|
-
},
|
|
121
|
-
async function transformMapFn(chunk: IN, _, cb) {
|
|
122
|
-
// Freezing the index, because it may change due to concurrency
|
|
98
|
+
return new Transform({
|
|
99
|
+
objectMode: true,
|
|
100
|
+
readableHighWaterMark: highWaterMark,
|
|
101
|
+
writableHighWaterMark: highWaterMark,
|
|
102
|
+
async transform(this: Transform, chunk: IN, _, cb) {
|
|
123
103
|
const currentIndex = ++index
|
|
104
|
+
inFlight++
|
|
124
105
|
|
|
125
|
-
//
|
|
106
|
+
// Apply backpressure if at capacity, otherwise request more input
|
|
107
|
+
if (inFlight < maxConcurrency) {
|
|
108
|
+
cb()
|
|
109
|
+
} else {
|
|
110
|
+
blockedCallback = cb
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Create the unresolved promise (to await)
|
|
126
114
|
messageDonePromises[currentIndex] = pDefer<OUT>()
|
|
127
115
|
|
|
128
116
|
const worker = workers[currentIndex % poolSize]! // round-robin
|
|
@@ -132,21 +120,50 @@ export function transformMultiThreaded<IN, OUT>(
|
|
|
132
120
|
} as WorkerInput)
|
|
133
121
|
|
|
134
122
|
try {
|
|
135
|
-
// awaiting for result
|
|
136
123
|
const out = await messageDonePromises[currentIndex]
|
|
137
|
-
|
|
138
|
-
// return the result
|
|
139
|
-
cb(null, out)
|
|
124
|
+
this.push(out)
|
|
140
125
|
} catch (err) {
|
|
141
126
|
// Currently we only support ErrorMode.SUPPRESS
|
|
142
127
|
// Error is logged and output continues
|
|
143
128
|
console.error(err)
|
|
129
|
+
} finally {
|
|
130
|
+
delete messageDonePromises[currentIndex]
|
|
131
|
+
inFlight--
|
|
132
|
+
|
|
133
|
+
// Release blocked callback if we now have capacity
|
|
134
|
+
if (blockedCallback && inFlight < maxConcurrency) {
|
|
135
|
+
const pendingCb = blockedCallback
|
|
136
|
+
blockedCallback = null
|
|
137
|
+
pendingCb()
|
|
138
|
+
}
|
|
144
139
|
|
|
145
|
-
|
|
140
|
+
// Trigger flush completion if all done
|
|
141
|
+
if (inFlight === 0 && flushBlocked) {
|
|
142
|
+
flushBlocked.resolve()
|
|
143
|
+
}
|
|
146
144
|
}
|
|
145
|
+
},
|
|
146
|
+
async flush(cb) {
|
|
147
|
+
// Wait for all in-flight operations to complete
|
|
148
|
+
if (inFlight > 0) {
|
|
149
|
+
flushBlocked = pDefer()
|
|
150
|
+
await flushBlocked
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
try {
|
|
154
|
+
// Push null (complete) to all workers
|
|
155
|
+
for (const worker of workers) {
|
|
156
|
+
worker.postMessage(null)
|
|
157
|
+
}
|
|
147
158
|
|
|
148
|
-
|
|
149
|
-
|
|
159
|
+
console.log(`transformMultiThreaded.flush is waiting for all workers to be done`)
|
|
160
|
+
await Promise.all(workerDonePromises)
|
|
161
|
+
console.log(`transformMultiThreaded.flush all workers done`)
|
|
162
|
+
|
|
163
|
+
cb()
|
|
164
|
+
} catch (err) {
|
|
165
|
+
cb(err as Error)
|
|
166
|
+
}
|
|
150
167
|
},
|
|
151
|
-
)
|
|
168
|
+
})
|
|
152
169
|
}
|
|
@@ -10,12 +10,8 @@ if (!workerFile) {
|
|
|
10
10
|
// console.log(`worker#${workerIndex} created`)
|
|
11
11
|
|
|
12
12
|
try {
|
|
13
|
-
// require('esbuild-register') // alternative
|
|
14
|
-
// require('ts-node/register/transpile-only')
|
|
15
|
-
// require('tsx/cjs/api').register() // https://tsx.is/dev-api/register-cjs
|
|
16
13
|
const { register } = await import('tsx/esm/api')
|
|
17
14
|
register() // https://tsx.is/dev-api/register-esm
|
|
18
|
-
// require('tsconfig-paths/register')
|
|
19
15
|
} catch {} // require if exists
|
|
20
16
|
|
|
21
17
|
const { WorkerClass } = await import(workerFile)
|