@naturalcycles/nodejs-lib 15.70.1 → 15.72.0
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/dist/stream/index.d.ts +0 -1
- package/dist/stream/index.js +0 -1
- package/dist/stream/pipeline.d.ts +13 -8
- package/dist/stream/pipeline.js +18 -32
- package/dist/stream/transform/transformFilter.js +2 -2
- package/dist/stream/transform/transformMap.d.ts +13 -25
- package/dist/stream/transform/transformMap.js +134 -118
- package/dist/stream/transform/worker/transformMultiThreaded.js +59 -39
- package/dist/zip/zip.util.d.ts +4 -1
- package/dist/zip/zip.util.js +14 -2
- package/package.json +1 -3
- package/src/stream/index.ts +0 -1
- package/src/stream/pipeline.ts +21 -48
- package/src/stream/transform/transformFilter.ts +2 -2
- package/src/stream/transform/transformMap.ts +168 -153
- package/src/stream/transform/worker/transformMultiThreaded.ts +57 -40
- package/src/stream/transform/worker/workerClassProxy.js +0 -4
- package/src/zip/zip.util.ts +15 -1
- package/dist/stream/transform/transformMap2.d.ts +0 -66
- package/dist/stream/transform/transformMap2.js +0 -171
- package/src/stream/transform/transformMap2.ts +0 -283
|
@@ -1,12 +1,16 @@
|
|
|
1
|
+
import { Transform } from 'node:stream'
|
|
1
2
|
import { _hc, type AbortableSignal } from '@naturalcycles/js-lib'
|
|
2
|
-
import { _since } from '@naturalcycles/js-lib/datetime
|
|
3
|
+
import { _since } from '@naturalcycles/js-lib/datetime'
|
|
3
4
|
import { _anyToError, _assert, ErrorMode } from '@naturalcycles/js-lib/error'
|
|
4
5
|
import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log'
|
|
5
|
-
import {
|
|
6
|
+
import type { DeferredPromise } from '@naturalcycles/js-lib/promise'
|
|
7
|
+
import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js'
|
|
8
|
+
import { _stringify } from '@naturalcycles/js-lib/string'
|
|
6
9
|
import {
|
|
7
10
|
type AbortableAsyncMapper,
|
|
8
11
|
type AsyncPredicate,
|
|
9
12
|
END,
|
|
13
|
+
type NumberOfSeconds,
|
|
10
14
|
type PositiveInteger,
|
|
11
15
|
type Predicate,
|
|
12
16
|
type Promisable,
|
|
@@ -14,7 +18,6 @@ import {
|
|
|
14
18
|
type StringMap,
|
|
15
19
|
type UnixTimestampMillis,
|
|
16
20
|
} from '@naturalcycles/js-lib/types'
|
|
17
|
-
import through2Concurrent from 'through2-concurrent'
|
|
18
21
|
import { yellow } from '../../colors/colors.js'
|
|
19
22
|
import type { TransformOptions, TransformTyped } from '../stream.model.js'
|
|
20
23
|
import { PIPELINE_GRACEFUL_ABORT } from '../stream.util.js'
|
|
@@ -34,23 +37,17 @@ export interface TransformMapOptions<IN = any, OUT = IN> extends TransformOption
|
|
|
34
37
|
/**
|
|
35
38
|
* Number of concurrently pending promises returned by `mapper`.
|
|
36
39
|
*
|
|
37
|
-
*
|
|
38
|
-
* It was recently changed up from 16, after some testing that shown that
|
|
39
|
-
* for simple low-cpu mapper functions 32 produces almost 2x throughput.
|
|
40
|
-
* For example, in scenarios like streaming a query from Datastore.
|
|
41
|
-
* UPD: changed back from 32 to 16, "to be on a safe side", as 32 sometimes
|
|
42
|
-
* causes "Datastore timeout errors".
|
|
40
|
+
* @default 16
|
|
43
41
|
*/
|
|
44
42
|
concurrency?: PositiveInteger
|
|
45
43
|
|
|
46
44
|
/**
|
|
47
|
-
*
|
|
48
|
-
*
|
|
45
|
+
* Time in seconds to gradually increase concurrency from 1 to `concurrency`.
|
|
46
|
+
* Useful for warming up connections to databases, APIs, etc.
|
|
49
47
|
*
|
|
50
|
-
*
|
|
51
|
-
* So, 64 means a total buffer of 128 (64 input and 64 output buffer).
|
|
48
|
+
* Set to 0 to disable warmup (default).
|
|
52
49
|
*/
|
|
53
|
-
|
|
50
|
+
warmupSeconds?: NumberOfSeconds
|
|
54
51
|
|
|
55
52
|
/**
|
|
56
53
|
* @default THROW_IMMEDIATELY
|
|
@@ -88,165 +85,106 @@ export interface TransformMapOptions<IN = any, OUT = IN> extends TransformOption
|
|
|
88
85
|
signal?: AbortableSignal
|
|
89
86
|
}
|
|
90
87
|
|
|
91
|
-
|
|
92
|
-
/**
|
|
93
|
-
* True if transform was successful (didn't throw Immediate or Aggregated error).
|
|
94
|
-
*/
|
|
95
|
-
ok: boolean
|
|
96
|
-
/**
|
|
97
|
-
* Only used (and returned) for ErrorMode.Aggregated
|
|
98
|
-
*/
|
|
99
|
-
collectedErrors: Error[]
|
|
100
|
-
countErrors: number
|
|
101
|
-
countIn: number
|
|
102
|
-
countOut: number
|
|
103
|
-
started: UnixTimestampMillis
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
export interface TransformMapStatsSummary extends TransformMapStats {
|
|
107
|
-
/**
|
|
108
|
-
* Name of the summary, defaults to `Transform`
|
|
109
|
-
*/
|
|
110
|
-
name?: string
|
|
111
|
-
|
|
112
|
-
/**
|
|
113
|
-
* Allows to pass extra key-value object, which will be rendered as:
|
|
114
|
-
* key: value
|
|
115
|
-
* key2: value2
|
|
116
|
-
*/
|
|
117
|
-
extra?: StringMap<any>
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
// doesn't work, cause here we don't construct our Transform instance ourselves
|
|
121
|
-
// export class TransformMap extends AbortableTransform {}
|
|
88
|
+
const WARMUP_CHECK_INTERVAL_MS = 1000
|
|
122
89
|
|
|
123
90
|
/**
|
|
124
|
-
* Like
|
|
125
|
-
*
|
|
126
|
-
* Main feature is concurrency control (implemented via `through2-concurrent`) and convenient options.
|
|
127
|
-
* Using this allows native stream .pipe() to work and use backpressure.
|
|
91
|
+
* Maps each chunk through `mapper` with native concurrency control (no through2-concurrent dependency)
|
|
92
|
+
* and support for gradual warmup.
|
|
128
93
|
*
|
|
129
|
-
*
|
|
130
|
-
*
|
|
131
|
-
* Concurrency defaults to 16.
|
|
132
|
-
*
|
|
133
|
-
* If an Array is returned by `mapper` - it will be flattened and multiple results will be emitted from it. Tested by Array.isArray().
|
|
94
|
+
* @experimental
|
|
134
95
|
*/
|
|
135
96
|
export function transformMap<IN = any, OUT = IN>(
|
|
136
97
|
mapper: AbortableAsyncMapper<IN, OUT | typeof SKIP | typeof END>,
|
|
137
98
|
opt: TransformMapOptions<IN, OUT> = {},
|
|
138
99
|
): TransformTyped<IN, OUT> {
|
|
139
100
|
const {
|
|
140
|
-
concurrency = 16,
|
|
141
|
-
|
|
142
|
-
predicate,
|
|
101
|
+
concurrency: maxConcurrency = 16,
|
|
102
|
+
warmupSeconds = 0,
|
|
103
|
+
predicate,
|
|
143
104
|
asyncPredicate,
|
|
144
105
|
errorMode = ErrorMode.THROW_IMMEDIATELY,
|
|
145
106
|
onError,
|
|
146
107
|
onDone,
|
|
147
108
|
metric = 'stream',
|
|
148
109
|
signal,
|
|
110
|
+
objectMode = true,
|
|
111
|
+
highWaterMark = 64,
|
|
149
112
|
} = opt
|
|
150
113
|
|
|
151
|
-
const
|
|
114
|
+
const warmupMs = warmupSeconds * 1000
|
|
115
|
+
const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel)
|
|
116
|
+
|
|
117
|
+
// Stats
|
|
118
|
+
let started = 0 as UnixTimestampMillis
|
|
152
119
|
let index = -1
|
|
153
120
|
let countOut = 0
|
|
154
121
|
let isSettled = false
|
|
155
122
|
let ok = true
|
|
156
123
|
let errors = 0
|
|
157
|
-
const collectedErrors: Error[] = []
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
logger.error(err)
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
// emit Aggregated error
|
|
183
|
-
cb(
|
|
184
|
-
new AggregateError(
|
|
185
|
-
collectedErrors,
|
|
186
|
-
`transformMap resulted in ${collectedErrors.length} error(s)`,
|
|
187
|
-
),
|
|
188
|
-
)
|
|
189
|
-
} else {
|
|
190
|
-
// emit no error
|
|
124
|
+
const collectedErrors: Error[] = []
|
|
125
|
+
|
|
126
|
+
// Concurrency control - single counter, single callback for backpressure
|
|
127
|
+
let inFlight = 0
|
|
128
|
+
let blockedCallback: (() => void) | null = null
|
|
129
|
+
let flushBlocked: DeferredPromise | null = null
|
|
130
|
+
|
|
131
|
+
// Warmup - cached concurrency to reduce Date.now() syscalls
|
|
132
|
+
let warmupComplete = warmupSeconds <= 0 || maxConcurrency <= 1
|
|
133
|
+
let concurrency = warmupComplete ? maxConcurrency : 1
|
|
134
|
+
let lastWarmupCheck = 0
|
|
135
|
+
|
|
136
|
+
return new Transform({
|
|
137
|
+
objectMode,
|
|
138
|
+
readableHighWaterMark: highWaterMark,
|
|
139
|
+
writableHighWaterMark: highWaterMark,
|
|
140
|
+
async transform(this: Transform, chunk: IN, _, cb) {
|
|
141
|
+
// Initialize start time on first item
|
|
142
|
+
if (started === 0) {
|
|
143
|
+
started = Date.now() as UnixTimestampMillis
|
|
144
|
+
lastWarmupCheck = started
|
|
145
|
+
}
|
|
191
146
|
|
|
192
|
-
try {
|
|
193
|
-
await onDone?.({
|
|
194
|
-
ok,
|
|
195
|
-
collectedErrors,
|
|
196
|
-
countErrors: errors,
|
|
197
|
-
countIn: index + 1,
|
|
198
|
-
countOut,
|
|
199
|
-
started,
|
|
200
|
-
})
|
|
201
|
-
} catch (err) {
|
|
202
|
-
logger.error(err)
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
cb()
|
|
206
|
-
}
|
|
207
|
-
},
|
|
208
|
-
},
|
|
209
|
-
async function transformMapFn(chunk: IN, _, cb) {
|
|
210
|
-
// Stop processing if isSettled (either THROW_IMMEDIATELY was fired or END received)
|
|
211
147
|
if (isSettled) return cb()
|
|
212
148
|
|
|
213
149
|
const currentIndex = ++index
|
|
150
|
+
inFlight++
|
|
151
|
+
if (!warmupComplete) {
|
|
152
|
+
updateConcurrency()
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Apply backpressure if at capacity, otherwise request more input
|
|
156
|
+
if (inFlight < concurrency) {
|
|
157
|
+
cb()
|
|
158
|
+
} else {
|
|
159
|
+
blockedCallback = cb
|
|
160
|
+
}
|
|
214
161
|
|
|
215
162
|
try {
|
|
216
163
|
const res: OUT | typeof SKIP | typeof END = await mapper(chunk, currentIndex)
|
|
217
|
-
|
|
218
|
-
if (isSettled) return
|
|
164
|
+
|
|
165
|
+
if (isSettled) return
|
|
219
166
|
|
|
220
167
|
if (res === END) {
|
|
221
168
|
isSettled = true
|
|
222
|
-
logger.log(`
|
|
169
|
+
logger.log(`transformMap2 END received at index ${currentIndex}`)
|
|
223
170
|
_assert(signal, 'signal is required when using END')
|
|
224
171
|
signal.abort(new Error(PIPELINE_GRACEFUL_ABORT))
|
|
225
|
-
return
|
|
172
|
+
return
|
|
226
173
|
}
|
|
227
174
|
|
|
228
|
-
if (res === SKIP)
|
|
229
|
-
// do nothing, don't push
|
|
230
|
-
return cb()
|
|
231
|
-
}
|
|
175
|
+
if (res === SKIP) return
|
|
232
176
|
|
|
177
|
+
let shouldPush = true
|
|
233
178
|
if (predicate) {
|
|
234
|
-
|
|
235
|
-
countOut++
|
|
236
|
-
this.push(res)
|
|
237
|
-
}
|
|
179
|
+
shouldPush = predicate(res, currentIndex)
|
|
238
180
|
} else if (asyncPredicate) {
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
}
|
|
244
|
-
} else {
|
|
181
|
+
shouldPush = (await asyncPredicate(res, currentIndex)) && !isSettled
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
if (shouldPush) {
|
|
245
185
|
countOut++
|
|
246
186
|
this.push(res)
|
|
247
187
|
}
|
|
248
|
-
|
|
249
|
-
cb() // done processing
|
|
250
188
|
} catch (err) {
|
|
251
189
|
logger.error(err)
|
|
252
190
|
errors++
|
|
@@ -261,40 +199,117 @@ export function transformMap<IN = any, OUT = IN>(
|
|
|
261
199
|
if (errorMode === ErrorMode.THROW_IMMEDIATELY) {
|
|
262
200
|
isSettled = true
|
|
263
201
|
ok = false
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
// try {
|
|
269
|
-
// await onDone?.({
|
|
270
|
-
// ok: false,
|
|
271
|
-
// collectedErrors,
|
|
272
|
-
// countErrors: errors,
|
|
273
|
-
// countIn: index + 1,
|
|
274
|
-
// countOut,
|
|
275
|
-
// started,
|
|
276
|
-
// })
|
|
277
|
-
// } catch (err) {
|
|
278
|
-
// logger.error(err)
|
|
279
|
-
// }
|
|
280
|
-
|
|
281
|
-
return cb(err) // Emit error immediately
|
|
202
|
+
await callOnDone()
|
|
203
|
+
this.destroy(_anyToError(err))
|
|
204
|
+
return
|
|
282
205
|
}
|
|
283
|
-
|
|
284
206
|
if (errorMode === ErrorMode.THROW_AGGREGATED) {
|
|
285
|
-
collectedErrors.push(err
|
|
207
|
+
collectedErrors.push(_anyToError(err))
|
|
208
|
+
}
|
|
209
|
+
} finally {
|
|
210
|
+
inFlight--
|
|
211
|
+
|
|
212
|
+
// Release blocked callback if we now have capacity
|
|
213
|
+
if (blockedCallback && inFlight < concurrency) {
|
|
214
|
+
const pendingCb = blockedCallback
|
|
215
|
+
blockedCallback = null
|
|
216
|
+
pendingCb()
|
|
286
217
|
}
|
|
287
218
|
|
|
288
|
-
//
|
|
219
|
+
// Trigger flush completion if all done
|
|
220
|
+
if (inFlight === 0 && flushBlocked) {
|
|
221
|
+
flushBlocked.resolve()
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
},
|
|
225
|
+
async flush(cb) {
|
|
226
|
+
// Wait for all in-flight operations to complete
|
|
227
|
+
if (inFlight > 0) {
|
|
228
|
+
flushBlocked = pDefer()
|
|
229
|
+
await flushBlocked
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
logErrorStats(true)
|
|
233
|
+
await callOnDone()
|
|
234
|
+
|
|
235
|
+
if (collectedErrors.length) {
|
|
236
|
+
cb(
|
|
237
|
+
new AggregateError(
|
|
238
|
+
collectedErrors,
|
|
239
|
+
`transformMap2 resulted in ${collectedErrors.length} error(s)`,
|
|
240
|
+
),
|
|
241
|
+
)
|
|
242
|
+
} else {
|
|
289
243
|
cb()
|
|
290
244
|
}
|
|
291
245
|
},
|
|
292
|
-
)
|
|
246
|
+
})
|
|
247
|
+
|
|
248
|
+
function updateConcurrency(): void {
|
|
249
|
+
const now = Date.now()
|
|
250
|
+
if (now - lastWarmupCheck < WARMUP_CHECK_INTERVAL_MS) return
|
|
251
|
+
lastWarmupCheck = now
|
|
252
|
+
|
|
253
|
+
const elapsed = now - started
|
|
254
|
+
if (elapsed >= warmupMs) {
|
|
255
|
+
warmupComplete = true
|
|
256
|
+
concurrency = maxConcurrency
|
|
257
|
+
logger.log(`transformMap2: warmup complete in ${_since(started)}`)
|
|
258
|
+
return
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
const progress = elapsed / warmupMs
|
|
262
|
+
concurrency = Math.max(1, Math.floor(1 + (maxConcurrency - 1) * progress))
|
|
263
|
+
}
|
|
293
264
|
|
|
294
265
|
function logErrorStats(final = false): void {
|
|
295
266
|
if (!errors) return
|
|
296
267
|
logger.log(`${metric} ${final ? 'final ' : ''}errors: ${yellow(errors)}`)
|
|
297
268
|
}
|
|
269
|
+
|
|
270
|
+
async function callOnDone(): Promise<void> {
|
|
271
|
+
try {
|
|
272
|
+
await onDone?.({
|
|
273
|
+
ok: collectedErrors.length === 0 && ok,
|
|
274
|
+
collectedErrors,
|
|
275
|
+
countErrors: errors,
|
|
276
|
+
countIn: index + 1,
|
|
277
|
+
countOut,
|
|
278
|
+
started,
|
|
279
|
+
})
|
|
280
|
+
} catch (err) {
|
|
281
|
+
logger.error(err)
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
export interface TransformMapStats {
|
|
287
|
+
/**
|
|
288
|
+
* True if transform was successful (didn't throw Immediate or Aggregated error).
|
|
289
|
+
*/
|
|
290
|
+
ok: boolean
|
|
291
|
+
/**
|
|
292
|
+
* Only used (and returned) for ErrorMode.Aggregated
|
|
293
|
+
*/
|
|
294
|
+
collectedErrors: Error[]
|
|
295
|
+
countErrors: number
|
|
296
|
+
countIn: number
|
|
297
|
+
countOut: number
|
|
298
|
+
started: UnixTimestampMillis
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
export interface TransformMapStatsSummary extends TransformMapStats {
|
|
302
|
+
/**
|
|
303
|
+
* Name of the summary, defaults to `Transform`
|
|
304
|
+
*/
|
|
305
|
+
name?: string
|
|
306
|
+
|
|
307
|
+
/**
|
|
308
|
+
* Allows passing an extra key-value object, which will be rendered as:
|
|
309
|
+
* key: value
|
|
310
|
+
* key2: value2
|
|
311
|
+
*/
|
|
312
|
+
extra?: StringMap<any>
|
|
298
313
|
}
|
|
299
314
|
|
|
300
315
|
/**
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
+
import { Transform } from 'node:stream'
|
|
1
2
|
import { Worker } from 'node:worker_threads'
|
|
2
3
|
import { _range } from '@naturalcycles/js-lib/array/range.js'
|
|
3
4
|
import type { DeferredPromise } from '@naturalcycles/js-lib/promise'
|
|
4
5
|
import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js'
|
|
5
6
|
import type { AnyObject } from '@naturalcycles/js-lib/types'
|
|
6
|
-
import through2Concurrent from 'through2-concurrent'
|
|
7
7
|
import type { TransformTyped } from '../../stream.model.js'
|
|
8
8
|
import type { WorkerInput, WorkerOutput } from './transformMultiThreaded.model.js'
|
|
9
9
|
|
|
@@ -59,6 +59,11 @@ export function transformMultiThreaded<IN, OUT>(
|
|
|
59
59
|
const messageDonePromises: Record<number, DeferredPromise<OUT>> = {}
|
|
60
60
|
let index = -1 // input chunk index, will start from 0
|
|
61
61
|
|
|
62
|
+
// Concurrency control
|
|
63
|
+
let inFlight = 0
|
|
64
|
+
let blockedCallback: (() => void) | null = null
|
|
65
|
+
let flushBlocked: DeferredPromise | null = null
|
|
66
|
+
|
|
62
67
|
const workers = _range(0, poolSize).map(workerIndex => {
|
|
63
68
|
workerDonePromises.push(pDefer())
|
|
64
69
|
|
|
@@ -70,23 +75,16 @@ export function transformMultiThreaded<IN, OUT>(
|
|
|
70
75
|
},
|
|
71
76
|
})
|
|
72
77
|
|
|
73
|
-
// const {threadId} = worker
|
|
74
|
-
// console.log({threadId})
|
|
75
|
-
|
|
76
78
|
worker.on('error', err => {
|
|
77
79
|
console.error(`Worker ${workerIndex} error`, err)
|
|
78
80
|
workerDonePromises[workerIndex]!.reject(err as Error)
|
|
79
81
|
})
|
|
80
82
|
|
|
81
83
|
worker.on('exit', _exitCode => {
|
|
82
|
-
// console.log(`Worker ${index} exit: ${exitCode}`)
|
|
83
84
|
workerDonePromises[workerIndex]!.resolve(undefined)
|
|
84
85
|
})
|
|
85
86
|
|
|
86
87
|
worker.on('message', (out: WorkerOutput<OUT>) => {
|
|
87
|
-
// console.log(`Message from Worker ${workerIndex}:`, out)
|
|
88
|
-
// console.log(Object.keys(messageDonePromises))
|
|
89
|
-
// tr.push(out.payload)
|
|
90
88
|
if (out.error) {
|
|
91
89
|
messageDonePromises[out.index]!.reject(out.error)
|
|
92
90
|
} else {
|
|
@@ -97,32 +95,22 @@ export function transformMultiThreaded<IN, OUT>(
|
|
|
97
95
|
return worker
|
|
98
96
|
})
|
|
99
97
|
|
|
100
|
-
return
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
try {
|
|
106
|
-
// Push null (complete) to all sub-streams
|
|
107
|
-
for (const worker of workers) {
|
|
108
|
-
worker.postMessage(null)
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
console.log(`transformMultiThreaded.final is waiting for all chains to be done`)
|
|
112
|
-
await Promise.all(workerDonePromises)
|
|
113
|
-
console.log(`transformMultiThreaded.final all chains done`)
|
|
114
|
-
|
|
115
|
-
cb()
|
|
116
|
-
} catch (err) {
|
|
117
|
-
cb(err as Error)
|
|
118
|
-
}
|
|
119
|
-
},
|
|
120
|
-
},
|
|
121
|
-
async function transformMapFn(chunk: IN, _, cb) {
|
|
122
|
-
// Freezing the index, because it may change due to concurrency
|
|
98
|
+
return new Transform({
|
|
99
|
+
objectMode: true,
|
|
100
|
+
readableHighWaterMark: highWaterMark,
|
|
101
|
+
writableHighWaterMark: highWaterMark,
|
|
102
|
+
async transform(this: Transform, chunk: IN, _, cb) {
|
|
123
103
|
const currentIndex = ++index
|
|
104
|
+
inFlight++
|
|
124
105
|
|
|
125
|
-
//
|
|
106
|
+
// Apply backpressure if at capacity, otherwise request more input
|
|
107
|
+
if (inFlight < maxConcurrency) {
|
|
108
|
+
cb()
|
|
109
|
+
} else {
|
|
110
|
+
blockedCallback = cb
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Create the unresolved promise (to await)
|
|
126
114
|
messageDonePromises[currentIndex] = pDefer<OUT>()
|
|
127
115
|
|
|
128
116
|
const worker = workers[currentIndex % poolSize]! // round-robin
|
|
@@ -132,21 +120,50 @@ export function transformMultiThreaded<IN, OUT>(
|
|
|
132
120
|
} as WorkerInput)
|
|
133
121
|
|
|
134
122
|
try {
|
|
135
|
-
// awaiting for result
|
|
136
123
|
const out = await messageDonePromises[currentIndex]
|
|
137
|
-
|
|
138
|
-
// return the result
|
|
139
|
-
cb(null, out)
|
|
124
|
+
this.push(out)
|
|
140
125
|
} catch (err) {
|
|
141
126
|
// Currently we only support ErrorMode.SUPPRESS
|
|
142
127
|
// Error is logged and output continues
|
|
143
128
|
console.error(err)
|
|
129
|
+
} finally {
|
|
130
|
+
delete messageDonePromises[currentIndex]
|
|
131
|
+
inFlight--
|
|
132
|
+
|
|
133
|
+
// Release blocked callback if we now have capacity
|
|
134
|
+
if (blockedCallback && inFlight < maxConcurrency) {
|
|
135
|
+
const pendingCb = blockedCallback
|
|
136
|
+
blockedCallback = null
|
|
137
|
+
pendingCb()
|
|
138
|
+
}
|
|
144
139
|
|
|
145
|
-
|
|
140
|
+
// Trigger flush completion if all done
|
|
141
|
+
if (inFlight === 0 && flushBlocked) {
|
|
142
|
+
flushBlocked.resolve()
|
|
143
|
+
}
|
|
146
144
|
}
|
|
145
|
+
},
|
|
146
|
+
async flush(cb) {
|
|
147
|
+
// Wait for all in-flight operations to complete
|
|
148
|
+
if (inFlight > 0) {
|
|
149
|
+
flushBlocked = pDefer()
|
|
150
|
+
await flushBlocked
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
try {
|
|
154
|
+
// Push null (complete) to all workers
|
|
155
|
+
for (const worker of workers) {
|
|
156
|
+
worker.postMessage(null)
|
|
157
|
+
}
|
|
147
158
|
|
|
148
|
-
|
|
149
|
-
|
|
159
|
+
console.log(`transformMultiThreaded.flush is waiting for all workers to be done`)
|
|
160
|
+
await Promise.all(workerDonePromises)
|
|
161
|
+
console.log(`transformMultiThreaded.flush all workers done`)
|
|
162
|
+
|
|
163
|
+
cb()
|
|
164
|
+
} catch (err) {
|
|
165
|
+
cb(err as Error)
|
|
166
|
+
}
|
|
150
167
|
},
|
|
151
|
-
)
|
|
168
|
+
})
|
|
152
169
|
}
|
|
@@ -10,12 +10,8 @@ if (!workerFile) {
|
|
|
10
10
|
// console.log(`worker#${workerIndex} created`)
|
|
11
11
|
|
|
12
12
|
try {
|
|
13
|
-
// require('esbuild-register') // alternative
|
|
14
|
-
// require('ts-node/register/transpile-only')
|
|
15
|
-
// require('tsx/cjs/api').register() // https://tsx.is/dev-api/register-cjs
|
|
16
13
|
const { register } = await import('tsx/esm/api')
|
|
17
14
|
register() // https://tsx.is/dev-api/register-esm
|
|
18
|
-
// require('tsconfig-paths/register')
|
|
19
15
|
} catch {} // require if exists
|
|
20
16
|
|
|
21
17
|
const { WorkerClass } = await import(workerFile)
|
package/src/zip/zip.util.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { promisify } from 'node:util'
|
|
2
2
|
import type { ZlibOptions, ZstdOptions } from 'node:zlib'
|
|
3
3
|
import zlib from 'node:zlib'
|
|
4
|
+
import type { Integer } from '@naturalcycles/js-lib/types'
|
|
4
5
|
|
|
5
6
|
const deflate = promisify(zlib.deflate.bind(zlib))
|
|
6
7
|
const inflate = promisify(zlib.inflate.bind(zlib))
|
|
@@ -89,9 +90,22 @@ export async function gunzipToString(buf: Buffer, options?: ZlibOptions): Promis
|
|
|
89
90
|
|
|
90
91
|
export async function zstdCompress(
|
|
91
92
|
input: Buffer | string,
|
|
93
|
+
level?: Integer, // defaults to 3
|
|
92
94
|
options: ZstdOptions = {},
|
|
93
95
|
): Promise<Buffer<ArrayBuffer>> {
|
|
94
|
-
return await zstdCompressAsync(input, options)
|
|
96
|
+
return await zstdCompressAsync(input, zstdLevelToOptions(level, options))
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
export function zstdLevelToOptions(level: Integer | undefined, opt: ZstdOptions = {}): ZstdOptions {
|
|
100
|
+
if (!level) return opt
|
|
101
|
+
|
|
102
|
+
return {
|
|
103
|
+
...opt,
|
|
104
|
+
params: {
|
|
105
|
+
...opt.params,
|
|
106
|
+
[zlib.constants.ZSTD_c_compressionLevel]: level,
|
|
107
|
+
},
|
|
108
|
+
}
|
|
95
109
|
}
|
|
96
110
|
|
|
97
111
|
export async function zstdDecompressToString(
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import type { AbortableSignal } from '@naturalcycles/js-lib';
|
|
2
|
-
import { ErrorMode } from '@naturalcycles/js-lib/error';
|
|
3
|
-
import { type AbortableAsyncMapper, type AsyncPredicate, END, type NumberOfSeconds, type PositiveInteger, type Predicate, type Promisable, SKIP } from '@naturalcycles/js-lib/types';
|
|
4
|
-
import type { TransformOptions, TransformTyped } from '../stream.model.js';
|
|
5
|
-
import type { TransformMapStats } from './transformMap.js';
|
|
6
|
-
export interface TransformMap2Options<IN = any, OUT = IN> extends TransformOptions {
|
|
7
|
-
/**
|
|
8
|
-
* Predicate to filter outgoing results (after mapper).
|
|
9
|
-
* Allows to not emit all results.
|
|
10
|
-
*
|
|
11
|
-
* Defaults to "pass everything" (including null, undefined, etc).
|
|
12
|
-
* Simpler way to exclude certain cases is to return SKIP symbol from the mapper.
|
|
13
|
-
*/
|
|
14
|
-
predicate?: Predicate<OUT>;
|
|
15
|
-
asyncPredicate?: AsyncPredicate<OUT>;
|
|
16
|
-
/**
|
|
17
|
-
* Number of concurrently pending promises returned by `mapper`.
|
|
18
|
-
*
|
|
19
|
-
* @default 16
|
|
20
|
-
*/
|
|
21
|
-
concurrency?: PositiveInteger;
|
|
22
|
-
/**
|
|
23
|
-
* Time in seconds to gradually increase concurrency from 1 to `concurrency`.
|
|
24
|
-
* Useful for warming up connections to databases, APIs, etc.
|
|
25
|
-
*
|
|
26
|
-
* Set to 0 to disable warmup (default).
|
|
27
|
-
*/
|
|
28
|
-
warmupSeconds?: NumberOfSeconds;
|
|
29
|
-
/**
|
|
30
|
-
* @default THROW_IMMEDIATELY
|
|
31
|
-
*/
|
|
32
|
-
errorMode?: ErrorMode;
|
|
33
|
-
/**
|
|
34
|
-
* If defined - will be called on every error happening in the stream.
|
|
35
|
-
* Called BEFORE observable will emit error (unless skipErrors is set to true).
|
|
36
|
-
*/
|
|
37
|
-
onError?: (err: Error, input: IN) => any;
|
|
38
|
-
/**
|
|
39
|
-
* A hook that is called when the last item is finished processing.
|
|
40
|
-
* stats object is passed, containing countIn and countOut -
|
|
41
|
-
* number of items that entered the transform and number of items that left it.
|
|
42
|
-
*
|
|
43
|
-
* Callback is called **before** [possible] Aggregated error is thrown,
|
|
44
|
-
* and before [possible] THROW_IMMEDIATELY error.
|
|
45
|
-
*
|
|
46
|
-
* onDone callback will be awaited before Error is thrown.
|
|
47
|
-
*/
|
|
48
|
-
onDone?: (stats: TransformMapStats) => Promisable<any>;
|
|
49
|
-
/**
|
|
50
|
-
* Progress metric
|
|
51
|
-
*
|
|
52
|
-
* @default `stream`
|
|
53
|
-
*/
|
|
54
|
-
metric?: string;
|
|
55
|
-
/**
|
|
56
|
-
* Allows to abort (gracefully stop) the stream from inside the Transform.
|
|
57
|
-
*/
|
|
58
|
-
signal?: AbortableSignal;
|
|
59
|
-
}
|
|
60
|
-
/**
|
|
61
|
-
* Like transformMap, but with native concurrency control (no through2-concurrent dependency)
|
|
62
|
-
* and support for gradual warmup.
|
|
63
|
-
*
|
|
64
|
-
* @experimental
|
|
65
|
-
*/
|
|
66
|
-
export declare function transformMap2<IN = any, OUT = IN>(mapper: AbortableAsyncMapper<IN, OUT | typeof SKIP | typeof END>, opt?: TransformMap2Options<IN, OUT>): TransformTyped<IN, OUT>;
|