@naturalcycles/nodejs-lib 12.56.1 → 12.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +19 -18
- package/dist/index.js +19 -92
- package/dist/log/log.util.d.ts +4 -0
- package/dist/log/log.util.js +11 -0
- package/dist/stream/ndjson/ndjsonMap.d.ts +2 -2
- package/dist/stream/ndjson/ndjsonMap.js +4 -3
- package/dist/stream/ndjson/ndjsonStreamForEach.d.ts +2 -2
- package/dist/stream/ndjson/transformJsonParse.js +3 -3
- package/dist/stream/ndjson/transformToNDJson.js +2 -2
- package/dist/stream/pipeline/pipeline.d.ts +25 -3
- package/dist/stream/pipeline/pipeline.js +76 -9
- package/dist/stream/readable/readableCreate.d.ts +8 -0
- package/dist/stream/readable/readableCreate.js +9 -1
- package/dist/stream/readable/readableForEach.d.ts +2 -2
- package/dist/stream/readable/readableFromArray.d.ts +2 -2
- package/dist/stream/readable/readableFromArray.js +17 -13
- package/dist/stream/readable/readableMap.d.ts +2 -2
- package/dist/stream/readable/readableMap.js +22 -17
- package/dist/stream/sizeStack.d.ts +9 -0
- package/dist/stream/sizeStack.js +48 -0
- package/dist/stream/stream.util.d.ts +4 -0
- package/dist/stream/stream.util.js +24 -0
- package/dist/stream/transform/transformBuffer.js +1 -1
- package/dist/stream/transform/transformFilter.d.ts +3 -4
- package/dist/stream/transform/transformFilter.js +5 -20
- package/dist/stream/transform/transformLimit.d.ts +36 -1
- package/dist/stream/transform/transformLimit.js +33 -15
- package/dist/stream/transform/transformLogProgress.d.ts +22 -1
- package/dist/stream/transform/transformLogProgress.js +38 -20
- package/dist/stream/transform/transformMap.d.ts +4 -10
- package/dist/stream/transform/transformMap.js +52 -64
- package/dist/stream/transform/transformMapSimple.d.ts +2 -1
- package/dist/stream/transform/transformMapSimple.js +3 -3
- package/dist/stream/transform/transformMapSync.d.ts +7 -4
- package/dist/stream/transform/transformMapSync.js +30 -24
- package/dist/stream/transform/transformNoOp.js +1 -1
- package/dist/stream/transform/transformTap.d.ts +5 -2
- package/dist/stream/transform/transformTap.js +5 -4
- package/dist/stream/transform/transformToArray.js +1 -1
- package/dist/stream/transform/transformToString.js +2 -2
- package/dist/stream/transform/worker/transformMultiThreaded.js +1 -1
- package/dist/stream/transform/worker/workerClassProxy.js +1 -0
- package/dist/stream/writable/writableFork.d.ts +2 -0
- package/dist/stream/writable/writableFork.js +3 -1
- package/dist/stream/writable/writableLimit.d.ts +9 -0
- package/dist/stream/writable/writableLimit.js +29 -0
- package/dist/stream/writable/writablePushToArray.js +1 -1
- package/dist/stream/writable/writableVoid.d.ts +8 -1
- package/dist/stream/writable/writableVoid.js +6 -2
- package/dist/util/zip.util.d.ts +10 -2
- package/dist/util/zip.util.js +10 -3
- package/package.json +1 -1
- package/src/index.ts +17 -156
- package/src/log/log.util.ts +9 -0
- package/src/stream/ndjson/ndjsonMap.ts +7 -5
- package/src/stream/ndjson/ndjsonStreamForEach.ts +2 -2
- package/src/stream/ndjson/transformJsonParse.ts +3 -3
- package/src/stream/ndjson/transformToNDJson.ts +2 -2
- package/src/stream/pipeline/pipeline.ts +102 -9
- package/src/stream/readable/readableCreate.ts +9 -1
- package/src/stream/readable/readableForEach.ts +2 -2
- package/src/stream/readable/readableFromArray.ts +18 -21
- package/src/stream/readable/readableMap.ts +24 -21
- package/src/stream/sizeStack.ts +56 -0
- package/src/stream/stream.util.ts +29 -0
- package/src/stream/transform/transformBuffer.ts +1 -1
- package/src/stream/transform/transformFilter.ts +6 -20
- package/src/stream/transform/transformLimit.ts +71 -19
- package/src/stream/transform/transformLogProgress.ts +78 -26
- package/src/stream/transform/transformMap.ts +74 -94
- package/src/stream/transform/transformMapSimple.ts +6 -4
- package/src/stream/transform/transformMapSync.ts +45 -28
- package/src/stream/transform/transformNoOp.ts +1 -1
- package/src/stream/transform/transformTap.ts +11 -6
- package/src/stream/transform/transformToArray.ts +1 -1
- package/src/stream/transform/transformToString.ts +2 -2
- package/src/stream/transform/worker/transformMultiThreaded.ts +1 -1
- package/src/stream/transform/worker/workerClassProxy.js +1 -0
- package/src/stream/writable/writableFork.ts +3 -1
- package/src/stream/writable/writableLimit.ts +28 -0
- package/src/stream/writable/writablePushToArray.ts +1 -1
- package/src/stream/writable/writableVoid.ts +14 -2
- package/src/util/zip.util.ts +11 -3
- package/dist/stream/transform/legacy/transformMap.d.ts +0 -17
- package/dist/stream/transform/legacy/transformMap.js +0 -94
- package/src/stream/transform/legacy/transformMap.ts +0 -133
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { Readable } from 'stream'
|
|
2
|
+
import { CommonLogger } from '@naturalcycles/js-lib'
|
|
3
|
+
|
|
4
|
+
export function pipelineClose(
|
|
5
|
+
name: string,
|
|
6
|
+
readableDownstream: Readable,
|
|
7
|
+
sourceReadable: Readable | undefined,
|
|
8
|
+
streamDone: Promise<void> | undefined,
|
|
9
|
+
logger: CommonLogger,
|
|
10
|
+
): void {
|
|
11
|
+
readableDownstream.push(null) // this closes the stream, so downstream Readable will receive `end` and won't write anything
|
|
12
|
+
|
|
13
|
+
if (!sourceReadable) {
|
|
14
|
+
logger.warn(`${name} sourceReadable is not provided, readable stream will not be stopped`)
|
|
15
|
+
} else {
|
|
16
|
+
logger.log(`${name} is calling readable.unpipe() to pause the stream`)
|
|
17
|
+
sourceReadable.unpipe() // it is expected to pause the stream
|
|
18
|
+
|
|
19
|
+
if (!streamDone) {
|
|
20
|
+
logger.log(`${name} streamDone is not provided, will do readable.destroy right away`)
|
|
21
|
+
sourceReadable.destroy()
|
|
22
|
+
} else {
|
|
23
|
+
void streamDone.then(() => {
|
|
24
|
+
logger.log(`${name} streamDone, calling readable.destroy()`)
|
|
25
|
+
sourceReadable.destroy() // this throws ERR_STREAM_PREMATURE_CLOSE
|
|
26
|
+
})
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
}
|
|
@@ -18,7 +18,7 @@ export function transformBuffer<IN = any>(opt: TransformBufferOptions): Transfor
|
|
|
18
18
|
return new Transform({
|
|
19
19
|
objectMode: true,
|
|
20
20
|
...opt,
|
|
21
|
-
transform(chunk,
|
|
21
|
+
transform(chunk, _, cb) {
|
|
22
22
|
buf.push(chunk)
|
|
23
23
|
|
|
24
24
|
if (buf.length >= batchSize) {
|
|
@@ -1,32 +1,18 @@
|
|
|
1
1
|
import { Transform } from 'stream'
|
|
2
2
|
import { AsyncPredicate, Predicate } from '@naturalcycles/js-lib'
|
|
3
3
|
import { TransformOptions, TransformTyped } from '../stream.model'
|
|
4
|
+
import { transformMap, TransformMapOptions } from './transformMap'
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
|
-
*
|
|
7
|
-
* So, it's recommended to use transformMap instead, that is both concurrent and has
|
|
8
|
-
* filtering feature by default.
|
|
7
|
+
* Just a convenience wrapper around `transformMap` that has built-in predicate filtering support.
|
|
9
8
|
*/
|
|
10
9
|
export function transformFilter<IN = any>(
|
|
11
10
|
predicate: AsyncPredicate<IN>,
|
|
12
|
-
opt:
|
|
11
|
+
opt: TransformMapOptions = {},
|
|
13
12
|
): TransformTyped<IN, IN> {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
return new Transform({
|
|
17
|
-
objectMode: true,
|
|
13
|
+
return transformMap(v => v, {
|
|
14
|
+
predicate,
|
|
18
15
|
...opt,
|
|
19
|
-
async transform(chunk: IN, _encoding, cb) {
|
|
20
|
-
try {
|
|
21
|
-
if (await predicate(chunk, index++)) {
|
|
22
|
-
cb(null, chunk) // pass through
|
|
23
|
-
} else {
|
|
24
|
-
cb() // signal that we've finished processing, but emit no output here
|
|
25
|
-
}
|
|
26
|
-
} catch (err) {
|
|
27
|
-
cb(err as Error)
|
|
28
|
-
}
|
|
29
|
-
},
|
|
30
16
|
})
|
|
31
17
|
}
|
|
32
18
|
|
|
@@ -42,7 +28,7 @@ export function transformFilterSync<IN = any>(
|
|
|
42
28
|
return new Transform({
|
|
43
29
|
objectMode: true,
|
|
44
30
|
...opt,
|
|
45
|
-
|
|
31
|
+
transform(chunk: IN, _, cb) {
|
|
46
32
|
try {
|
|
47
33
|
if (predicate(chunk, index++)) {
|
|
48
34
|
cb(null, chunk) // pass through
|
|
@@ -1,32 +1,84 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Readable } from 'stream'
|
|
2
|
+
import { CommonLogger } from '@naturalcycles/js-lib'
|
|
3
|
+
import { AbortableTransform, transformNoOp } from '../../index'
|
|
2
4
|
import { TransformOptions, TransformTyped } from '../stream.model'
|
|
5
|
+
import { pipelineClose } from '../stream.util'
|
|
6
|
+
|
|
7
|
+
export interface TransformLimitOptions extends TransformOptions {
|
|
8
|
+
/**
|
|
9
|
+
* Falsy value (e.g. 0 or undefined) means "no limit"
|
|
10
|
+
*/
|
|
11
|
+
limit?: number
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* If provided (recommended!) - it will call readable.destroy() on limit.
|
|
15
|
+
* Without it - it will only stop the downstream consumers, but won't stop
|
|
16
|
+
* the Readable ("source" of the stream).
|
|
17
|
+
* It is almost always crucial to stop the Source too, so, please provide the Readable here!
|
|
18
|
+
*/
|
|
19
|
+
sourceReadable?: Readable
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Please provide it (a Promise that resolves when the Stream is done, e.g finished consuming things)
|
|
23
|
+
* to be able to wait for Consumers before calling `readable.destroy`.
|
|
24
|
+
* Has no effect if `sourceReadable` is not provided.
|
|
25
|
+
*/
|
|
26
|
+
streamDone?: Promise<void>
|
|
27
|
+
|
|
28
|
+
logger?: CommonLogger
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Set to true to enable additional debug messages, e.g it'll log
|
|
32
|
+
* when readable still emits values after the limit is reached.
|
|
33
|
+
*/
|
|
34
|
+
debug?: boolean
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Class only exists to be able to do `instanceof TransformLimit`
|
|
39
|
+
* and to set sourceReadable+streamDone to it in `_pipeline`.
|
|
40
|
+
*/
|
|
41
|
+
export class TransformLimit extends AbortableTransform {}
|
|
3
42
|
|
|
4
43
|
/**
|
|
5
44
|
* 0 or falsy value means "no limit"
|
|
6
45
|
*/
|
|
7
|
-
export function transformLimit<IN>(
|
|
8
|
-
limit
|
|
9
|
-
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
|
|
46
|
+
export function transformLimit<IN>(opt: TransformLimitOptions = {}): TransformTyped<IN, IN> {
|
|
47
|
+
const { logger = console, limit, debug } = opt
|
|
48
|
+
|
|
49
|
+
if (!limit) {
|
|
50
|
+
// No limit - returning pass-through transform
|
|
51
|
+
return transformNoOp()
|
|
52
|
+
}
|
|
13
53
|
|
|
14
|
-
|
|
54
|
+
let i = 0 // so we start first chunk with 1
|
|
55
|
+
let ended = false
|
|
56
|
+
return new TransformLimit({
|
|
15
57
|
objectMode: true,
|
|
16
58
|
...opt,
|
|
17
|
-
transform(this:
|
|
18
|
-
|
|
59
|
+
transform(this: TransformLimit, chunk, _, cb) {
|
|
60
|
+
i++
|
|
19
61
|
|
|
20
|
-
if (
|
|
21
|
-
cb(null, chunk) // pass through the item
|
|
22
|
-
} else {
|
|
23
|
-
cb(null) // pass-through empty
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
if (limit && index === limit) {
|
|
62
|
+
if (i === limit) {
|
|
27
63
|
ended = true
|
|
28
|
-
|
|
29
|
-
|
|
64
|
+
logger.log(`transformLimit of ${limit} reached`)
|
|
65
|
+
this.push(chunk)
|
|
66
|
+
|
|
67
|
+
pipelineClose(
|
|
68
|
+
'transformLimit',
|
|
69
|
+
this,
|
|
70
|
+
opt.sourceReadable || this.sourceReadable,
|
|
71
|
+
opt.streamDone || this.streamDone,
|
|
72
|
+
logger,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
cb() // after pause
|
|
76
|
+
} else if (!ended) {
|
|
77
|
+
cb(null, chunk)
|
|
78
|
+
} else {
|
|
79
|
+
if (debug) logger.log(`transformLimit.transform after limit`, i)
|
|
80
|
+
// If we ever HANG (don't call cb) - Node will do process.exit(0) to us
|
|
81
|
+
cb() // ended, don't emit anything
|
|
30
82
|
}
|
|
31
83
|
},
|
|
32
84
|
})
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { Transform } from 'stream'
|
|
2
2
|
import { inspect, InspectOptions } from 'util'
|
|
3
|
-
import { SimpleMovingAverage, _mb, _since, AnyObject } from '@naturalcycles/js-lib'
|
|
3
|
+
import { SimpleMovingAverage, _mb, _since, AnyObject, CommonLogger } from '@naturalcycles/js-lib'
|
|
4
4
|
import { dayjs } from '@naturalcycles/time-lib'
|
|
5
5
|
import { boldWhite, dimGrey, white, yellow } from '../../colors'
|
|
6
6
|
import { hasColors } from '../../colors/colors'
|
|
7
|
+
import { SizeStack } from '../sizeStack'
|
|
7
8
|
import { TransformOptions, TransformTyped } from '../stream.model'
|
|
8
9
|
|
|
9
10
|
export interface TransformLogProgressOptions<IN = any> extends TransformOptions {
|
|
@@ -86,6 +87,8 @@ export interface TransformLogProgressOptions<IN = any> extends TransformOptions
|
|
|
86
87
|
*/
|
|
87
88
|
logEvery?: number
|
|
88
89
|
|
|
90
|
+
logger?: CommonLogger
|
|
91
|
+
|
|
89
92
|
/**
|
|
90
93
|
* Function to return extra properties to the "progress object".
|
|
91
94
|
*
|
|
@@ -101,6 +104,41 @@ export interface TransformLogProgressOptions<IN = any> extends TransformOptions
|
|
|
101
104
|
* Defaults to 1.
|
|
102
105
|
*/
|
|
103
106
|
batchSize?: number
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Experimental logging of item (chunk) sizes, when json-stringified.
|
|
110
|
+
*
|
|
111
|
+
* Defaults to false.
|
|
112
|
+
*
|
|
113
|
+
* @experimental
|
|
114
|
+
*/
|
|
115
|
+
logSizes?: boolean
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
|
|
119
|
+
* Defaults to 100_000.
|
|
120
|
+
* Cannot be Infinity.
|
|
121
|
+
*/
|
|
122
|
+
logSizesBuffer?: number
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Works in addition to `logSizes`. Adds "zipped sizes".
|
|
126
|
+
*
|
|
127
|
+
* @experimental
|
|
128
|
+
*/
|
|
129
|
+
logZippedSizes?: boolean
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
interface LogItem extends AnyObject {
|
|
133
|
+
heapUsed?: number
|
|
134
|
+
heapTotal?: number
|
|
135
|
+
rss?: number
|
|
136
|
+
peakRSS?: number
|
|
137
|
+
rssMinusHeap?: number
|
|
138
|
+
external?: number
|
|
139
|
+
arrayBuffers?: number
|
|
140
|
+
rps10?: number
|
|
141
|
+
rpsTotal?: number
|
|
104
142
|
}
|
|
105
143
|
|
|
106
144
|
const inspectOpt: InspectOptions = {
|
|
@@ -122,8 +160,12 @@ export function transformLogProgress<IN = any>(
|
|
|
122
160
|
peakRSS: logPeakRSS = true,
|
|
123
161
|
logRPS = true,
|
|
124
162
|
logEvery = 1000,
|
|
163
|
+
logSizes = false,
|
|
164
|
+
logSizesBuffer = 100_000,
|
|
165
|
+
logZippedSizes = false,
|
|
125
166
|
batchSize = 1,
|
|
126
167
|
extra,
|
|
168
|
+
logger = console,
|
|
127
169
|
} = opt
|
|
128
170
|
const logProgress = opt.logProgress !== false && logEvery !== 0 // true by default
|
|
129
171
|
const logEvery10 = logEvery * 10
|
|
@@ -135,15 +177,23 @@ export function transformLogProgress<IN = any>(
|
|
|
135
177
|
let progress = 0
|
|
136
178
|
let peakRSS = 0
|
|
137
179
|
|
|
180
|
+
const sizes = logSizes ? new SizeStack('json', logSizesBuffer) : undefined
|
|
181
|
+
const sizesZipped = logZippedSizes ? new SizeStack('json.gz', logSizesBuffer) : undefined
|
|
182
|
+
|
|
138
183
|
logStats() // initial
|
|
139
184
|
|
|
140
185
|
return new Transform({
|
|
141
186
|
objectMode: true,
|
|
142
187
|
...opt,
|
|
143
|
-
transform(chunk: IN,
|
|
188
|
+
transform(chunk: IN, _, cb) {
|
|
144
189
|
progress++
|
|
145
190
|
processedLastSecond++
|
|
146
191
|
|
|
192
|
+
if (sizes) {
|
|
193
|
+
// Check it, cause gzipping might be delayed here..
|
|
194
|
+
void SizeStack.countItem(chunk, logger, sizes, sizesZipped)
|
|
195
|
+
}
|
|
196
|
+
|
|
147
197
|
if (logProgress && progress % logEvery === 0) {
|
|
148
198
|
logStats(chunk, false, progress % logEvery10 === 0)
|
|
149
199
|
}
|
|
@@ -172,28 +222,30 @@ export function transformLogProgress<IN = any>(
|
|
|
172
222
|
const rps10 = Math.round(sma.push(lastRPS))
|
|
173
223
|
if (mem.rss > peakRSS) peakRSS = mem.rss
|
|
174
224
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
)
|
|
196
|
-
|
|
225
|
+
const o: LogItem = {
|
|
226
|
+
[final ? `${metric}_final` : metric]: batchedProgress,
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
if (extra) Object.assign(o, extra(chunk, progress))
|
|
230
|
+
if (logHeapUsed) o.heapUsed = _mb(mem.heapUsed)
|
|
231
|
+
if (logHeapTotal) o.heapTotal = _mb(mem.heapTotal)
|
|
232
|
+
if (logRss) o.rss = _mb(mem.rss)
|
|
233
|
+
if (logPeakRSS) o.peakRSS = _mb(peakRSS)
|
|
234
|
+
if (opt.rssMinusHeap) o.rssMinusHeap = _mb(mem.rss - mem.heapTotal)
|
|
235
|
+
if (opt.external) o.external = _mb(mem.external)
|
|
236
|
+
if (opt.arrayBuffers) o.arrayBuffers = _mb(mem.arrayBuffers || 0)
|
|
237
|
+
|
|
238
|
+
if (logRPS) Object.assign(o, { rps10, rpsTotal })
|
|
239
|
+
|
|
240
|
+
logger.log(inspect(o, inspectOpt))
|
|
241
|
+
|
|
242
|
+
if (sizes?.items.length) {
|
|
243
|
+
logger.log(sizes.getStats())
|
|
244
|
+
|
|
245
|
+
if (sizesZipped?.items.length) {
|
|
246
|
+
logger.log(sizesZipped.getStats())
|
|
247
|
+
}
|
|
248
|
+
}
|
|
197
249
|
|
|
198
250
|
if (tenx) {
|
|
199
251
|
let perHour: number | string =
|
|
@@ -202,13 +254,13 @@ export function transformLogProgress<IN = any>(
|
|
|
202
254
|
perHour = Math.round(perHour / 1000) + 'K'
|
|
203
255
|
}
|
|
204
256
|
|
|
205
|
-
|
|
257
|
+
logger.log(
|
|
206
258
|
`${dimGrey(dayjs().toPretty())} ${white(metric)} took ${yellow(
|
|
207
259
|
_since(started),
|
|
208
260
|
)} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`,
|
|
209
261
|
)
|
|
210
262
|
} else if (final) {
|
|
211
|
-
|
|
263
|
+
logger.log(
|
|
212
264
|
`${boldWhite(metric)} took ${yellow(_since(started))} to process ${yellow(
|
|
213
265
|
batchedProgress,
|
|
214
266
|
)} rows with total RPS of ${yellow(rpsTotal)}`,
|
|
@@ -1,15 +1,18 @@
|
|
|
1
|
-
import { Transform } from 'stream'
|
|
2
1
|
import {
|
|
2
|
+
AbortableAsyncMapper,
|
|
3
3
|
AggregatedError,
|
|
4
|
-
AsyncMapper,
|
|
5
4
|
AsyncPredicate,
|
|
6
5
|
CommonLogger,
|
|
6
|
+
END,
|
|
7
7
|
ErrorMode,
|
|
8
8
|
pFilter,
|
|
9
|
-
|
|
9
|
+
SKIP,
|
|
10
10
|
} from '@naturalcycles/js-lib'
|
|
11
|
+
import through2Concurrent = require('through2-concurrent')
|
|
11
12
|
import { yellow } from '../../colors'
|
|
13
|
+
import { AbortableTransform } from '../pipeline/pipeline'
|
|
12
14
|
import { TransformTyped } from '../stream.model'
|
|
15
|
+
import { pipelineClose } from '../stream.util'
|
|
13
16
|
|
|
14
17
|
export interface TransformMapOptions<IN = any, OUT = IN> {
|
|
15
18
|
/**
|
|
@@ -23,9 +26,8 @@ export interface TransformMapOptions<IN = any, OUT = IN> {
|
|
|
23
26
|
* Predicate to filter outgoing results (after mapper).
|
|
24
27
|
* Allows to not emit all results.
|
|
25
28
|
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
* @default to filter out undefined/null values, but pass anything else
|
|
29
|
+
* Defaults to "pass everything" (including null, undefined, etc).
|
|
30
|
+
* Simpler way to exclude certain cases is to return SKIP symbol from the mapper.
|
|
29
31
|
*/
|
|
30
32
|
predicate?: AsyncPredicate<OUT>
|
|
31
33
|
|
|
@@ -54,17 +56,11 @@ export interface TransformMapOptions<IN = any, OUT = IN> {
|
|
|
54
56
|
*/
|
|
55
57
|
metric?: string
|
|
56
58
|
|
|
57
|
-
/**
|
|
58
|
-
* If defined - called BEFORE `final()` callback is called.
|
|
59
|
-
*/
|
|
60
|
-
beforeFinal?: () => any
|
|
61
|
-
|
|
62
59
|
logger?: CommonLogger
|
|
63
60
|
}
|
|
64
61
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
}
|
|
62
|
+
// doesn't work, cause here we don't construct our Transform instance ourselves
|
|
63
|
+
// export class TransformMap extends AbortableTransform {}
|
|
68
64
|
|
|
69
65
|
/**
|
|
70
66
|
* Like pMap, but for streams.
|
|
@@ -79,112 +75,96 @@ export function notNullishPredicate(item: any): boolean {
|
|
|
79
75
|
* If an Array is returned by `mapper` - it will be flattened and multiple results will be emitted from it. Tested by Array.isArray().
|
|
80
76
|
*/
|
|
81
77
|
export function transformMap<IN = any, OUT = IN>(
|
|
82
|
-
mapper:
|
|
78
|
+
mapper: AbortableAsyncMapper<IN, OUT>,
|
|
83
79
|
opt: TransformMapOptions<IN, OUT> = {},
|
|
84
80
|
): TransformTyped<IN, OUT> {
|
|
85
81
|
const {
|
|
86
82
|
concurrency = 16,
|
|
87
|
-
predicate
|
|
83
|
+
predicate, // we now default to "no predicate" (meaning pass-everything)
|
|
88
84
|
errorMode = ErrorMode.THROW_IMMEDIATELY,
|
|
89
85
|
flattenArrayOutput,
|
|
90
86
|
onError,
|
|
91
|
-
beforeFinal,
|
|
92
87
|
metric = 'stream',
|
|
93
88
|
logger = console,
|
|
94
89
|
} = opt
|
|
95
90
|
|
|
96
91
|
let index = -1
|
|
97
|
-
let
|
|
92
|
+
let isSettled = false
|
|
98
93
|
let errors = 0
|
|
99
94
|
const collectedErrors: Error[] = [] // only used if errorMode == THROW_AGGREGATED
|
|
100
95
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
return new Transform({
|
|
108
|
-
objectMode: true,
|
|
96
|
+
return through2Concurrent.obj(
|
|
97
|
+
{
|
|
98
|
+
maxConcurrency: concurrency,
|
|
99
|
+
async final(cb) {
|
|
100
|
+
// console.log('transformMap final')
|
|
109
101
|
|
|
110
|
-
|
|
111
|
-
// console.log('transformMap final', {index}, q.inFlight, q.queueSize)
|
|
102
|
+
logErrorStats(true)
|
|
112
103
|
|
|
113
|
-
|
|
114
|
-
|
|
104
|
+
if (collectedErrors.length) {
|
|
105
|
+
// emit Aggregated error
|
|
106
|
+
cb(new AggregatedError(collectedErrors))
|
|
107
|
+
} else {
|
|
108
|
+
// emit no error
|
|
109
|
+
cb()
|
|
110
|
+
}
|
|
111
|
+
},
|
|
112
|
+
},
|
|
113
|
+
async function transformMapFn(this: AbortableTransform, chunk: IN, _, cb) {
|
|
114
|
+
// Stop processing if isSettled (either THROW_IMMEDIATELY was fired or END received)
|
|
115
|
+
if (isSettled) return cb()
|
|
116
|
+
|
|
117
|
+
const currentIndex = ++index
|
|
118
|
+
|
|
119
|
+
try {
|
|
120
|
+
const res = await mapper(chunk, currentIndex)
|
|
121
|
+
const passedResults = await pFilter(
|
|
122
|
+
flattenArrayOutput && Array.isArray(res) ? res : [res],
|
|
123
|
+
async r => {
|
|
124
|
+
if (r === END) {
|
|
125
|
+
isSettled = true // will be checked later
|
|
126
|
+
return false
|
|
127
|
+
}
|
|
128
|
+
return r !== SKIP && (!predicate || (await predicate(r, currentIndex)))
|
|
129
|
+
},
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
passedResults.forEach(r => this.push(r))
|
|
133
|
+
|
|
134
|
+
if (isSettled) {
|
|
135
|
+
logger.log(`transformMap END received at index ${currentIndex}`)
|
|
136
|
+
pipelineClose('transformMap', this, this.sourceReadable, this.streamDone, logger)
|
|
137
|
+
}
|
|
115
138
|
|
|
116
|
-
|
|
139
|
+
cb() // done processing
|
|
140
|
+
} catch (err) {
|
|
141
|
+
logger.error(err)
|
|
142
|
+
errors++
|
|
143
|
+
logErrorStats()
|
|
117
144
|
|
|
118
|
-
|
|
145
|
+
if (onError) {
|
|
146
|
+
try {
|
|
147
|
+
onError(err, chunk)
|
|
148
|
+
} catch {}
|
|
149
|
+
}
|
|
119
150
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
cb(new AggregatedError(collectedErrors))
|
|
125
|
-
} else {
|
|
126
|
-
// emit no error
|
|
127
|
-
// It is truly a mistery, but calling cb() here was causing ERR_MULTIPLE_CALLBACK ?!
|
|
128
|
-
// Commenting it out seems to work ?!
|
|
129
|
-
// ?!
|
|
130
|
-
// cb()
|
|
131
|
-
}
|
|
132
|
-
},
|
|
151
|
+
if (errorMode === ErrorMode.THROW_IMMEDIATELY) {
|
|
152
|
+
isSettled = true
|
|
153
|
+
return cb(err) // Emit error immediately
|
|
154
|
+
}
|
|
133
155
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
// console.log('transform', {index})
|
|
137
|
-
|
|
138
|
-
// Stop processing if THROW_IMMEDIATELY mode is used
|
|
139
|
-
if (isRejected && errorMode === ErrorMode.THROW_IMMEDIATELY) return cb()
|
|
140
|
-
|
|
141
|
-
// It resolves when it is successfully STARTED execution.
|
|
142
|
-
// If it's queued instead - it'll wait and resolve only upon START.
|
|
143
|
-
await q.push(async () => {
|
|
144
|
-
try {
|
|
145
|
-
const currentIndex = index // because we need to pass it to 2 functions - mapper and predicate. Refers to INPUT index (since it may return multiple outputs)
|
|
146
|
-
const res = await mapper(chunk, currentIndex)
|
|
147
|
-
const passedResults = await pFilter(
|
|
148
|
-
flattenArrayOutput && Array.isArray(res) ? res : [res],
|
|
149
|
-
async r => await predicate(r, currentIndex),
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
passedResults.forEach(r => this.push(r))
|
|
153
|
-
} catch (err) {
|
|
154
|
-
logger.error(err)
|
|
155
|
-
|
|
156
|
-
errors++
|
|
157
|
-
|
|
158
|
-
logErrorStats(logger)
|
|
159
|
-
|
|
160
|
-
if (onError) {
|
|
161
|
-
try {
|
|
162
|
-
onError(err, chunk)
|
|
163
|
-
} catch {}
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
if (errorMode === ErrorMode.THROW_IMMEDIATELY) {
|
|
167
|
-
isRejected = true
|
|
168
|
-
// Emit error immediately
|
|
169
|
-
// return cb(err as Error)
|
|
170
|
-
return this.emit('error', err as Error)
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
if (errorMode === ErrorMode.THROW_AGGREGATED) {
|
|
174
|
-
collectedErrors.push(err as Error)
|
|
175
|
-
}
|
|
156
|
+
if (errorMode === ErrorMode.THROW_AGGREGATED) {
|
|
157
|
+
collectedErrors.push(err as Error)
|
|
176
158
|
}
|
|
177
|
-
})
|
|
178
159
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
160
|
+
// Tell input stream that we're done processing, but emit nothing to output - not error nor result
|
|
161
|
+
cb()
|
|
162
|
+
}
|
|
182
163
|
},
|
|
183
|
-
|
|
164
|
+
)
|
|
184
165
|
|
|
185
|
-
function logErrorStats(
|
|
166
|
+
function logErrorStats(final = false): void {
|
|
186
167
|
if (!errors) return
|
|
187
|
-
|
|
188
168
|
logger.log(`${metric} ${final ? 'final ' : ''}errors: ${yellow(errors)}`)
|
|
189
169
|
}
|
|
190
170
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Transform } from 'stream'
|
|
2
|
-
import { ErrorMode, Mapper } from '@naturalcycles/js-lib'
|
|
2
|
+
import { CommonLogger, ErrorMode, Mapper } from '@naturalcycles/js-lib'
|
|
3
3
|
import { TransformTyped } from '../stream.model'
|
|
4
4
|
|
|
5
5
|
export interface TransformMapSimpleOptions {
|
|
@@ -9,6 +9,8 @@ export interface TransformMapSimpleOptions {
|
|
|
9
9
|
* @default ErrorMode.THROW_IMMEDIATELY
|
|
10
10
|
*/
|
|
11
11
|
errorMode?: ErrorMode.THROW_IMMEDIATELY | ErrorMode.SUPPRESS
|
|
12
|
+
|
|
13
|
+
logger?: CommonLogger
|
|
12
14
|
}
|
|
13
15
|
|
|
14
16
|
/**
|
|
@@ -25,15 +27,15 @@ export function transformMapSimple<IN = any, OUT = IN>(
|
|
|
25
27
|
opt: TransformMapSimpleOptions = {},
|
|
26
28
|
): TransformTyped<IN, OUT> {
|
|
27
29
|
let index = -1
|
|
28
|
-
const { errorMode = ErrorMode.THROW_IMMEDIATELY } = opt
|
|
30
|
+
const { errorMode = ErrorMode.THROW_IMMEDIATELY, logger = console } = opt
|
|
29
31
|
|
|
30
32
|
return new Transform({
|
|
31
33
|
objectMode: true,
|
|
32
|
-
transform(chunk: IN,
|
|
34
|
+
transform(chunk: IN, _, cb) {
|
|
33
35
|
try {
|
|
34
36
|
cb(null, mapper(chunk, ++index))
|
|
35
37
|
} catch (err) {
|
|
36
|
-
|
|
38
|
+
logger.error(err)
|
|
37
39
|
|
|
38
40
|
if (errorMode === ErrorMode.SUPPRESS) {
|
|
39
41
|
cb() // suppress the error
|