@naturalcycles/nodejs-lib 15.22.0 → 15.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/exec2/exec2.js +1 -0
- package/dist/stream/index.d.ts +1 -2
- package/dist/stream/index.js +1 -2
- package/dist/stream/ndjson/ndjsonMap.d.ts +1 -1
- package/dist/stream/ndjson/ndjsonMap.js +13 -15
- package/dist/stream/ndjson/ndjsonStreamForEach.d.ts +2 -2
- package/dist/stream/ndjson/ndjsonStreamForEach.js +9 -15
- package/dist/stream/pipeline.d.ts +93 -0
- package/dist/stream/pipeline.js +262 -0
- package/dist/stream/stream.util.d.ts +1 -3
- package/dist/stream/stream.util.js +1 -20
- package/dist/stream/transform/transformChunk.d.ts +5 -8
- package/dist/stream/transform/transformChunk.js +4 -2
- package/dist/stream/transform/transformFlatten.d.ts +1 -0
- package/dist/stream/transform/transformFlatten.js +15 -4
- package/dist/stream/transform/transformLimit.d.ts +3 -26
- package/dist/stream/transform/transformLimit.js +14 -23
- package/dist/stream/transform/transformMap.d.ts +5 -0
- package/dist/stream/transform/transformMap.js +22 -18
- package/dist/stream/transform/transformMapSync.d.ts +5 -3
- package/dist/stream/transform/transformMapSync.js +7 -8
- package/dist/stream/transform/transformSplit.js +2 -1
- package/dist/stream/transform/transformTee.js +4 -2
- package/dist/stream/writable/writableForEach.d.ts +2 -1
- package/dist/stream/writable/writableFork.js +2 -2
- package/package.json +1 -1
- package/src/exec2/exec2.ts +1 -0
- package/src/stream/index.ts +1 -2
- package/src/stream/ndjson/ndjsonMap.ts +12 -22
- package/src/stream/ndjson/ndjsonStreamForEach.ts +8 -15
- package/src/stream/pipeline.ts +351 -0
- package/src/stream/stream.util.ts +1 -29
- package/src/stream/transform/transformChunk.ts +8 -11
- package/src/stream/transform/transformFlatten.ts +16 -4
- package/src/stream/transform/transformLimit.ts +20 -51
- package/src/stream/transform/transformMap.ts +31 -20
- package/src/stream/transform/transformMapSync.ts +14 -8
- package/src/stream/transform/transformSplit.ts +2 -1
- package/src/stream/transform/transformTee.ts +5 -2
- package/src/stream/writable/writableForEach.ts +2 -2
- package/src/stream/writable/writableFork.ts +2 -2
- package/dist/stream/pipeline/pipeline.d.ts +0 -36
- package/dist/stream/pipeline/pipeline.js +0 -82
- package/dist/stream/readable/readableForEach.d.ts +0 -19
- package/dist/stream/readable/readableForEach.js +0 -30
- package/src/stream/pipeline/pipeline.ts +0 -114
- package/src/stream/readable/readableForEach.ts +0 -42
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
import { Readable, type Transform } from 'node:stream'
|
|
2
|
+
import { pipeline } from 'node:stream/promises'
|
|
3
|
+
import type { ReadableStream as WebReadableStream } from 'node:stream/web'
|
|
4
|
+
import { createUnzip, type ZlibOptions } from 'node:zlib'
|
|
5
|
+
import { createGzip } from 'node:zlib'
|
|
6
|
+
import { createAbortableSignal } from '@naturalcycles/js-lib'
|
|
7
|
+
import type {
|
|
8
|
+
AbortableAsyncMapper,
|
|
9
|
+
AsyncIndexedMapper,
|
|
10
|
+
AsyncPredicate,
|
|
11
|
+
END,
|
|
12
|
+
IndexedMapper,
|
|
13
|
+
Integer,
|
|
14
|
+
NonNegativeInteger,
|
|
15
|
+
PositiveInteger,
|
|
16
|
+
Predicate,
|
|
17
|
+
SKIP,
|
|
18
|
+
} from '@naturalcycles/js-lib/types'
|
|
19
|
+
import { fs2 } from '../fs/fs2.js'
|
|
20
|
+
import { createReadStreamAsNDJSON } from './ndjson/createReadStreamAsNDJSON.js'
|
|
21
|
+
import { transformJsonParse } from './ndjson/transformJsonParse.js'
|
|
22
|
+
import { transformToNDJson } from './ndjson/transformToNDJson.js'
|
|
23
|
+
import type {
|
|
24
|
+
ReadableTyped,
|
|
25
|
+
TransformOptions,
|
|
26
|
+
TransformTyped,
|
|
27
|
+
WritableTyped,
|
|
28
|
+
} from './stream.model.js'
|
|
29
|
+
import { PIPELINE_GRACEFUL_ABORT } from './stream.util.js'
|
|
30
|
+
import { transformChunk } from './transform/transformChunk.js'
|
|
31
|
+
import { transformFilterSync } from './transform/transformFilter.js'
|
|
32
|
+
import { transformFlatten, transformFlattenIfNeeded } from './transform/transformFlatten.js'
|
|
33
|
+
import { transformLimit } from './transform/transformLimit.js'
|
|
34
|
+
import {
|
|
35
|
+
transformLogProgress,
|
|
36
|
+
type TransformLogProgressOptions,
|
|
37
|
+
} from './transform/transformLogProgress.js'
|
|
38
|
+
import { transformMap, type TransformMapOptions } from './transform/transformMap.js'
|
|
39
|
+
import {
|
|
40
|
+
transformMapSimple,
|
|
41
|
+
type TransformMapSimpleOptions,
|
|
42
|
+
} from './transform/transformMapSimple.js'
|
|
43
|
+
import { transformMapSync, type TransformMapSyncOptions } from './transform/transformMapSync.js'
|
|
44
|
+
import { transformOffset, type TransformOffsetOptions } from './transform/transformOffset.js'
|
|
45
|
+
import { transformSplitOnNewline } from './transform/transformSplit.js'
|
|
46
|
+
import { transformTap, type TransformTapOptions } from './transform/transformTap.js'
|
|
47
|
+
import { transformThrottle, type TransformThrottleOptions } from './transform/transformThrottle.js'
|
|
48
|
+
import { writablePushToArray } from './writable/writablePushToArray.js'
|
|
49
|
+
import { writableVoid } from './writable/writableVoid.js'
|
|
50
|
+
|
|
51
|
+
export class Pipeline<T> {
|
|
52
|
+
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
|
|
53
|
+
private readonly source: Readable
|
|
54
|
+
private transforms: NodeJS.ReadWriteStream[] = []
|
|
55
|
+
private destination?: NodeJS.WritableStream
|
|
56
|
+
private readableLimit?: Integer
|
|
57
|
+
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
|
|
58
|
+
private objectMode: boolean
|
|
59
|
+
private abortableSignal = createAbortableSignal()
|
|
60
|
+
|
|
61
|
+
private constructor(source: ReadableTyped<T>, objectMode = true) {
|
|
62
|
+
this.source = source
|
|
63
|
+
this.objectMode = objectMode
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
static from<T>(source: ReadableTyped<T>): Pipeline<T> {
|
|
67
|
+
return new Pipeline(source)
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
static fromWeb<T>(webReadableStream: WebReadableStream<T>): Pipeline<T> {
|
|
71
|
+
return new Pipeline(Readable.fromWeb(webReadableStream))
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Technically same as `fromIterable` (since Array is Iterable),
|
|
76
|
+
* but named a bit friendlier.
|
|
77
|
+
*/
|
|
78
|
+
static fromArray<T>(input: T[]): Pipeline<T> {
|
|
79
|
+
return new Pipeline(Readable.from(input))
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
static fromIterable<T>(input: Iterable<T> | AsyncIterable<T>): Pipeline<T> {
|
|
83
|
+
return new Pipeline(Readable.from(input))
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
static fromFile(sourceFilePath: string): Pipeline<Uint8Array> {
|
|
87
|
+
return new Pipeline(fs2.createReadStream(sourceFilePath), false)
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T> {
|
|
91
|
+
return new Pipeline(createReadStreamAsNDJSON<T>(sourceFilePath))
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Limits the source Readable, but using `.take(limit)` on it.
|
|
96
|
+
* This is THE preferred way of limiting the source.
|
|
97
|
+
*/
|
|
98
|
+
limitSource(limit: NonNegativeInteger | undefined): this {
|
|
99
|
+
this.readableLimit = limit
|
|
100
|
+
return this
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* If possible - STRONGLY PREFER applying `.take(limit)` on the source Readable,
|
|
105
|
+
* as it's a clean graceful way of limiting the Readable. Example:
|
|
106
|
+
*
|
|
107
|
+
* Pipeline.from(myReadable.take(10))
|
|
108
|
+
*
|
|
109
|
+
* or
|
|
110
|
+
*
|
|
111
|
+
* Pipeline
|
|
112
|
+
* .from(myReadable)
|
|
113
|
+
* .limitSource(10)
|
|
114
|
+
*
|
|
115
|
+
* If applying `take` on Readable is not possible - use this method at your own risk.
|
|
116
|
+
* Why warning?
|
|
117
|
+
* The limit works by aborting the stream, and then catching the error - certainly
|
|
118
|
+
* less clean than `.take()` on the source.
|
|
119
|
+
*/
|
|
120
|
+
limit(limit: NonNegativeInteger | undefined): this {
|
|
121
|
+
this.transforms.push(
|
|
122
|
+
transformLimit({
|
|
123
|
+
limit,
|
|
124
|
+
signal: this.abortableSignal,
|
|
125
|
+
}),
|
|
126
|
+
)
|
|
127
|
+
return this
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
chunk(chunkSize: PositiveInteger, opt?: TransformOptions): Pipeline<T[]> {
|
|
131
|
+
this.transforms.push(transformChunk(chunkSize, opt))
|
|
132
|
+
return this as any
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
flatten<TO>(this: Pipeline<readonly TO[]>): Pipeline<TO> {
|
|
136
|
+
this.transforms.push(transformFlatten())
|
|
137
|
+
return this as any
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
flattenIfNeeded(): Pipeline<T extends readonly (infer TO)[] ? TO : T> {
|
|
141
|
+
this.transforms.push(transformFlattenIfNeeded())
|
|
142
|
+
return this as any
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// TransformLogProgressOptions intentionally doesn't have <T> passed, as it's inconvenient in many cases
|
|
146
|
+
logProgress(opt?: TransformLogProgressOptions): this {
|
|
147
|
+
this.transforms.push(transformLogProgress(opt))
|
|
148
|
+
return this
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
map<TO>(
|
|
152
|
+
mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>,
|
|
153
|
+
opt?: TransformMapOptions<T, TO>,
|
|
154
|
+
): Pipeline<TO> {
|
|
155
|
+
this.transforms.push(
|
|
156
|
+
transformMap(mapper, {
|
|
157
|
+
...opt,
|
|
158
|
+
signal: this.abortableSignal,
|
|
159
|
+
}),
|
|
160
|
+
)
|
|
161
|
+
return this as any
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
mapSync<TO>(
|
|
165
|
+
mapper: IndexedMapper<T, TO | typeof SKIP | typeof END>,
|
|
166
|
+
opt?: TransformMapSyncOptions,
|
|
167
|
+
): Pipeline<TO> {
|
|
168
|
+
this.transforms.push(
|
|
169
|
+
transformMapSync(mapper, {
|
|
170
|
+
...opt,
|
|
171
|
+
signal: this.abortableSignal,
|
|
172
|
+
}),
|
|
173
|
+
)
|
|
174
|
+
return this as any
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
mapSimple<TO>(mapper: IndexedMapper<T, TO>, opt?: TransformMapSimpleOptions): Pipeline<TO> {
|
|
178
|
+
this.transforms.push(transformMapSimple(mapper, opt))
|
|
179
|
+
return this as any
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
filter(predicate: AsyncPredicate<T>, opt?: TransformMapOptions): this {
|
|
183
|
+
this.transforms.push(
|
|
184
|
+
transformMap(v => v, {
|
|
185
|
+
predicate,
|
|
186
|
+
...opt,
|
|
187
|
+
signal: this.abortableSignal,
|
|
188
|
+
}),
|
|
189
|
+
)
|
|
190
|
+
return this
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
filterSync(predicate: Predicate<T>, opt?: TransformOptions): this {
|
|
194
|
+
this.transforms.push(transformFilterSync(predicate, opt))
|
|
195
|
+
return this
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
offset(opt: TransformOffsetOptions): this {
|
|
199
|
+
this.transforms.push(transformOffset(opt))
|
|
200
|
+
return this
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
tap(fn: AsyncIndexedMapper<T, any>, opt?: TransformTapOptions): this {
|
|
204
|
+
this.transforms.push(transformTap(fn, opt))
|
|
205
|
+
return this
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
throttle(opt: TransformThrottleOptions): this {
|
|
209
|
+
this.transforms.push(transformThrottle(opt))
|
|
210
|
+
return this
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
// todo: tee/fork
|
|
214
|
+
|
|
215
|
+
transform<TO>(transform: TransformTyped<T, TO>): Pipeline<TO> {
|
|
216
|
+
this.transforms.push(transform)
|
|
217
|
+
return this as any
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
/**
|
|
221
|
+
* Helper method to add multiple transforms at once.
|
|
222
|
+
* Not type safe! Prefer using singular `transform()` multiple times for type safety.
|
|
223
|
+
*/
|
|
224
|
+
transformMany<TO>(transforms: Transform[]): Pipeline<TO> {
|
|
225
|
+
this.transforms.push(...transforms)
|
|
226
|
+
return this as any
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
/**
|
|
230
|
+
* Utility method just to conveniently type-cast the current Pipeline type.
|
|
231
|
+
* No runtime effect.
|
|
232
|
+
*/
|
|
233
|
+
typeCastAs<TO>(): Pipeline<TO> {
|
|
234
|
+
return this as any
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
setObjectMode(objectMode: boolean): this {
|
|
238
|
+
this.objectMode = objectMode
|
|
239
|
+
return this
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* Transform the stream of Objects into a stream of JSON lines.
|
|
244
|
+
* Technically, it goes into objectMode=false, so it's a binary stream at the end.
|
|
245
|
+
*/
|
|
246
|
+
toNDJson(): Pipeline<Uint8Array> {
|
|
247
|
+
this.transforms.push(transformToNDJson())
|
|
248
|
+
this.objectMode = false
|
|
249
|
+
return this as any
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
parseNDJson<TO = unknown>(this: Pipeline<Uint8Array>): Pipeline<TO> {
|
|
253
|
+
// It was said that transformJsonParse() separately is 10% or more slower than .map(line => JSON.parse(line))
|
|
254
|
+
// So, we can investigate a speedup
|
|
255
|
+
this.transforms.push(transformSplitOnNewline(), transformJsonParse())
|
|
256
|
+
this.objectMode = true
|
|
257
|
+
return this as any
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
gzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
|
|
261
|
+
this.transforms.push(createGzip(opt))
|
|
262
|
+
this.objectMode = false
|
|
263
|
+
return this as any
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
gunzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
|
|
267
|
+
this.transforms.push(createUnzip(opt))
|
|
268
|
+
this.objectMode = false
|
|
269
|
+
return this as any
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
async toArray(opt?: TransformOptions): Promise<T[]> {
|
|
273
|
+
const arr: T[] = []
|
|
274
|
+
this.destination = writablePushToArray(arr, opt)
|
|
275
|
+
await this.run()
|
|
276
|
+
return arr
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
async toFile(outputFilePath: string): Promise<void> {
|
|
280
|
+
fs2.ensureFile(outputFilePath)
|
|
281
|
+
this.destination = fs2.createWriteStream(outputFilePath)
|
|
282
|
+
await this.run()
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
async toNDJsonFile(outputFilePath: string): Promise<void> {
|
|
286
|
+
fs2.ensureFile(outputFilePath)
|
|
287
|
+
this.transforms.push(transformToNDJson())
|
|
288
|
+
if (outputFilePath.endsWith('.gz')) {
|
|
289
|
+
this.transforms.push(
|
|
290
|
+
createGzip({
|
|
291
|
+
// chunkSize: 64 * 1024, // no observed speedup
|
|
292
|
+
}),
|
|
293
|
+
)
|
|
294
|
+
}
|
|
295
|
+
this.destination = fs2.createWriteStream(outputFilePath, {
|
|
296
|
+
// highWaterMark: 64 * 1024, // no observed speedup
|
|
297
|
+
})
|
|
298
|
+
await this.run()
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
async to(destination: WritableTyped<T>): Promise<void> {
|
|
302
|
+
this.destination = destination
|
|
303
|
+
await this.run()
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
async forEach(
|
|
307
|
+
fn: AsyncIndexedMapper<T, void>,
|
|
308
|
+
opt?: TransformMapOptions<T, void>,
|
|
309
|
+
): Promise<void> {
|
|
310
|
+
this.transforms.push(
|
|
311
|
+
transformMap(fn, {
|
|
312
|
+
...opt,
|
|
313
|
+
signal: this.abortableSignal,
|
|
314
|
+
}),
|
|
315
|
+
)
|
|
316
|
+
await this.run()
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
async forEachSync(
|
|
320
|
+
fn: IndexedMapper<T, void>,
|
|
321
|
+
opt?: TransformMapSyncOptions<T, void>,
|
|
322
|
+
): Promise<void> {
|
|
323
|
+
this.transforms.push(
|
|
324
|
+
transformMapSync(fn, {
|
|
325
|
+
...opt,
|
|
326
|
+
signal: this.abortableSignal,
|
|
327
|
+
}),
|
|
328
|
+
)
|
|
329
|
+
await this.run()
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
async run(): Promise<void> {
|
|
333
|
+
this.destination ||= writableVoid()
|
|
334
|
+
let { source } = this
|
|
335
|
+
if (this.readableLimit) {
|
|
336
|
+
source = source.take(this.readableLimit)
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
try {
|
|
340
|
+
await pipeline([source, ...this.transforms, this.destination], {
|
|
341
|
+
signal: this.abortableSignal,
|
|
342
|
+
})
|
|
343
|
+
} catch (err) {
|
|
344
|
+
if (err instanceof Error && (err.cause as any)?.message === PIPELINE_GRACEFUL_ABORT) {
|
|
345
|
+
console.log('pipeline gracefully aborted') // todo: this message may be removed later
|
|
346
|
+
return
|
|
347
|
+
}
|
|
348
|
+
throw err
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
}
|
|
@@ -1,29 +1 @@
|
|
|
1
|
-
|
|
2
|
-
import type { CommonLogger } from '@naturalcycles/js-lib/log'
|
|
3
|
-
|
|
4
|
-
export function pipelineClose(
|
|
5
|
-
name: string,
|
|
6
|
-
readableDownstream: Readable,
|
|
7
|
-
sourceReadable: Readable | undefined,
|
|
8
|
-
streamDone: Promise<void> | undefined,
|
|
9
|
-
logger: CommonLogger,
|
|
10
|
-
): void {
|
|
11
|
-
readableDownstream.push(null) // this closes the stream, so downstream Readable will receive `end` and won't write anything
|
|
12
|
-
|
|
13
|
-
if (!sourceReadable) {
|
|
14
|
-
logger.warn(`${name} sourceReadable is not provided, readable stream will not be stopped`)
|
|
15
|
-
} else {
|
|
16
|
-
logger.log(`${name} is calling readable.unpipe() to pause the stream`)
|
|
17
|
-
sourceReadable.unpipe() // it is expected to pause the stream
|
|
18
|
-
|
|
19
|
-
if (!streamDone) {
|
|
20
|
-
logger.log(`${name} streamDone is not provided, will do readable.destroy right away`)
|
|
21
|
-
sourceReadable.destroy()
|
|
22
|
-
} else {
|
|
23
|
-
void streamDone.then(() => {
|
|
24
|
-
logger.log(`${name} streamDone, calling readable.destroy()`)
|
|
25
|
-
sourceReadable.destroy() // this throws ERR_STREAM_PREMATURE_CLOSE
|
|
26
|
-
})
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
}
|
|
1
|
+
export const PIPELINE_GRACEFUL_ABORT = 'PIPELINE_GRACEFUL_ABORT'
|
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
import { Transform } from 'node:stream'
|
|
2
|
+
import type { PositiveInteger } from '@naturalcycles/js-lib/types'
|
|
2
3
|
import type { TransformOptions, TransformTyped } from '../stream.model.js'
|
|
3
4
|
|
|
4
|
-
export interface TransformChunkOptions extends TransformOptions {
|
|
5
|
-
/**
|
|
6
|
-
* How many items to include in each chunk.
|
|
7
|
-
* Last chunk will contain the remaining items, possibly less than chunkSize.
|
|
8
|
-
*/
|
|
9
|
-
chunkSize: number
|
|
10
|
-
}
|
|
11
|
-
|
|
12
5
|
/**
|
|
13
6
|
* Similar to RxJS bufferCount(),
|
|
14
7
|
* allows to "chunk" the input stream into chunks of `opt.chunkSize` size.
|
|
15
8
|
* Last chunk will contain the remaining items, possibly less than chunkSize.
|
|
9
|
+
*
|
|
10
|
+
* `chunkSize` indicates how many items to include in each chunk.
|
|
11
|
+
* Last chunk will contain the remaining items, possibly less than chunkSize.
|
|
16
12
|
*/
|
|
17
|
-
export function transformChunk<IN = any>(
|
|
18
|
-
|
|
19
|
-
|
|
13
|
+
export function transformChunk<IN = any>(
|
|
14
|
+
chunkSize: PositiveInteger,
|
|
15
|
+
opt?: TransformOptions,
|
|
16
|
+
): TransformTyped<IN, IN[]> {
|
|
20
17
|
let buf: IN[] = []
|
|
21
18
|
|
|
22
19
|
return new Transform({
|
|
@@ -5,13 +5,25 @@ export function transformFlatten<T>(): TransformTyped<T[], T> {
|
|
|
5
5
|
return new Transform({
|
|
6
6
|
objectMode: true,
|
|
7
7
|
transform(chunk: T[], _, cb) {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
8
|
+
for (const item of chunk) {
|
|
9
|
+
this.push(item)
|
|
10
|
+
}
|
|
11
|
+
cb() // acknowledge
|
|
12
|
+
},
|
|
13
|
+
})
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export function transformFlattenIfNeeded<T>(): TransformTyped<T[], T> {
|
|
17
|
+
return new Transform({
|
|
18
|
+
objectMode: true,
|
|
19
|
+
transform(chunk: T[], _, cb) {
|
|
20
|
+
if (Array.isArray(chunk)) {
|
|
12
21
|
for (const item of chunk) {
|
|
13
22
|
this.push(item)
|
|
14
23
|
}
|
|
24
|
+
} else {
|
|
25
|
+
// As a safety precaution, to not crash the pipeline - push as is
|
|
26
|
+
this.push(chunk)
|
|
15
27
|
}
|
|
16
28
|
cb() // acknowledge
|
|
17
29
|
},
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
import
|
|
2
|
-
import type {
|
|
3
|
-
import { AbortableTransform } from '../pipeline/pipeline.js'
|
|
1
|
+
import { Transform } from 'node:stream'
|
|
2
|
+
import type { AbortableSignal } from '@naturalcycles/js-lib'
|
|
4
3
|
import type { TransformOptions, TransformTyped } from '../stream.model.js'
|
|
5
|
-
import {
|
|
4
|
+
import { PIPELINE_GRACEFUL_ABORT } from '../stream.util.js'
|
|
6
5
|
import { transformNoOp } from './transformNoOp.js'
|
|
7
6
|
|
|
8
7
|
export interface TransformLimitOptions extends TransformOptions {
|
|
@@ -12,72 +11,42 @@ export interface TransformLimitOptions extends TransformOptions {
|
|
|
12
11
|
limit?: number
|
|
13
12
|
|
|
14
13
|
/**
|
|
15
|
-
*
|
|
16
|
-
* Without it - it will only stop the downstream consumers, but won't stop
|
|
17
|
-
* the Readable ("source" of the stream).
|
|
18
|
-
* It is almost always crucial to stop the Source too, so, please provide the Readable here!
|
|
14
|
+
* Allows to abort (gracefully stop) the stream from inside the Transform.
|
|
19
15
|
*/
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
/**
|
|
23
|
-
* Please provide it (a Promise that resolves when the Stream is done, e.g finished consuming things)
|
|
24
|
-
* to be able to wait for Consumers before calling `readable.destroy`.
|
|
25
|
-
* Has no effect if `readable` is not provided.
|
|
26
|
-
*/
|
|
27
|
-
streamDone?: Promise<void>
|
|
28
|
-
|
|
29
|
-
logger?: CommonLogger
|
|
30
|
-
|
|
31
|
-
/**
|
|
32
|
-
* Set to true to enable additional debug messages, e.g it'll log
|
|
33
|
-
* when readable still emits values after the limit is reached.
|
|
34
|
-
*/
|
|
35
|
-
debug?: boolean
|
|
16
|
+
signal: AbortableSignal
|
|
36
17
|
}
|
|
37
18
|
|
|
38
|
-
/**
|
|
39
|
-
* Class only exists to be able to do `instanceof TransformLimit`
|
|
40
|
-
* and to set sourceReadable+streamDone to it in `_pipeline`.
|
|
41
|
-
*/
|
|
42
|
-
export class TransformLimit extends AbortableTransform {}
|
|
43
|
-
|
|
44
19
|
export function transformLimit<IN>(opt: TransformLimitOptions): TransformTyped<IN, IN> {
|
|
45
|
-
const {
|
|
20
|
+
const { limit, signal } = opt
|
|
46
21
|
|
|
47
22
|
if (!limit) {
|
|
48
|
-
// No limit - returning pass-through transform
|
|
49
23
|
return transformNoOp()
|
|
50
24
|
}
|
|
51
25
|
|
|
52
26
|
let i = 0 // so we start first chunk with 1
|
|
53
27
|
let ended = false
|
|
54
|
-
return new
|
|
28
|
+
return new Transform({
|
|
55
29
|
objectMode: true,
|
|
56
30
|
...opt,
|
|
57
|
-
transform(
|
|
31
|
+
transform(chunk, _, cb) {
|
|
32
|
+
if (ended) {
|
|
33
|
+
return
|
|
34
|
+
}
|
|
35
|
+
|
|
58
36
|
i++
|
|
59
37
|
|
|
60
38
|
if (i === limit) {
|
|
61
39
|
ended = true
|
|
62
|
-
logger.log(`transformLimit of ${limit} reached`)
|
|
63
40
|
this.push(chunk)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
logger,
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
cb() // after pause
|
|
74
|
-
} else if (!ended) {
|
|
75
|
-
cb(null, chunk)
|
|
76
|
-
} else {
|
|
77
|
-
if (debug) logger.log(`transformLimit.transform after limit`, i)
|
|
78
|
-
// If we ever HANG (don't call cb) - Node will do process.exit(0) to us
|
|
79
|
-
cb() // ended, don't emit anything
|
|
41
|
+
this.push(null) // tell downstream that we're done
|
|
42
|
+
cb()
|
|
43
|
+
queueMicrotask(() => {
|
|
44
|
+
signal.abort(new Error(PIPELINE_GRACEFUL_ABORT))
|
|
45
|
+
})
|
|
46
|
+
return
|
|
80
47
|
}
|
|
48
|
+
|
|
49
|
+
cb(null, chunk)
|
|
81
50
|
},
|
|
82
51
|
})
|
|
83
52
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { _hc } from '@naturalcycles/js-lib'
|
|
1
|
+
import { _hc, type AbortableSignal } from '@naturalcycles/js-lib'
|
|
2
2
|
import { _since } from '@naturalcycles/js-lib/datetime/time.util.js'
|
|
3
|
-
import { _anyToError, ErrorMode } from '@naturalcycles/js-lib/error'
|
|
3
|
+
import { _anyToError, _assert, ErrorMode } from '@naturalcycles/js-lib/error'
|
|
4
4
|
import type { CommonLogger } from '@naturalcycles/js-lib/log'
|
|
5
5
|
import { _stringify } from '@naturalcycles/js-lib/string/stringify.js'
|
|
6
6
|
import {
|
|
@@ -15,9 +15,8 @@ import {
|
|
|
15
15
|
} from '@naturalcycles/js-lib/types'
|
|
16
16
|
import through2Concurrent from 'through2-concurrent'
|
|
17
17
|
import { yellow } from '../../colors/colors.js'
|
|
18
|
-
import type { AbortableTransform } from '../pipeline/pipeline.js'
|
|
19
18
|
import type { TransformTyped } from '../stream.model.js'
|
|
20
|
-
import {
|
|
19
|
+
import { PIPELINE_GRACEFUL_ABORT } from '../stream.util.js'
|
|
21
20
|
|
|
22
21
|
export interface TransformMapOptions<IN = any, OUT = IN> {
|
|
23
22
|
/**
|
|
@@ -81,6 +80,11 @@ export interface TransformMapOptions<IN = any, OUT = IN> {
|
|
|
81
80
|
metric?: string
|
|
82
81
|
|
|
83
82
|
logger?: CommonLogger
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Allows to abort (gracefully stop) the stream from inside the Transform.
|
|
86
|
+
*/
|
|
87
|
+
signal?: AbortableSignal
|
|
84
88
|
}
|
|
85
89
|
|
|
86
90
|
export interface TransformMapStats {
|
|
@@ -140,12 +144,14 @@ export function transformMap<IN = any, OUT = IN>(
|
|
|
140
144
|
onDone,
|
|
141
145
|
metric = 'stream',
|
|
142
146
|
logger = console,
|
|
147
|
+
signal,
|
|
143
148
|
} = opt
|
|
144
149
|
|
|
145
150
|
const started = Date.now() as UnixTimestampMillis
|
|
146
151
|
let index = -1
|
|
147
152
|
let countOut = 0
|
|
148
153
|
let isSettled = false
|
|
154
|
+
let ok = true
|
|
149
155
|
let errors = 0
|
|
150
156
|
const collectedErrors: Error[] = [] // only used if errorMode == THROW_AGGREGATED
|
|
151
157
|
|
|
@@ -185,7 +191,7 @@ export function transformMap<IN = any, OUT = IN>(
|
|
|
185
191
|
|
|
186
192
|
try {
|
|
187
193
|
await onDone?.({
|
|
188
|
-
ok
|
|
194
|
+
ok,
|
|
189
195
|
collectedErrors,
|
|
190
196
|
countErrors: errors,
|
|
191
197
|
countIn: index + 1,
|
|
@@ -200,7 +206,7 @@ export function transformMap<IN = any, OUT = IN>(
|
|
|
200
206
|
}
|
|
201
207
|
},
|
|
202
208
|
},
|
|
203
|
-
async function transformMapFn(
|
|
209
|
+
async function transformMapFn(chunk: IN, _, cb) {
|
|
204
210
|
// Stop processing if isSettled (either THROW_IMMEDIATELY was fired or END received)
|
|
205
211
|
if (isSettled) return cb()
|
|
206
212
|
|
|
@@ -214,7 +220,8 @@ export function transformMap<IN = any, OUT = IN>(
|
|
|
214
220
|
if (res === END) {
|
|
215
221
|
isSettled = true
|
|
216
222
|
logger.log(`transformMap END received at index ${currentIndex}`)
|
|
217
|
-
|
|
223
|
+
_assert(signal, 'signal is required when using END')
|
|
224
|
+
signal.abort(new Error(PIPELINE_GRACEFUL_ABORT))
|
|
218
225
|
return cb()
|
|
219
226
|
}
|
|
220
227
|
|
|
@@ -243,19 +250,23 @@ export function transformMap<IN = any, OUT = IN>(
|
|
|
243
250
|
|
|
244
251
|
if (errorMode === ErrorMode.THROW_IMMEDIATELY) {
|
|
245
252
|
isSettled = true
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
253
|
+
ok = false
|
|
254
|
+
|
|
255
|
+
// Tests show that onDone is still called at `final` (second time),
|
|
256
|
+
// so, we no longer call it here
|
|
257
|
+
|
|
258
|
+
// try {
|
|
259
|
+
// await onDone?.({
|
|
260
|
+
// ok: false,
|
|
261
|
+
// collectedErrors,
|
|
262
|
+
// countErrors: errors,
|
|
263
|
+
// countIn: index + 1,
|
|
264
|
+
// countOut,
|
|
265
|
+
// started,
|
|
266
|
+
// })
|
|
267
|
+
// } catch (err) {
|
|
268
|
+
// logger.error(err)
|
|
269
|
+
// }
|
|
259
270
|
|
|
260
271
|
return cb(err) // Emit error immediately
|
|
261
272
|
}
|