@naturalcycles/nodejs-lib 15.21.0 → 15.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/exec2/exec2.js +1 -0
- package/dist/stream/index.d.ts +2 -2
- package/dist/stream/index.js +2 -2
- package/dist/stream/ndjson/ndjsonMap.d.ts +1 -1
- package/dist/stream/ndjson/ndjsonMap.js +13 -15
- package/dist/stream/ndjson/ndjsonStreamForEach.d.ts +2 -2
- package/dist/stream/ndjson/ndjsonStreamForEach.js +9 -15
- package/dist/stream/ndjson/transformJsonParse.js +0 -1
- package/dist/stream/pipeline.d.ts +79 -0
- package/dist/stream/pipeline.js +220 -0
- package/dist/stream/readable/readableCombined.d.ts +30 -0
- package/dist/stream/readable/readableCombined.js +77 -0
- package/dist/stream/stream.util.d.ts +1 -3
- package/dist/stream/stream.util.js +1 -20
- package/dist/stream/transform/transformChunk.d.ts +5 -8
- package/dist/stream/transform/transformChunk.js +4 -2
- package/dist/stream/transform/transformFlatten.d.ts +1 -0
- package/dist/stream/transform/transformFlatten.js +15 -4
- package/dist/stream/transform/transformLimit.d.ts +3 -26
- package/dist/stream/transform/transformLimit.js +14 -23
- package/dist/stream/transform/transformMap.d.ts +15 -2
- package/dist/stream/transform/transformMap.js +25 -19
- package/dist/stream/transform/transformMapSync.d.ts +5 -3
- package/dist/stream/transform/transformMapSync.js +7 -8
- package/dist/stream/transform/transformTee.js +4 -2
- package/dist/stream/writable/writableForEach.d.ts +2 -1
- package/dist/stream/writable/writableFork.js +2 -2
- package/package.json +1 -1
- package/src/exec2/exec2.ts +1 -0
- package/src/stream/index.ts +2 -2
- package/src/stream/ndjson/ndjsonMap.ts +12 -22
- package/src/stream/ndjson/ndjsonStreamForEach.ts +8 -15
- package/src/stream/ndjson/transformJsonParse.ts +0 -1
- package/src/stream/pipeline.ts +301 -0
- package/src/stream/readable/readableCombined.ts +87 -0
- package/src/stream/stream.util.ts +1 -29
- package/src/stream/transform/transformChunk.ts +8 -11
- package/src/stream/transform/transformFlatten.ts +16 -4
- package/src/stream/transform/transformLimit.ts +20 -51
- package/src/stream/transform/transformMap.ts +45 -21
- package/src/stream/transform/transformMapSync.ts +14 -8
- package/src/stream/transform/transformTee.ts +5 -2
- package/src/stream/writable/writableForEach.ts +2 -2
- package/src/stream/writable/writableFork.ts +2 -2
- package/dist/stream/pipeline/pipeline.d.ts +0 -36
- package/dist/stream/pipeline/pipeline.js +0 -82
- package/dist/stream/readable/readableForEach.d.ts +0 -19
- package/dist/stream/readable/readableForEach.js +0 -30
- package/src/stream/pipeline/pipeline.ts +0 -114
- package/src/stream/readable/readableForEach.ts +0 -42
package/dist/exec2/exec2.js
CHANGED
package/dist/stream/index.d.ts
CHANGED
|
@@ -5,10 +5,10 @@ export * from './ndjson/ndjsonMap.js';
|
|
|
5
5
|
export * from './ndjson/ndjsonStreamForEach.js';
|
|
6
6
|
export * from './ndjson/transformJsonParse.js';
|
|
7
7
|
export * from './ndjson/transformToNDJson.js';
|
|
8
|
-
export * from './pipeline
|
|
8
|
+
export * from './pipeline.js';
|
|
9
9
|
export * from './progressLogger.js';
|
|
10
|
+
export * from './readable/readableCombined.js';
|
|
10
11
|
export * from './readable/readableCreate.js';
|
|
11
|
-
export * from './readable/readableForEach.js';
|
|
12
12
|
export * from './readable/readableFromArray.js';
|
|
13
13
|
export * from './readable/readableToArray.js';
|
|
14
14
|
export * from './stream.model.js';
|
package/dist/stream/index.js
CHANGED
|
@@ -5,10 +5,10 @@ export * from './ndjson/ndjsonMap.js';
|
|
|
5
5
|
export * from './ndjson/ndjsonStreamForEach.js';
|
|
6
6
|
export * from './ndjson/transformJsonParse.js';
|
|
7
7
|
export * from './ndjson/transformToNDJson.js';
|
|
8
|
-
export * from './pipeline
|
|
8
|
+
export * from './pipeline.js';
|
|
9
9
|
export * from './progressLogger.js';
|
|
10
|
+
export * from './readable/readableCombined.js';
|
|
10
11
|
export * from './readable/readableCreate.js';
|
|
11
|
-
export * from './readable/readableForEach.js';
|
|
12
12
|
export * from './readable/readableFromArray.js';
|
|
13
13
|
export * from './readable/readableToArray.js';
|
|
14
14
|
export * from './stream.model.js';
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { AbortableAsyncMapper } from '@naturalcycles/js-lib/types';
|
|
2
|
-
import {
|
|
2
|
+
import type { TransformLogProgressOptions, TransformMapOptions } from '../index.js';
|
|
3
3
|
export interface NDJSONMapOptions<IN = any, OUT = IN> extends TransformMapOptions<IN, OUT>, TransformLogProgressOptions<IN> {
|
|
4
4
|
inputFilePath: string;
|
|
5
5
|
outputFilePath: string;
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js';
|
|
2
|
-
import {
|
|
3
|
-
import { _pipeline, transformLimit, transformLogProgress, transformMap } from '../index.js';
|
|
2
|
+
import { Pipeline } from '../pipeline.js';
|
|
4
3
|
/**
|
|
5
4
|
* Unzips input file automatically, if it ends with `.gz`.
|
|
6
5
|
* Zips output file automatically, if it ends with `.gz`.
|
|
@@ -11,17 +10,16 @@ export async function ndjsonMap(mapper, opt) {
|
|
|
11
10
|
inputFilePath,
|
|
12
11
|
outputFilePath,
|
|
13
12
|
});
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
]);
|
|
13
|
+
await Pipeline.fromNDJsonFile(inputFilePath)
|
|
14
|
+
.limitSource(limitInput)
|
|
15
|
+
.logProgress({ metric: 'read', ...opt })
|
|
16
|
+
.map(mapper, {
|
|
17
|
+
errorMode: ErrorMode.SUPPRESS,
|
|
18
|
+
...opt,
|
|
19
|
+
})
|
|
20
|
+
.flattenIfNeeded()
|
|
21
|
+
// .typeCastAs<OUT>()
|
|
22
|
+
.limit(limitOutput)
|
|
23
|
+
.logProgress({ metric: 'saved', logEvery: logEveryOutput })
|
|
24
|
+
.toNDJsonFile(outputFilePath);
|
|
27
25
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { AbortableAsyncMapper } from '@naturalcycles/js-lib/types';
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
2
|
+
import type { TransformLogProgressOptions } from '../transform/transformLogProgress.js';
|
|
3
|
+
import type { TransformMapOptions } from '../transform/transformMap.js';
|
|
4
4
|
export interface NDJSONStreamForEachOptions<IN = any> extends TransformMapOptions<IN, void>, TransformLogProgressOptions<IN> {
|
|
5
5
|
inputFilePath: string;
|
|
6
6
|
}
|
|
@@ -1,21 +1,15 @@
|
|
|
1
1
|
import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js';
|
|
2
|
-
import {
|
|
3
|
-
import { transformLogProgress, } from '../transform/transformLogProgress.js';
|
|
4
|
-
import { transformMap } from '../transform/transformMap.js';
|
|
5
|
-
import { writableVoid } from '../writable/writableVoid.js';
|
|
6
|
-
import { createReadStreamAsNDJSON } from './createReadStreamAsNDJSON.js';
|
|
2
|
+
import { Pipeline } from '../pipeline.js';
|
|
7
3
|
/**
|
|
8
4
|
* Convenience function to `forEach` through an ndjson file.
|
|
9
5
|
*/
|
|
10
6
|
export async function ndjsonStreamForEach(mapper, opt) {
|
|
11
|
-
await
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
writableVoid(),
|
|
20
|
-
]);
|
|
7
|
+
await Pipeline.fromNDJsonFile(opt.inputFilePath)
|
|
8
|
+
.map(mapper, {
|
|
9
|
+
errorMode: ErrorMode.THROW_AGGREGATED,
|
|
10
|
+
...opt,
|
|
11
|
+
predicate: () => true, // to log progress properly
|
|
12
|
+
})
|
|
13
|
+
.logProgress(opt)
|
|
14
|
+
.run();
|
|
21
15
|
}
|
|
@@ -19,7 +19,6 @@ export function transformJsonParse(opt = {}) {
|
|
|
19
19
|
writableObjectMode: false,
|
|
20
20
|
readableObjectMode: true,
|
|
21
21
|
// highWatermark increased, because it's proven to be faster: https://github.com/nodejs/node/pull/52037
|
|
22
|
-
// todo: it'll be default in Node 22, then we can remove this
|
|
23
22
|
writableHighWaterMark: 64 * 1024,
|
|
24
23
|
transform(chunk, _, cb) {
|
|
25
24
|
try {
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { type Transform } from 'node:stream';
|
|
2
|
+
import type { AbortableAsyncMapper, AsyncIndexedMapper, AsyncPredicate, END, IndexedMapper, NonNegativeInteger, PositiveInteger, Predicate, SKIP } from '@naturalcycles/js-lib/types';
|
|
3
|
+
import type { ReadableTyped, TransformOptions, TransformTyped, WritableTyped } from './stream.model.js';
|
|
4
|
+
import { type TransformLogProgressOptions } from './transform/transformLogProgress.js';
|
|
5
|
+
import { type TransformMapOptions } from './transform/transformMap.js';
|
|
6
|
+
import { type TransformMapSimpleOptions } from './transform/transformMapSimple.js';
|
|
7
|
+
import { type TransformMapSyncOptions } from './transform/transformMapSync.js';
|
|
8
|
+
import { type TransformOffsetOptions } from './transform/transformOffset.js';
|
|
9
|
+
import { type TransformTapOptions } from './transform/transformTap.js';
|
|
10
|
+
import { type TransformThrottleOptions } from './transform/transformThrottle.js';
|
|
11
|
+
/**
 * Chainable builder for a Node.js stream pipeline.
 *
 * Create an instance with one of the static factories, add transforms with the
 * chainable methods, then execute it with one of the Promise-returning methods
 * (`toArray`, `toFile`, `toNDJsonFile`, `to`, `forEach`, `forEachSync`, `run`).
 */
export declare class Pipeline<T> {
    /** The Readable that the pipeline starts from. */
    private readonly source;
    /** Transforms in the order they were added. */
    private transforms;
    /** Final Writable; when not set by the time of `run()`, a void Writable is used. */
    private destination?;
    /** Value passed to `limitSource()`, applied to the source when the pipeline runs. */
    private readableLimit?;
    /** Signal shared with the map/limit transforms and with the underlying pipeline call. */
    private abortableSignal;
    private constructor();
    /** Creates a Pipeline from an existing Readable. */
    static from<T>(source: ReadableTyped<T>): Pipeline<T>;
    /**
     * Technically same as `fromIterable` (since Array is Iterable),
     * but named a bit friendlier.
     */
    static fromArray<T>(input: T[]): Pipeline<T>;
    /** Creates a Pipeline from a sync or async iterable (via `Readable.from`). */
    static fromIterable<T>(input: Iterable<T> | AsyncIterable<T>): Pipeline<T>;
    /** Creates a Pipeline that reads the given file as NDJSON. */
    static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T>;
    /**
     * Limits the source Readable by using `.take(limit)` on it.
     * This is THE preferred way of limiting the source.
     *
     * The limit is only applied when it is truthy, so `undefined` and `0` both mean "no limit".
     */
    limitSource(limit: NonNegativeInteger | undefined): this;
    /**
     * If possible - STRONGLY PREFER applying `.take(limit)` on the source Readable,
     * as it's a clean graceful way of limiting the Readable. Example:
     *
     * Pipeline.from(myReadable.take(10))
     *
     * or
     *
     * Pipeline
     *   .from(myReadable)
     *   .limitSource(10)
     *
     * If applying `take` on Readable is not possible - use this method at your own risk.
     * Why the warning?
     * The limit works by aborting the stream, and then catching the error - certainly
     * less clean than `.take()` on the source.
     */
    limit(limit: NonNegativeInteger | undefined): this;
    /** Groups items into arrays of `chunkSize` items. */
    chunk(chunkSize: PositiveInteger, opt?: TransformOptions): Pipeline<T[]>;
    /** Emits every item of each incoming array individually. Only callable when `T` is an array type. */
    flatten<TO>(this: Pipeline<readonly TO[]>): Pipeline<TO>;
    /** Like `flatten`, but items that are not arrays are passed through as is. */
    flattenIfNeeded(): Pipeline<T extends readonly (infer TO)[] ? TO : T>;
    /** Adds a progress-logging transform. */
    logProgress(opt?: TransformLogProgressOptions): this;
    /** Adds an async mapping transform. The mapper may return `SKIP` or `END`. */
    map<TO>(mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>, opt?: TransformMapOptions<T, TO>): Pipeline<TO>;
    /** Adds a sync mapping transform. The mapper may return `SKIP` or `END`. */
    mapSync<TO>(mapper: IndexedMapper<T, TO | typeof SKIP | typeof END>, opt?: TransformMapSyncOptions): Pipeline<TO>;
    /** Adds a simple sync mapping transform (no `SKIP`/`END` in the mapper's return type). */
    mapSimple<TO>(mapper: IndexedMapper<T, TO>, opt?: TransformMapSimpleOptions): Pipeline<TO>;
    /** Adds an async filter, implemented as an identity `map` with `predicate`. */
    filter(predicate: AsyncPredicate<T>, opt?: TransformMapOptions): this;
    /** Adds a sync filter. */
    filterSync(predicate: Predicate<T>, opt?: TransformOptions): this;
    /** Adds an offset transform. */
    offset(opt: TransformOffsetOptions): this;
    /** Adds a tap transform that calls `fn` for the items passing through. */
    tap(fn: AsyncIndexedMapper<T, any>, opt?: TransformTapOptions): this;
    /** Adds a throttling transform. */
    throttle(opt: TransformThrottleOptions): this;
    /** Adds an arbitrary typed Transform. */
    transform<TO>(transform: TransformTyped<T, TO>): Pipeline<TO>;
    /**
     * Helper method to add multiple transforms at once.
     * Not type safe! Prefer using singular `transform()` multiple times for type safety.
     */
    transformMany<TO>(transforms: Transform[]): Pipeline<TO>;
    /**
     * Utility method just to conveniently type-cast the current Pipeline type.
     * No runtime effect.
     */
    typeCastAs<TO>(): Pipeline<TO>;
    /** Runs the pipeline and resolves with all resulting items collected into an array. */
    toArray(opt?: TransformOptions): Promise<T[]>;
    /** Runs the pipeline, writing the output to the given file. */
    toFile(outputFilePath: string): Promise<void>;
    /** Runs the pipeline, writing items as NDJSON to the given file (gzipped when the path ends with `.gz`). */
    toNDJsonFile(outputFilePath: string): Promise<void>;
    /** Runs the pipeline into the given Writable. */
    to(destination: WritableTyped<T>): Promise<void>;
    /** Adds `fn` as a final async mapping step and runs the pipeline. */
    forEach(fn: AsyncIndexedMapper<T, void>, opt?: TransformMapOptions<T, void>): Promise<void>;
    /** Adds `fn` as a final sync mapping step and runs the pipeline. */
    forEachSync(fn: IndexedMapper<T, void>, opt?: TransformMapSyncOptions<T, void>): Promise<void>;
    /** Runs the pipeline as configured so far. */
    run(): Promise<void>;
}
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { pipeline } from 'node:stream/promises';
|
|
3
|
+
import { createGzip } from 'node:zlib';
|
|
4
|
+
import { createAbortableSignal } from '@naturalcycles/js-lib';
|
|
5
|
+
import { fs2 } from '../fs/fs2.js';
|
|
6
|
+
import { createReadStreamAsNDJSON } from './ndjson/createReadStreamAsNDJSON.js';
|
|
7
|
+
import { transformToNDJson } from './ndjson/transformToNDJson.js';
|
|
8
|
+
import { PIPELINE_GRACEFUL_ABORT } from './stream.util.js';
|
|
9
|
+
import { transformChunk } from './transform/transformChunk.js';
|
|
10
|
+
import { transformFilterSync } from './transform/transformFilter.js';
|
|
11
|
+
import { transformFlatten, transformFlattenIfNeeded } from './transform/transformFlatten.js';
|
|
12
|
+
import { transformLimit } from './transform/transformLimit.js';
|
|
13
|
+
import { transformLogProgress, } from './transform/transformLogProgress.js';
|
|
14
|
+
import { transformMap } from './transform/transformMap.js';
|
|
15
|
+
import { transformMapSimple, } from './transform/transformMapSimple.js';
|
|
16
|
+
import { transformMapSync } from './transform/transformMapSync.js';
|
|
17
|
+
import { transformOffset } from './transform/transformOffset.js';
|
|
18
|
+
import { transformTap } from './transform/transformTap.js';
|
|
19
|
+
import { transformThrottle } from './transform/transformThrottle.js';
|
|
20
|
+
import { writablePushToArray } from './writable/writablePushToArray.js';
|
|
21
|
+
import { writableVoid } from './writable/writableVoid.js';
|
|
22
|
+
/**
 * Chainable builder on top of `pipeline` from `node:stream/promises`.
 *
 * The chainable methods only collect transforms; nothing is connected or read
 * until `run()` is called (directly, or through `toArray`, `toFile`,
 * `toNDJsonFile`, `to`, `forEach`, `forEachSync`).
 */
export class Pipeline {
    // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
    source;
    // Transforms in the order they were added
    transforms = [];
    // Final Writable; defaulted to writableVoid() in run() when not set
    destination;
    // Set by limitSource(), applied to the source in run()
    readableLimit;
    // Shared with transformLimit/transformMap/transformMapSync and with the pipeline() call
    abortableSignal = createAbortableSignal();
    constructor(source) {
        this.source = source;
    }
    /**
     * Creates a Pipeline from an existing Readable.
     */
    static from(source) {
        return new Pipeline(source);
    }
    /**
     * Technically same as `fromIterable` (since Array is Iterable),
     * but named a bit friendlier.
     */
    static fromArray(input) {
        return new Pipeline(Readable.from(input));
    }
    /**
     * Creates a Pipeline from an iterable, via `Readable.from`.
     */
    static fromIterable(input) {
        return new Pipeline(Readable.from(input));
    }
    /**
     * Creates a Pipeline whose source is `createReadStreamAsNDJSON(sourceFilePath)`.
     */
    static fromNDJsonFile(sourceFilePath) {
        return new Pipeline(createReadStreamAsNDJSON(sourceFilePath));
    }
    /**
     * Limits the source Readable by using `.take(limit)` on it.
     * This is THE preferred way of limiting the source.
     *
     * The limit is stored here and applied in `run()`, only when it is truthy
     * (so `undefined` and `0` both mean "no limit").
     */
    limitSource(limit) {
        this.readableLimit = limit;
        return this;
    }
    /**
     * If possible - STRONGLY PREFER applying `.take(limit)` on the source Readable,
     * as it's a clean graceful way of limiting the Readable. Example:
     *
     * Pipeline.from(myReadable.take(10))
     *
     * or
     *
     * Pipeline
     *   .from(myReadable)
     *   .limitSource(10)
     *
     * If applying `take` on Readable is not possible - use this method at your own risk.
     * Why the warning?
     * The limit works by aborting the stream, and then catching the error - certainly
     * less clean than `.take()` on the source.
     */
    limit(limit) {
        this.transforms.push(transformLimit({
            limit,
            signal: this.abortableSignal,
        }));
        return this;
    }
    /**
     * Adds `transformChunk(chunkSize, opt)`.
     */
    chunk(chunkSize, opt) {
        this.transforms.push(transformChunk(chunkSize, opt));
        return this;
    }
    /**
     * Adds `transformFlatten()`.
     */
    flatten() {
        this.transforms.push(transformFlatten());
        return this;
    }
    /**
     * Adds `transformFlattenIfNeeded()`.
     */
    flattenIfNeeded() {
        this.transforms.push(transformFlattenIfNeeded());
        return this;
    }
    // TransformLogProgressOptions intentionally doesn't have <T> passed, as it's inconvenient in many cases
    logProgress(opt) {
        this.transforms.push(transformLogProgress(opt));
        return this;
    }
    /**
     * Adds `transformMap(mapper, opt)`.
     * The pipeline's own signal always replaces any `opt.signal`.
     */
    map(mapper, opt) {
        this.transforms.push(transformMap(mapper, {
            ...opt,
            signal: this.abortableSignal,
        }));
        return this;
    }
    /**
     * Adds `transformMapSync(mapper, opt)`.
     * The pipeline's own signal always replaces any `opt.signal`.
     */
    mapSync(mapper, opt) {
        this.transforms.push(transformMapSync(mapper, {
            ...opt,
            signal: this.abortableSignal,
        }));
        return this;
    }
    /**
     * Adds `transformMapSimple(mapper, opt)`.
     */
    mapSimple(mapper, opt) {
        this.transforms.push(transformMapSimple(mapper, opt));
        return this;
    }
    /**
     * Adds an async filter, implemented as an identity `transformMap` with `predicate`.
     */
    filter(predicate, opt) {
        this.transforms.push(transformMap(v => v, {
            ...opt,
            // Must come after the spread: otherwise an `opt.predicate` key
            // (even an explicit `undefined`) would silently replace the filter.
            predicate,
            signal: this.abortableSignal,
        }));
        return this;
    }
    /**
     * Adds `transformFilterSync(predicate, opt)`.
     */
    filterSync(predicate, opt) {
        this.transforms.push(transformFilterSync(predicate, opt));
        return this;
    }
    /**
     * Adds `transformOffset(opt)`.
     */
    offset(opt) {
        this.transforms.push(transformOffset(opt));
        return this;
    }
    /**
     * Adds `transformTap(fn, opt)`.
     */
    tap(fn, opt) {
        this.transforms.push(transformTap(fn, opt));
        return this;
    }
    /**
     * Adds `transformThrottle(opt)`.
     */
    throttle(opt) {
        this.transforms.push(transformThrottle(opt));
        return this;
    }
    // todo: tee/fork
    /**
     * Adds an arbitrary Transform.
     */
    transform(transform) {
        this.transforms.push(transform);
        return this;
    }
    /**
     * Helper method to add multiple transforms at once.
     * Not type safe! Prefer using singular `transform()` multiple times for type safety.
     */
    transformMany(transforms) {
        this.transforms.push(...transforms);
        return this;
    }
    /**
     * Utility method just to conveniently type-cast the current Pipeline type.
     * No runtime effect.
     */
    typeCastAs() {
        return this;
    }
    /**
     * Runs the pipeline into an in-memory array and returns that array.
     */
    async toArray(opt) {
        const arr = [];
        this.destination = writablePushToArray(arr, opt);
        await this.run();
        return arr;
    }
    /**
     * Runs the pipeline into a write stream for `outputFilePath`.
     */
    async toFile(outputFilePath) {
        // NOTE(review): presumably creates the file (and parent dirs) if missing - confirm against fs2
        fs2.ensureFile(outputFilePath);
        this.destination = fs2.createWriteStream(outputFilePath);
        await this.run();
    }
    /**
     * Appends `transformToNDJson()` (plus gzip when the path ends with `.gz`)
     * and runs the pipeline into a write stream for `outputFilePath`.
     */
    async toNDJsonFile(outputFilePath) {
        // NOTE(review): presumably creates the file (and parent dirs) if missing - confirm against fs2
        fs2.ensureFile(outputFilePath);
        this.transforms.push(transformToNDJson());
        if (outputFilePath.endsWith('.gz')) {
            this.transforms.push(createGzip({
            // chunkSize: 64 * 1024, // no observed speedup
            }));
        }
        this.destination = fs2.createWriteStream(outputFilePath, {
        // highWaterMark: 64 * 1024, // no observed speedup
        });
        await this.run();
    }
    /**
     * Runs the pipeline into the given Writable.
     */
    async to(destination) {
        this.destination = destination;
        await this.run();
    }
    /**
     * Appends `transformMap(fn, opt)` as the last transform and runs the pipeline.
     */
    async forEach(fn, opt) {
        this.transforms.push(transformMap(fn, {
            ...opt,
            signal: this.abortableSignal,
        }));
        await this.run();
    }
    /**
     * Appends `transformMapSync(fn, opt)` as the last transform and runs the pipeline.
     */
    async forEachSync(fn, opt) {
        this.transforms.push(transformMapSync(fn, {
            ...opt,
            signal: this.abortableSignal,
        }));
        await this.run();
    }
    /**
     * Connects source, transforms and destination with `pipeline()` and awaits completion.
     *
     * - The destination defaults to `writableVoid()` when none was set.
     * - The source is wrapped in `.take(readableLimit)` when a truthy limit was set.
     * - An Error whose `cause.message` equals `PIPELINE_GRACEFUL_ABORT` is treated
     *   as a graceful stop: it is logged and swallowed. Any other error is rethrown.
     */
    async run() {
        this.destination ||= writableVoid();
        let { source } = this;
        if (this.readableLimit) {
            source = source.take(this.readableLimit);
        }
        try {
            await pipeline([source, ...this.transforms, this.destination], {
                signal: this.abortableSignal,
            });
        }
        catch (err) {
            if (err instanceof Error && err.cause?.message === PIPELINE_GRACEFUL_ABORT) {
                console.log('pipeline gracefully aborted'); // todo: this message may be removed later
                return;
            }
            throw err;
        }
    }
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
|
|
3
|
+
/**
|
|
4
|
+
* Allows to combine multiple Readables into 1 Readable.
|
|
5
|
+
* As soon as any of the input Readables emit - the output Readable emits
|
|
6
|
+
* (passes through).
|
|
7
|
+
* Order is not preserved in any way, first come first served!
|
|
8
|
+
*
|
|
9
|
+
* Readable completes when all input Readables complete.
|
|
10
|
+
*
|
|
11
|
+
* @experimental
|
|
12
|
+
*/
|
|
13
|
+
export declare class ReadableCombined<T> extends Readable implements ReadableTyped<T> {
    /** The input Readables whose items are passed through to this Readable. */
    inputs: Readable[];
    /** Creates the combined Readable. Consumption of the inputs starts in the constructor. */
    static create<T>(inputs: Readable[]): ReadableCombined<T>;
    private constructor();
    /**
     * If defined - we are in Paused mode
     * and should await the lock to be resolved before proceeding.
     *
     * If not defined - we are in Flowing mode, no limits in data flow.
     */
    private lock?;
    /** Number of items received from the inputs. */
    private countIn;
    /** Number of items pushed downstream. */
    private countOut;
    /** Number of `_read()` calls. */
    private countReads;
    /** Consumes all inputs concurrently, pushes their items, and ends the stream when all inputs are done. */
    private start;
    /** Resumes the flow if it was paused by the lock. */
    _read(): void;
    /** Logs the current `countIn` / `countOut` / `countReads` counters. */
    private logStats;
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
|
|
3
|
+
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
|
|
4
|
+
/**
|
|
5
|
+
* Allows to combine multiple Readables into 1 Readable.
|
|
6
|
+
* As soon as any of the input Readables emit - the output Readable emits
|
|
7
|
+
* (passes through).
|
|
8
|
+
* Order is not preserved in any way, first come first served!
|
|
9
|
+
*
|
|
10
|
+
* Readable completes when all input Readables complete.
|
|
11
|
+
*
|
|
12
|
+
* @experimental
|
|
13
|
+
*/
|
|
14
|
+
export class ReadableCombined extends Readable {
    // The input Readables whose items are passed through to this Readable
    inputs;
    /**
     * Creates the combined Readable. Consumption of the inputs starts immediately.
     */
    static create(inputs) {
        return new ReadableCombined(inputs);
    }
    constructor(inputs) {
        super({ objectMode: true });
        this.inputs = inputs;
        // A failing input must error the combined stream
        // instead of surfacing as an unhandled promise rejection.
        this.start().catch(err => this.destroy(err));
    }
    /**
     * If defined - we are in Paused mode
     * and should await the lock to be resolved before proceeding.
     *
     * If not defined - we are in Flowing mode, no limits in data flow.
     */
    lock;
    // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
    countIn = 0;
    // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
    countOut = 0;
    // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
    countReads = 0;
    /**
     * Iterates over all inputs (via pMap), pushing every item downstream.
     * Pauses on the lock whenever `push` signalled backpressure.
     * Ends the stream after all inputs are done.
     */
    async start() {
        await pMap(this.inputs, async (input, i) => {
            for await (const item of input) {
                this.countIn++;
                this.logStats();
                // `while`, not `if`: several producers can be woken by the same lock,
                // and one of them may already have re-paused the flow.
                while (this.lock) {
                    await this.lock;
                }
                if (this.destroyed) {
                    // Leaving the `for await` loop also destroys this input
                    return;
                }
                const shouldContinue = this.push(item);
                this.countOut++;
                if (!shouldContinue && !this.lock) {
                    this.lock = pDefer();
                    console.log(`ReadableCombined.push #${i} returned false, pausing the flow!`);
                }
            }
            console.log(`ReadableCombined: input #${i} done`);
        });
        console.log(`ReadableCombined: all inputs done!`);
        if (!this.destroyed) {
            this.push(null);
        }
    }
    /**
     * Called by the stream machinery when the consumer wants more data:
     * releases the lock (if any), so paused producers continue.
     */
    _read() {
        this.countReads++;
        if (this.lock) {
            console.log(`ReadableCombined._read: resuming the flow!`);
            // calling it in this order is important!
            // this.lock should be undefined BEFORE we call lock.resolve()
            const { lock } = this;
            this.lock = undefined;
            lock.resolve();
        }
    }
    /**
     * Releases the lock on destroy, so producers parked on it wake up,
     * observe `destroyed` and stop, instead of waiting forever.
     */
    _destroy(err, cb) {
        const { lock } = this;
        this.lock = undefined;
        lock?.resolve();
        cb(err);
    }
    /**
     * Logs the current counters.
     */
    logStats() {
        const { countIn, countOut, countReads } = this;
        console.log({
            countIn,
            countOut,
            countReads,
        });
    }
}
|
|
@@ -1,3 +1 @@
|
|
|
1
|
-
|
|
2
|
-
import type { CommonLogger } from '@naturalcycles/js-lib/log';
|
|
3
|
-
export declare function pipelineClose(name: string, readableDownstream: Readable, sourceReadable: Readable | undefined, streamDone: Promise<void> | undefined, logger: CommonLogger): void;
|
|
1
|
+
export declare const PIPELINE_GRACEFUL_ABORT = "PIPELINE_GRACEFUL_ABORT";
|
|
@@ -1,20 +1 @@
|
|
|
1
|
-
export
|
|
2
|
-
readableDownstream.push(null); // this closes the stream, so downstream Readable will receive `end` and won't write anything
|
|
3
|
-
if (!sourceReadable) {
|
|
4
|
-
logger.warn(`${name} sourceReadable is not provided, readable stream will not be stopped`);
|
|
5
|
-
}
|
|
6
|
-
else {
|
|
7
|
-
logger.log(`${name} is calling readable.unpipe() to pause the stream`);
|
|
8
|
-
sourceReadable.unpipe(); // it is expected to pause the stream
|
|
9
|
-
if (!streamDone) {
|
|
10
|
-
logger.log(`${name} streamDone is not provided, will do readable.destroy right away`);
|
|
11
|
-
sourceReadable.destroy();
|
|
12
|
-
}
|
|
13
|
-
else {
|
|
14
|
-
void streamDone.then(() => {
|
|
15
|
-
logger.log(`${name} streamDone, calling readable.destroy()`);
|
|
16
|
-
sourceReadable.destroy(); // this throws ERR_STREAM_PREMATURE_CLOSE
|
|
17
|
-
});
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
}
|
|
1
|
+
export const PIPELINE_GRACEFUL_ABORT = 'PIPELINE_GRACEFUL_ABORT';
|
|
@@ -1,14 +1,11 @@
|
|
|
1
|
+
import type { PositiveInteger } from '@naturalcycles/js-lib/types';
|
|
1
2
|
import type { TransformOptions, TransformTyped } from '../stream.model.js';
|
|
2
|
-
export interface TransformChunkOptions extends TransformOptions {
|
|
3
|
-
/**
|
|
4
|
-
* How many items to include in each chunk.
|
|
5
|
-
* Last chunk will contain the remaining items, possibly less than chunkSize.
|
|
6
|
-
*/
|
|
7
|
-
chunkSize: number;
|
|
8
|
-
}
|
|
9
3
|
/**
|
|
10
4
|
* Similar to RxJS bufferCount(),
|
|
11
5
|
* allows to "chunk" the input stream into chunks of `opt.chunkSize` size.
|
|
12
6
|
* Last chunk will contain the remaining items, possibly less than chunkSize.
|
|
7
|
+
*
|
|
8
|
+
* `chunkSize` indicates how many items to include in each chunk.
|
|
9
|
+
* Last chunk will contain the remaining items, possibly less than chunkSize.
|
|
13
10
|
*/
|
|
14
|
-
export declare function transformChunk<IN = any>(
|
|
11
|
+
export declare function transformChunk<IN = any>(chunkSize: PositiveInteger, opt?: TransformOptions): TransformTyped<IN, IN[]>;
|
|
@@ -3,9 +3,11 @@ import { Transform } from 'node:stream';
|
|
|
3
3
|
* Similar to RxJS bufferCount(),
|
|
4
4
|
* allows to "chunk" the input stream into chunks of `opt.chunkSize` size.
|
|
5
5
|
* Last chunk will contain the remaining items, possibly less than chunkSize.
|
|
6
|
+
*
|
|
7
|
+
* `chunkSize` indicates how many items to include in each chunk.
|
|
8
|
+
* Last chunk will contain the remaining items, possibly less than chunkSize.
|
|
6
9
|
*/
|
|
7
|
-
export function transformChunk(opt) {
|
|
8
|
-
const { chunkSize } = opt;
|
|
10
|
+
export function transformChunk(chunkSize, opt) {
|
|
9
11
|
let buf = [];
|
|
10
12
|
return new Transform({
|
|
11
13
|
objectMode: true,
|
|
@@ -3,15 +3,26 @@ export function transformFlatten() {
|
|
|
3
3
|
return new Transform({
|
|
4
4
|
objectMode: true,
|
|
5
5
|
transform(chunk, _, cb) {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
this.push(chunk);
|
|
6
|
+
for (const item of chunk) {
|
|
7
|
+
this.push(item);
|
|
9
8
|
}
|
|
10
|
-
|
|
9
|
+
cb(); // acknowledge
|
|
10
|
+
},
|
|
11
|
+
});
|
|
12
|
+
}
|
|
13
|
+
export function transformFlattenIfNeeded() {
|
|
14
|
+
return new Transform({
|
|
15
|
+
objectMode: true,
|
|
16
|
+
transform(chunk, _, cb) {
|
|
17
|
+
if (Array.isArray(chunk)) {
|
|
11
18
|
for (const item of chunk) {
|
|
12
19
|
this.push(item);
|
|
13
20
|
}
|
|
14
21
|
}
|
|
22
|
+
else {
|
|
23
|
+
// As a safety precaution, to not crash the pipeline - push as is
|
|
24
|
+
this.push(chunk);
|
|
25
|
+
}
|
|
15
26
|
cb(); // acknowledge
|
|
16
27
|
},
|
|
17
28
|
});
|