@naturalcycles/nodejs-lib 15.25.0 → 15.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/slack/slack.service.d.ts +1 -0
- package/dist/slack/slack.service.js +4 -3
- package/dist/stream/index.d.ts +2 -8
- package/dist/stream/index.js +2 -8
- package/dist/stream/ndjson/createReadStreamAsNDJson.d.ts +16 -0
- package/dist/stream/ndjson/{createReadStreamAsNDJSON.js → createReadStreamAsNDJson.js} +10 -13
- package/dist/stream/ndjson/ndjsonMap.d.ts +2 -0
- package/dist/stream/ndjson/ndjsonMap.js +2 -0
- package/dist/stream/pipeline.d.ts +2 -1
- package/dist/stream/pipeline.js +25 -8
- package/dist/stream/progressLogger.d.ts +3 -3
- package/dist/stream/readable/readableCombined.d.ts +4 -2
- package/dist/stream/readable/readableCombined.js +16 -11
- package/dist/stream/readable/readableCreate.d.ts +1 -3
- package/dist/stream/readable/readableCreate.js +4 -4
- package/dist/stream/stream.model.d.ts +16 -0
- package/dist/stream/transform/transformFork.d.ts +10 -0
- package/dist/stream/transform/transformFork.js +62 -0
- package/dist/stream/transform/transformLimit.d.ts +2 -1
- package/dist/stream/transform/transformLimit.js +3 -3
- package/dist/stream/transform/transformLogProgress.js +3 -2
- package/dist/stream/transform/transformMap.d.ts +2 -4
- package/dist/stream/transform/transformMap.js +3 -2
- package/dist/stream/transform/transformMapSimple.d.ts +2 -4
- package/dist/stream/transform/transformMapSimple.js +3 -2
- package/dist/stream/transform/transformMapSync.d.ts +2 -4
- package/dist/stream/transform/transformMapSync.js +3 -1
- package/dist/stream/transform/transformSplit.js +2 -2
- package/dist/stream/transform/transformThrottle.d.ts +2 -3
- package/dist/stream/transform/transformThrottle.js +22 -27
- package/dist/stream/writable/writableVoid.d.ts +1 -8
- package/dist/stream/writable/writableVoid.js +0 -1
- package/package.json +1 -1
- package/src/slack/slack.service.ts +6 -3
- package/src/stream/index.ts +2 -8
- package/src/stream/ndjson/{createReadStreamAsNDJSON.ts → createReadStreamAsNDJson.ts} +10 -13
- package/src/stream/ndjson/ndjsonMap.ts +2 -0
- package/src/stream/pipeline.ts +33 -9
- package/src/stream/progressLogger.ts +3 -3
- package/src/stream/readable/readableCombined.ts +22 -11
- package/src/stream/readable/readableCreate.ts +4 -3
- package/src/stream/stream.model.ts +18 -0
- package/src/stream/transform/transformFork.ts +74 -0
- package/src/stream/transform/transformLimit.ts +5 -4
- package/src/stream/transform/transformLogProgress.ts +3 -2
- package/src/stream/transform/transformMap.ts +4 -8
- package/src/stream/transform/transformMapSimple.ts +10 -7
- package/src/stream/transform/transformMapSync.ts +4 -6
- package/src/stream/transform/transformSplit.ts +2 -2
- package/src/stream/transform/transformThrottle.ts +28 -36
- package/src/stream/writable/writableVoid.ts +1 -10
- package/dist/stream/ndjson/createReadStreamAsNDJSON.d.ts +0 -19
- package/dist/stream/ndjson/createWriteStreamAsNDJSON.d.ts +0 -11
- package/dist/stream/ndjson/createWriteStreamAsNDJSON.js +0 -27
- package/dist/stream/ndjson/ndjsonStreamForEach.d.ts +0 -10
- package/dist/stream/ndjson/ndjsonStreamForEach.js +0 -15
- package/dist/stream/readable/readableToArray.d.ts +0 -9
- package/dist/stream/readable/readableToArray.js +0 -17
- package/dist/stream/transform/transformTee.d.ts +0 -13
- package/dist/stream/transform/transformTee.js +0 -37
- package/dist/stream/transform/transformToArray.d.ts +0 -5
- package/dist/stream/transform/transformToArray.js +0 -20
- package/dist/stream/writable/writableForEach.d.ts +0 -12
- package/dist/stream/writable/writableForEach.js +0 -15
- package/dist/stream/writable/writableFork.d.ts +0 -10
- package/dist/stream/writable/writableFork.js +0 -45
- package/dist/stream/writable/writableLimit.d.ts +0 -8
- package/dist/stream/writable/writableLimit.js +0 -25
- package/src/stream/ndjson/createWriteStreamAsNDJSON.ts +0 -30
- package/src/stream/ndjson/ndjsonStreamForEach.ts +0 -28
- package/src/stream/readable/readableToArray.ts +0 -19
- package/src/stream/transform/transformTee.ts +0 -48
- package/src/stream/transform/transformToArray.ts +0 -23
- package/src/stream/writable/writableForEach.ts +0 -25
- package/src/stream/writable/writableFork.ts +0 -56
- package/src/stream/writable/writableLimit.ts +0 -29
|
@@ -11,9 +11,10 @@ import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js';
|
|
|
11
11
|
*/
|
|
12
12
|
export function transformMapSimple(mapper, opt = {}) {
|
|
13
13
|
let index = -1;
|
|
14
|
-
const { errorMode = ErrorMode.THROW_IMMEDIATELY, logger = console } = opt;
|
|
14
|
+
const { errorMode = ErrorMode.THROW_IMMEDIATELY, logger = console, objectMode = true, highWaterMark, } = opt;
|
|
15
15
|
return new Transform({
|
|
16
|
-
objectMode
|
|
16
|
+
objectMode,
|
|
17
|
+
highWaterMark,
|
|
17
18
|
transform(chunk, _, cb) {
|
|
18
19
|
try {
|
|
19
20
|
cb(null, mapper(chunk, ++index));
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import type { AbortableSignal } from '@naturalcycles/js-lib';
|
|
2
2
|
import { ErrorMode } from '@naturalcycles/js-lib/error';
|
|
3
|
-
import type { CommonLogger } from '@naturalcycles/js-lib/log';
|
|
4
3
|
import type { IndexedMapper, Predicate } from '@naturalcycles/js-lib/types';
|
|
5
4
|
import { END, SKIP } from '@naturalcycles/js-lib/types';
|
|
6
|
-
import type { TransformTyped } from '../stream.model.js';
|
|
5
|
+
import type { TransformOptions, TransformTyped } from '../stream.model.js';
|
|
7
6
|
import type { TransformMapStats } from './transformMap.js';
|
|
8
|
-
export interface TransformMapSyncOptions<IN = any, OUT = IN> {
|
|
7
|
+
export interface TransformMapSyncOptions<IN = any, OUT = IN> extends TransformOptions {
|
|
9
8
|
/**
|
|
10
9
|
* @default true
|
|
11
10
|
*/
|
|
@@ -44,7 +43,6 @@ export interface TransformMapSyncOptions<IN = any, OUT = IN> {
|
|
|
44
43
|
* @default `stream`
|
|
45
44
|
*/
|
|
46
45
|
metric?: string;
|
|
47
|
-
logger?: CommonLogger;
|
|
48
46
|
/**
|
|
49
47
|
* Allows to abort (gracefully stop) the stream from inside the Transform.
|
|
50
48
|
*/
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { Transform } from 'node:stream';
|
|
2
2
|
import { _anyToError, _assert, ErrorMode } from '@naturalcycles/js-lib/error';
|
|
3
|
+
import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log';
|
|
3
4
|
import { END, SKIP } from '@naturalcycles/js-lib/types';
|
|
4
5
|
import { yellow } from '../../colors/colors.js';
|
|
5
6
|
import { PIPELINE_GRACEFUL_ABORT } from '../stream.util.js';
|
|
@@ -9,13 +10,14 @@ import { PIPELINE_GRACEFUL_ABORT } from '../stream.util.js';
|
|
|
9
10
|
*/
|
|
10
11
|
export function transformMapSync(mapper, opt = {}) {
|
|
11
12
|
const { predicate, // defaults to "no predicate" (pass everything)
|
|
12
|
-
errorMode = ErrorMode.THROW_IMMEDIATELY, onError, onDone, metric = 'stream', objectMode = true,
|
|
13
|
+
errorMode = ErrorMode.THROW_IMMEDIATELY, onError, onDone, metric = 'stream', objectMode = true, signal, } = opt;
|
|
13
14
|
const started = Date.now();
|
|
14
15
|
let index = -1;
|
|
15
16
|
let countOut = 0;
|
|
16
17
|
let isSettled = false;
|
|
17
18
|
let errors = 0;
|
|
18
19
|
const collectedErrors = []; // only used if errorMode == THROW_AGGREGATED
|
|
20
|
+
const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel);
|
|
19
21
|
return new Transform({
|
|
20
22
|
objectMode,
|
|
21
23
|
...opt,
|
|
@@ -14,7 +14,7 @@ export function transformSplitOnNewline() {
|
|
|
14
14
|
writableObjectMode: false,
|
|
15
15
|
writableHighWaterMark: 64 * 1024,
|
|
16
16
|
readableObjectMode: true,
|
|
17
|
-
transform(buf, _enc,
|
|
17
|
+
transform(buf, _enc, cb) {
|
|
18
18
|
let offset = 0;
|
|
19
19
|
let lastMatch = 0;
|
|
20
20
|
if (buffered) {
|
|
@@ -36,7 +36,7 @@ export function transformSplitOnNewline() {
|
|
|
36
36
|
break;
|
|
37
37
|
}
|
|
38
38
|
}
|
|
39
|
-
|
|
39
|
+
cb();
|
|
40
40
|
},
|
|
41
41
|
flush(done) {
|
|
42
42
|
if (buffered && buffered.length > 0)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { NumberOfSeconds, PositiveInteger } from '@naturalcycles/js-lib/types';
|
|
2
|
-
import type { TransformTyped } from '../stream.model.js';
|
|
3
|
-
export interface TransformThrottleOptions {
|
|
2
|
+
import type { TransformOptions, TransformTyped } from '../stream.model.js';
|
|
3
|
+
export interface TransformThrottleOptions extends TransformOptions {
|
|
4
4
|
/**
|
|
5
5
|
* How many items to allow per `interval` of seconds.
|
|
6
6
|
*/
|
|
@@ -9,7 +9,6 @@ export interface TransformThrottleOptions {
|
|
|
9
9
|
* How long is the interval (in seconds) where number of items should not exceed `throughput`.
|
|
10
10
|
*/
|
|
11
11
|
interval: NumberOfSeconds;
|
|
12
|
-
debug?: boolean;
|
|
13
12
|
}
|
|
14
13
|
/**
|
|
15
14
|
* Allows to throttle the throughput of the stream.
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { Transform } from 'node:stream';
|
|
2
2
|
import { _ms, _since, localTime } from '@naturalcycles/js-lib/datetime';
|
|
3
|
+
import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log';
|
|
3
4
|
import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
|
|
4
5
|
/**
|
|
5
6
|
* Allows to throttle the throughput of the stream.
|
|
@@ -19,36 +20,34 @@ import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
|
|
|
19
20
|
* @experimental
|
|
20
21
|
*/
|
|
21
22
|
export function transformThrottle(opt) {
|
|
22
|
-
const { throughput, interval,
|
|
23
|
+
const { throughput, interval, objectMode = true, highWaterMark } = opt;
|
|
23
24
|
let count = 0;
|
|
24
25
|
let start;
|
|
25
|
-
let
|
|
26
|
+
let lock;
|
|
26
27
|
let timeout;
|
|
28
|
+
const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel);
|
|
27
29
|
return new Transform({
|
|
28
|
-
objectMode
|
|
30
|
+
objectMode,
|
|
31
|
+
highWaterMark,
|
|
29
32
|
async transform(item, _, cb) {
|
|
30
33
|
// console.log('incoming', item, { paused: !!paused, count })
|
|
31
34
|
if (!start) {
|
|
32
35
|
start = Date.now();
|
|
33
36
|
timeout = setTimeout(() => onInterval(this), interval * 1000);
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
});
|
|
40
|
-
}
|
|
37
|
+
logger.log(`${localTime.now().toPretty()} transformThrottle started with`, {
|
|
38
|
+
throughput,
|
|
39
|
+
interval,
|
|
40
|
+
rps: Math.round(throughput / interval),
|
|
41
|
+
});
|
|
41
42
|
}
|
|
42
|
-
if (
|
|
43
|
-
// console.log('awaiting
|
|
44
|
-
await
|
|
43
|
+
if (lock) {
|
|
44
|
+
// console.log('awaiting lock', {item, count})
|
|
45
|
+
await lock;
|
|
45
46
|
}
|
|
46
47
|
if (++count >= throughput) {
|
|
47
48
|
// console.log('pausing now after', {item, count})
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
console.log(`${localTime.now().toPretty()} transformThrottle activated: ${count} items passed in ${_since(start)}, will pause for ${_ms(interval * 1000 - (Date.now() - start))}`);
|
|
51
|
-
}
|
|
49
|
+
lock = pDefer();
|
|
50
|
+
logger.log(`${localTime.now().toPretty()} transformThrottle activated: ${count} items passed in ${_since(start)}, will pause for ${_ms(interval * 1000 - (Date.now() - start))}`);
|
|
52
51
|
}
|
|
53
52
|
cb(null, item); // pass the item through
|
|
54
53
|
},
|
|
@@ -58,20 +57,16 @@ export function transformThrottle(opt) {
|
|
|
58
57
|
},
|
|
59
58
|
});
|
|
60
59
|
function onInterval(transform) {
|
|
61
|
-
if (
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
paused.resolve();
|
|
66
|
-
paused = undefined;
|
|
60
|
+
if (lock) {
|
|
61
|
+
logger.log(`${localTime.now().toPretty()} transformThrottle resumed`);
|
|
62
|
+
lock.resolve();
|
|
63
|
+
lock = undefined;
|
|
67
64
|
}
|
|
68
65
|
else {
|
|
69
|
-
|
|
70
|
-
console.log(`${localTime.now().toPretty()} transformThrottle passed ${count} (of max ${throughput}) items in ${_since(start)}`);
|
|
71
|
-
}
|
|
66
|
+
logger.log(`${localTime.now().toPretty()} transformThrottle passed ${count} (of max ${throughput}) items in ${_since(start)}`);
|
|
72
67
|
}
|
|
73
68
|
count = 0;
|
|
74
|
-
start =
|
|
69
|
+
start = localTime.nowUnixMillis();
|
|
75
70
|
timeout = setTimeout(() => onInterval(transform), interval * 1000);
|
|
76
71
|
}
|
|
77
72
|
}
|
|
@@ -1,15 +1,8 @@
|
|
|
1
1
|
import { Writable } from 'node:stream';
|
|
2
|
-
import type { DeferredPromise } from '@naturalcycles/js-lib/promise';
|
|
3
2
|
import type { TransformOptions } from '../stream.model.js';
|
|
4
|
-
export interface WritableVoidOptions extends TransformOptions {
|
|
5
|
-
/**
|
|
6
|
-
* If set - it will be Resolved when the Stream is done (after final.cb)
|
|
7
|
-
*/
|
|
8
|
-
streamDone?: DeferredPromise;
|
|
9
|
-
}
|
|
10
3
|
/**
|
|
11
4
|
* Use as a "null-terminator" of stream.pipeline.
|
|
12
5
|
* It consumes the stream as quickly as possible without doing anything.
|
|
13
6
|
* Put it at the end of your pipeline in case it ends with a Transform that needs a consumer.
|
|
14
7
|
*/
|
|
15
|
-
export declare function writableVoid(opt?:
|
|
8
|
+
export declare function writableVoid(opt?: TransformOptions): Writable;
|
package/package.json
CHANGED
|
@@ -2,8 +2,8 @@ import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js'
|
|
|
2
2
|
import { type Fetcher, getFetcher } from '@naturalcycles/js-lib/http'
|
|
3
3
|
import {
|
|
4
4
|
type CommonLogger,
|
|
5
|
-
commonLoggerMinLevel,
|
|
6
5
|
type CommonLogLevel,
|
|
6
|
+
createCommonLoggerAtLevel,
|
|
7
7
|
} from '@naturalcycles/js-lib/log'
|
|
8
8
|
import { _omit } from '@naturalcycles/js-lib/object/object.util.js'
|
|
9
9
|
import { PQueue } from '@naturalcycles/js-lib/promise/pQueue.js'
|
|
@@ -152,19 +152,22 @@ export class SlackService<CTX = any> {
|
|
|
152
152
|
*/
|
|
153
153
|
getCommonLogger(opt: {
|
|
154
154
|
minLogLevel: CommonLogLevel
|
|
155
|
+
debugChannel?: string
|
|
155
156
|
logChannel?: string
|
|
156
157
|
warnChannel?: string
|
|
157
158
|
errorChannel?: string
|
|
158
159
|
}): CommonLogger {
|
|
159
|
-
const { minLogLevel = 'log', logChannel, warnChannel, errorChannel } = opt
|
|
160
|
+
const { minLogLevel = 'log', debugChannel, logChannel, warnChannel, errorChannel } = opt
|
|
160
161
|
const defaultChannel = this.cfg.defaults?.channel || DEFAULTS.channel!
|
|
161
162
|
|
|
162
163
|
const q = new PQueue({
|
|
163
164
|
concurrency: 1,
|
|
164
165
|
})
|
|
165
166
|
|
|
166
|
-
return
|
|
167
|
+
return createCommonLoggerAtLevel(
|
|
167
168
|
{
|
|
169
|
+
debug: (...args) =>
|
|
170
|
+
q.push(() => this.send({ items: args, channel: debugChannel || defaultChannel })),
|
|
168
171
|
log: (...args) =>
|
|
169
172
|
q.push(() => this.send({ items: args, channel: logChannel || defaultChannel })),
|
|
170
173
|
warn: (...args) =>
|
package/src/stream/index.ts
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
export * from './ndjson/
|
|
2
|
-
export * from './ndjson/createWriteStreamAsNDJSON.js'
|
|
1
|
+
export * from './ndjson/createReadStreamAsNDJson.js'
|
|
3
2
|
export * from './ndjson/ndjson.model.js'
|
|
4
3
|
export * from './ndjson/ndjsonMap.js'
|
|
5
|
-
export * from './ndjson/ndjsonStreamForEach.js'
|
|
6
4
|
export * from './ndjson/transformJsonParse.js'
|
|
7
5
|
export * from './ndjson/transformToNDJson.js'
|
|
8
6
|
export * from './pipeline.js'
|
|
@@ -10,11 +8,11 @@ export * from './progressLogger.js'
|
|
|
10
8
|
export * from './readable/readableCombined.js'
|
|
11
9
|
export * from './readable/readableCreate.js'
|
|
12
10
|
export * from './readable/readableFromArray.js'
|
|
13
|
-
export * from './readable/readableToArray.js'
|
|
14
11
|
export * from './stream.model.js'
|
|
15
12
|
export * from './transform/transformChunk.js'
|
|
16
13
|
export * from './transform/transformFilter.js'
|
|
17
14
|
export * from './transform/transformFlatten.js'
|
|
15
|
+
export * from './transform/transformFork.js'
|
|
18
16
|
export * from './transform/transformLimit.js'
|
|
19
17
|
export * from './transform/transformLogProgress.js'
|
|
20
18
|
export * from './transform/transformMap.js'
|
|
@@ -24,13 +22,9 @@ export * from './transform/transformNoOp.js'
|
|
|
24
22
|
export * from './transform/transformOffset.js'
|
|
25
23
|
export * from './transform/transformSplit.js'
|
|
26
24
|
export * from './transform/transformTap.js'
|
|
27
|
-
export * from './transform/transformTee.js'
|
|
28
25
|
export * from './transform/transformThrottle.js'
|
|
29
|
-
export * from './transform/transformToArray.js'
|
|
30
26
|
export * from './transform/worker/baseWorkerClass.js'
|
|
31
27
|
export * from './transform/worker/transformMultiThreaded.js'
|
|
32
28
|
export * from './transform/worker/transformMultiThreaded.model.js'
|
|
33
|
-
export * from './writable/writableForEach.js'
|
|
34
|
-
export * from './writable/writableFork.js'
|
|
35
29
|
export * from './writable/writablePushToArray.js'
|
|
36
30
|
export * from './writable/writableVoid.js'
|
|
@@ -4,24 +4,21 @@ import type { ReadableTyped } from '../stream.model.js'
|
|
|
4
4
|
import { transformSplitOnNewline } from '../transform/transformSplit.js'
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
|
-
|
|
7
|
+
Returns a Readable of [already parsed] NDJSON objects.
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
9
|
+
Replaces a list of operations:
|
|
10
|
+
- requireFileToExist(inputPath)
|
|
11
|
+
- fs.createReadStream
|
|
12
|
+
- createUnzip (only if path ends with '.gz')
|
|
13
|
+
- transformSplitOnNewline
|
|
14
|
+
- transformJsonParse
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
To add a Limit or Offset: just add .take() or .drop(), example:
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
fs2.createReadStreamAsNDJSON().take(100),
|
|
20
|
-
transformX(),
|
|
21
|
-
])
|
|
18
|
+
createReadStreamAsNDJson().take(100)
|
|
22
19
|
*/
|
|
23
20
|
|
|
24
|
-
export function
|
|
21
|
+
export function createReadStreamAsNDJson<ROW = any>(inputPath: string): ReadableTyped<ROW> {
|
|
25
22
|
fs2.requireFileToExist(inputPath)
|
|
26
23
|
|
|
27
24
|
let stream: ReadableTyped<ROW> = fs2
|
|
@@ -21,6 +21,8 @@ export interface NDJSONMapOptions<IN = any, OUT = IN>
|
|
|
21
21
|
/**
|
|
22
22
|
* Unzips input file automatically, if it ends with `.gz`.
|
|
23
23
|
* Zips output file automatically, if it ends with `.gz`.
|
|
24
|
+
*
|
|
25
|
+
* @deprecated use Pipeline directly
|
|
24
26
|
*/
|
|
25
27
|
export async function ndjsonMap<IN = any, OUT = any>(
|
|
26
28
|
mapper: AbortableAsyncMapper<IN, OUT>,
|
package/src/stream/pipeline.ts
CHANGED
|
@@ -17,7 +17,7 @@ import type {
|
|
|
17
17
|
SKIP,
|
|
18
18
|
} from '@naturalcycles/js-lib/types'
|
|
19
19
|
import { fs2 } from '../fs/fs2.js'
|
|
20
|
-
import {
|
|
20
|
+
import { createReadStreamAsNDJson } from './ndjson/createReadStreamAsNDJson.js'
|
|
21
21
|
import { transformJsonParse } from './ndjson/transformJsonParse.js'
|
|
22
22
|
import { transformToNDJson } from './ndjson/transformToNDJson.js'
|
|
23
23
|
import type {
|
|
@@ -30,6 +30,7 @@ import { PIPELINE_GRACEFUL_ABORT } from './stream.util.js'
|
|
|
30
30
|
import { transformChunk } from './transform/transformChunk.js'
|
|
31
31
|
import { transformFilterSync } from './transform/transformFilter.js'
|
|
32
32
|
import { transformFlatten, transformFlattenIfNeeded } from './transform/transformFlatten.js'
|
|
33
|
+
import { transformFork } from './transform/transformFork.js'
|
|
33
34
|
import { transformLimit } from './transform/transformLimit.js'
|
|
34
35
|
import {
|
|
35
36
|
transformLogProgress,
|
|
@@ -83,12 +84,22 @@ export class Pipeline<T> {
|
|
|
83
84
|
return new Pipeline(Readable.from(input))
|
|
84
85
|
}
|
|
85
86
|
|
|
86
|
-
static
|
|
87
|
-
|
|
87
|
+
static fromNDJsonFile<T>(sourceFilePath: string): Pipeline<T> {
|
|
88
|
+
// Important that createReadStreamAsNDJson function is used
|
|
89
|
+
// (and not Pipeline set of individual transforms),
|
|
90
|
+
// because createReadStreamAsNDJson returns a Readable,
|
|
91
|
+
// hence it allows to apply .take(limit) on it
|
|
92
|
+
// e.g. like Pipeline.fromNDJsonFile().limitSource(limit)
|
|
93
|
+
return new Pipeline<T>(createReadStreamAsNDJson(sourceFilePath))
|
|
88
94
|
}
|
|
89
95
|
|
|
90
|
-
static
|
|
91
|
-
return new Pipeline(
|
|
96
|
+
static fromFile(sourceFilePath: string): Pipeline<Uint8Array> {
|
|
97
|
+
return new Pipeline(
|
|
98
|
+
fs2.createReadStream(sourceFilePath, {
|
|
99
|
+
highWaterMark: 64 * 1024, // no observed speedup
|
|
100
|
+
}),
|
|
101
|
+
false,
|
|
102
|
+
)
|
|
92
103
|
}
|
|
93
104
|
|
|
94
105
|
/**
|
|
@@ -210,8 +221,6 @@ export class Pipeline<T> {
|
|
|
210
221
|
return this
|
|
211
222
|
}
|
|
212
223
|
|
|
213
|
-
// todo: tee/fork
|
|
214
|
-
|
|
215
224
|
transform<TO>(transform: TransformTyped<T, TO>): Pipeline<TO> {
|
|
216
225
|
this.transforms.push(transform)
|
|
217
226
|
return this as any
|
|
@@ -226,6 +235,11 @@ export class Pipeline<T> {
|
|
|
226
235
|
return this as any
|
|
227
236
|
}
|
|
228
237
|
|
|
238
|
+
fork(fn: (pipeline: Pipeline<T>) => Pipeline<T>, opt?: TransformOptions): this {
|
|
239
|
+
this.transforms.push(transformFork(fn, opt))
|
|
240
|
+
return this
|
|
241
|
+
}
|
|
242
|
+
|
|
229
243
|
/**
|
|
230
244
|
* Utility method just to conveniently type-cast the current Pipeline type.
|
|
231
245
|
* No runtime effect.
|
|
@@ -276,13 +290,23 @@ export class Pipeline<T> {
|
|
|
276
290
|
}
|
|
277
291
|
|
|
278
292
|
gzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
|
|
279
|
-
this.transforms.push(
|
|
293
|
+
this.transforms.push(
|
|
294
|
+
createGzip({
|
|
295
|
+
// chunkSize: 64 * 1024, // no observed speedup
|
|
296
|
+
...opt,
|
|
297
|
+
}),
|
|
298
|
+
)
|
|
280
299
|
this.objectMode = false
|
|
281
300
|
return this as any
|
|
282
301
|
}
|
|
283
302
|
|
|
284
303
|
gunzip(this: Pipeline<Uint8Array>, opt?: ZlibOptions): Pipeline<Uint8Array> {
|
|
285
|
-
this.transforms.push(
|
|
304
|
+
this.transforms.push(
|
|
305
|
+
createUnzip({
|
|
306
|
+
chunkSize: 64 * 1024, // speedup from ~3200 to 3800 rps!
|
|
307
|
+
...opt,
|
|
308
|
+
}),
|
|
309
|
+
)
|
|
286
310
|
this.objectMode = false
|
|
287
311
|
return this as any
|
|
288
312
|
}
|
|
@@ -4,7 +4,7 @@ import { _hc, _mb } from '@naturalcycles/js-lib'
|
|
|
4
4
|
import { _since, localTime } from '@naturalcycles/js-lib/datetime'
|
|
5
5
|
import type { CommonLogger } from '@naturalcycles/js-lib/log'
|
|
6
6
|
import { SimpleMovingAverage } from '@naturalcycles/js-lib/math'
|
|
7
|
-
import type { AnyObject, UnixTimestampMillis } from '@naturalcycles/js-lib/types'
|
|
7
|
+
import type { AnyObject, PositiveInteger, UnixTimestampMillis } from '@naturalcycles/js-lib/types'
|
|
8
8
|
import { boldWhite, dimGrey, hasColors, white, yellow } from '../colors/colors.js'
|
|
9
9
|
import { SizeStack } from './sizeStack.js'
|
|
10
10
|
import type { ReadableMapper } from './stream.model.js'
|
|
@@ -87,7 +87,7 @@ export interface ProgressLoggerCfg<T = any> {
|
|
|
87
87
|
*
|
|
88
88
|
* @default 1000
|
|
89
89
|
*/
|
|
90
|
-
logEvery?:
|
|
90
|
+
logEvery?: PositiveInteger
|
|
91
91
|
|
|
92
92
|
logger?: CommonLogger
|
|
93
93
|
|
|
@@ -111,7 +111,7 @@ export interface ProgressLoggerCfg<T = any> {
|
|
|
111
111
|
*
|
|
112
112
|
* Defaults to 1.
|
|
113
113
|
*/
|
|
114
|
-
chunkSize?:
|
|
114
|
+
chunkSize?: PositiveInteger
|
|
115
115
|
|
|
116
116
|
/**
|
|
117
117
|
* Experimental logging of item (chunk) sizes, when json-stringified.
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { Readable } from 'node:stream'
|
|
2
|
+
import { type CommonLogger, createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log'
|
|
2
3
|
import { type DeferredPromise, pDefer } from '@naturalcycles/js-lib/promise/pDefer.js'
|
|
3
4
|
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js'
|
|
4
5
|
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
|
|
6
|
+
import type { TransformOptions } from '../stream.model.js'
|
|
5
7
|
|
|
6
8
|
/**
|
|
7
9
|
* Allows to combine multiple Readables into 1 Readable.
|
|
@@ -14,15 +16,22 @@ import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
|
|
|
14
16
|
* @experimental
|
|
15
17
|
*/
|
|
16
18
|
export class ReadableCombined<T> extends Readable implements ReadableTyped<T> {
|
|
17
|
-
static create<T>(inputs: Readable[]): ReadableCombined<T> {
|
|
18
|
-
return new ReadableCombined<T>(inputs)
|
|
19
|
+
static create<T>(inputs: Readable[], opt: TransformOptions = {}): ReadableCombined<T> {
|
|
20
|
+
return new ReadableCombined<T>(inputs, opt)
|
|
19
21
|
}
|
|
20
22
|
|
|
21
|
-
private constructor(
|
|
22
|
-
|
|
23
|
-
|
|
23
|
+
private constructor(
|
|
24
|
+
public inputs: Readable[],
|
|
25
|
+
opt: TransformOptions,
|
|
26
|
+
) {
|
|
27
|
+
const { objectMode = true, highWaterMark } = opt
|
|
28
|
+
super({ objectMode, highWaterMark })
|
|
29
|
+
this.logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel)
|
|
30
|
+
void this.run()
|
|
24
31
|
}
|
|
25
32
|
|
|
33
|
+
private logger: CommonLogger
|
|
34
|
+
|
|
26
35
|
/**
|
|
27
36
|
* If defined - we are in Paused mode
|
|
28
37
|
* and should await the lock to be resolved before proceeding.
|
|
@@ -38,7 +47,9 @@ export class ReadableCombined<T> extends Readable implements ReadableTyped<T> {
|
|
|
38
47
|
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
|
|
39
48
|
private countReads = 0
|
|
40
49
|
|
|
41
|
-
private async
|
|
50
|
+
private async run(): Promise<void> {
|
|
51
|
+
const { logger } = this
|
|
52
|
+
|
|
42
53
|
await pMap(this.inputs, async (input, i) => {
|
|
43
54
|
for await (const item of input) {
|
|
44
55
|
this.countIn++
|
|
@@ -52,14 +63,14 @@ export class ReadableCombined<T> extends Readable implements ReadableTyped<T> {
|
|
|
52
63
|
this.countOut++
|
|
53
64
|
if (!shouldContinue && !this.lock) {
|
|
54
65
|
this.lock = pDefer()
|
|
55
|
-
|
|
66
|
+
logger.log(`ReadableCombined.push #${i} returned false, pausing the flow!`)
|
|
56
67
|
}
|
|
57
68
|
}
|
|
58
69
|
|
|
59
|
-
|
|
70
|
+
logger.log(`ReadableCombined: input #${i} done`)
|
|
60
71
|
})
|
|
61
72
|
|
|
62
|
-
|
|
73
|
+
logger.log(`ReadableCombined: all inputs done!`)
|
|
63
74
|
this.push(null)
|
|
64
75
|
}
|
|
65
76
|
|
|
@@ -67,7 +78,7 @@ export class ReadableCombined<T> extends Readable implements ReadableTyped<T> {
|
|
|
67
78
|
this.countReads++
|
|
68
79
|
|
|
69
80
|
if (this.lock) {
|
|
70
|
-
|
|
81
|
+
this.logger.log(`ReadableCombined._read: resuming the flow!`)
|
|
71
82
|
// calling it in this order is important!
|
|
72
83
|
// this.lock should be undefined BEFORE we call lock.resolve()
|
|
73
84
|
const { lock } = this
|
|
@@ -78,7 +89,7 @@ export class ReadableCombined<T> extends Readable implements ReadableTyped<T> {
|
|
|
78
89
|
|
|
79
90
|
private logStats(): void {
|
|
80
91
|
const { countIn, countOut, countReads } = this
|
|
81
|
-
|
|
92
|
+
this.logger.debug({
|
|
82
93
|
countIn,
|
|
83
94
|
countOut,
|
|
84
95
|
countReads,
|
|
@@ -13,17 +13,18 @@ import type { ReadableTyped } from '../stream.model.js'
|
|
|
13
13
|
* e.g the read() method doesn't return anything, so, it will hang the Node process (or cause it to process.exit(0))
|
|
14
14
|
* if read() is called AFTER everything has been pushed and the Readable is closed (by pushing `null`).
|
|
15
15
|
* Beware of it when e.g doing unit testing! Jest prefers to hang (not exit-0).
|
|
16
|
-
*
|
|
17
|
-
* @deprecated because of the caution above
|
|
18
16
|
*/
|
|
19
17
|
export function readableCreate<T>(
|
|
20
18
|
items: Iterable<T> = [],
|
|
21
19
|
opt?: ReadableOptions,
|
|
20
|
+
onRead?: () => void, // read callback
|
|
22
21
|
): ReadableTyped<T> {
|
|
23
22
|
const readable = new Readable({
|
|
24
23
|
objectMode: true,
|
|
25
24
|
...opt,
|
|
26
|
-
read() {
|
|
25
|
+
read() {
|
|
26
|
+
onRead?.()
|
|
27
|
+
},
|
|
27
28
|
})
|
|
28
29
|
for (const item of items) {
|
|
29
30
|
readable.push(item)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { Readable, Transform, Writable } from 'node:stream'
|
|
2
|
+
import type { CommonLogger, CommonLogLevel } from '@naturalcycles/js-lib/log'
|
|
2
3
|
import type { Promisable } from '@naturalcycles/js-lib/types'
|
|
3
4
|
|
|
4
5
|
export interface ReadableSignalOptions {
|
|
@@ -72,4 +73,21 @@ export interface TransformOptions {
|
|
|
72
73
|
* @default 16
|
|
73
74
|
*/
|
|
74
75
|
highWaterMark?: number
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Defaults to `console`.
|
|
79
|
+
*/
|
|
80
|
+
logger?: CommonLogger
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Not every Transform implements it.
|
|
84
|
+
* Can be one of:
|
|
85
|
+
* debug - most verbose, when debugging is needed
|
|
86
|
+
* log - default level
|
|
87
|
+
* error - logs errors and warnings only
|
|
88
|
+
*
|
|
89
|
+
* Default is 'log'.
|
|
90
|
+
*
|
|
91
|
+
*/
|
|
92
|
+
logLevel?: CommonLogLevel
|
|
75
93
|
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { Transform } from 'node:stream'
|
|
2
|
+
import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log'
|
|
3
|
+
import { type DeferredPromise, pDefer } from '@naturalcycles/js-lib/promise/pDefer.js'
|
|
4
|
+
import { Pipeline } from '../pipeline.js'
|
|
5
|
+
import { readableCreate } from '../readable/readableCreate.js'
|
|
6
|
+
import type { TransformOptions, TransformTyped } from '../stream.model.js'
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Allows to "fork" away from the "main pipeline" into the "forked pipeline".
|
|
10
|
+
*
|
|
11
|
+
* Correctly keeps backpressure from both "downstreams" (main and forked).
|
|
12
|
+
*
|
|
13
|
+
* @experimental
|
|
14
|
+
*/
|
|
15
|
+
export function transformFork<T>(
|
|
16
|
+
fn: (pipeline: Pipeline<T>) => Pipeline<T>,
|
|
17
|
+
opt: TransformOptions = {},
|
|
18
|
+
): TransformTyped<T, T> {
|
|
19
|
+
const { objectMode = true, highWaterMark } = opt
|
|
20
|
+
const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel)
|
|
21
|
+
|
|
22
|
+
let lock: DeferredPromise | undefined
|
|
23
|
+
|
|
24
|
+
const fork = readableCreate<T>([], {}, () => {
|
|
25
|
+
// `_read` is called
|
|
26
|
+
if (!lock) return
|
|
27
|
+
// We had a lock - let's Resume
|
|
28
|
+
logger.log(`TransformFork: resume`)
|
|
29
|
+
const lockCopy = lock
|
|
30
|
+
lock = undefined
|
|
31
|
+
lockCopy.resolve()
|
|
32
|
+
})
|
|
33
|
+
|
|
34
|
+
const p = fn(Pipeline.from<T>(fork))
|
|
35
|
+
void p.run().then(() => {
|
|
36
|
+
logger.log('TransformFork: done')
|
|
37
|
+
})
|
|
38
|
+
|
|
39
|
+
return new Transform({
|
|
40
|
+
objectMode,
|
|
41
|
+
highWaterMark,
|
|
42
|
+
async transform(chunk: T, _, cb) {
|
|
43
|
+
// pass through to the "main" pipeline
|
|
44
|
+
// Main pipeline should handle backpressure "automatically",
|
|
45
|
+
// so, we're not maintaining a Lock for it
|
|
46
|
+
this.push(chunk)
|
|
47
|
+
|
|
48
|
+
if (lock) {
|
|
49
|
+
// Forked pipeline is locked - let's wait for it to call _read
|
|
50
|
+
await lock
|
|
51
|
+
// lock is undefined at this point
|
|
52
|
+
}
|
|
53
|
+
// pass to the "forked" pipeline
|
|
54
|
+
const shouldContinue = fork.push(chunk)
|
|
55
|
+
if (!shouldContinue && !lock) {
|
|
56
|
+
// Forked pipeline indicates that we should Pause
|
|
57
|
+
lock = pDefer()
|
|
58
|
+
logger.log(`TransformFork: pause`)
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// acknowledge that we've finished processing the input chunk
|
|
62
|
+
cb()
|
|
63
|
+
},
|
|
64
|
+
async final(cb) {
|
|
65
|
+
logger.log('TransformFork: final')
|
|
66
|
+
|
|
67
|
+
// Pushing null "closes"/ends the secondary pipeline correctly
|
|
68
|
+
fork.push(null)
|
|
69
|
+
|
|
70
|
+
// Acknowledge that we've received `null` and passed it through to the fork
|
|
71
|
+
cb()
|
|
72
|
+
},
|
|
73
|
+
})
|
|
74
|
+
}
|