@naturalcycles/nodejs-lib 15.70.1 → 15.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stream/index.d.ts +0 -1
- package/dist/stream/index.js +0 -1
- package/dist/stream/pipeline.d.ts +13 -8
- package/dist/stream/pipeline.js +18 -32
- package/dist/stream/transform/transformFilter.js +2 -2
- package/dist/stream/transform/transformMap.d.ts +13 -25
- package/dist/stream/transform/transformMap.js +134 -118
- package/dist/stream/transform/worker/transformMultiThreaded.js +59 -39
- package/dist/zip/zip.util.d.ts +4 -1
- package/dist/zip/zip.util.js +14 -2
- package/package.json +1 -3
- package/src/stream/index.ts +0 -1
- package/src/stream/pipeline.ts +21 -48
- package/src/stream/transform/transformFilter.ts +2 -2
- package/src/stream/transform/transformMap.ts +168 -153
- package/src/stream/transform/worker/transformMultiThreaded.ts +57 -40
- package/src/stream/transform/worker/workerClassProxy.js +0 -4
- package/src/zip/zip.util.ts +15 -1
- package/dist/stream/transform/transformMap2.d.ts +0 -66
- package/dist/stream/transform/transformMap2.js +0 -171
- package/src/stream/transform/transformMap2.ts +0 -283
package/dist/stream/transform/worker/transformMultiThreaded.js
CHANGED

@@ -1,7 +1,7 @@
+import { Transform } from 'node:stream';
 import { Worker } from 'node:worker_threads';
 import { _range } from '@naturalcycles/js-lib/array/range.js';
 import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
-import through2Concurrent from 'through2-concurrent';
 const workerProxyFilePath = `${import.meta.dirname}/workerClassProxy.js`;
 /**
 * Spawns a pool of Workers (threads).
@@ -21,6 +21,10 @@ export function transformMultiThreaded(opt) {
 const workerDonePromises = [];
 const messageDonePromises = {};
 let index = -1; // input chunk index, will start from 0
+// Concurrency control
+let inFlight = 0;
+let blockedCallback = null;
+let flushBlocked = null;
 const workers = _range(0, poolSize).map(workerIndex => {
 workerDonePromises.push(pDefer());
 const worker = new Worker(workerProxyFilePath, {
@@ -30,20 +34,14 @@ export function transformMultiThreaded(opt) {
 ...workerData,
 },
 });
-// const {threadId} = worker
-// console.log({threadId})
 worker.on('error', err => {
 console.error(`Worker ${workerIndex} error`, err);
 workerDonePromises[workerIndex].reject(err);
 });
 worker.on('exit', _exitCode => {
-// console.log(`Worker ${index} exit: ${exitCode}`)
 workerDonePromises[workerIndex].resolve(undefined);
 });
 worker.on('message', (out) => {
-// console.log(`Message from Worker ${workerIndex}:`, out)
-// console.log(Object.keys(messageDonePromises))
-// tr.push(out.payload)
 if (out.error) {
 messageDonePromises[out.index].reject(out.error);
 }
@@ -53,48 +51,70 @@ export function transformMultiThreaded(opt) {
 });
 return worker;
 });
-return
-
-highWaterMark,
-
+return new Transform({
+objectMode: true,
+readableHighWaterMark: highWaterMark,
+writableHighWaterMark: highWaterMark,
+async transform(chunk, _, cb) {
+const currentIndex = ++index;
+inFlight++;
+// Apply backpressure if at capacity, otherwise request more input
+if (inFlight < maxConcurrency) {
+cb();
+}
+else {
+blockedCallback = cb;
+}
+// Create the unresolved promise (to await)
+messageDonePromises[currentIndex] = pDefer();
+const worker = workers[currentIndex % poolSize]; // round-robin
+worker.postMessage({
+index: currentIndex,
+payload: chunk,
+});
+try {
+const out = await messageDonePromises[currentIndex];
+this.push(out);
+}
+catch (err) {
+// Currently we only support ErrorMode.SUPPRESS
+// Error is logged and output continues
+console.error(err);
+}
+finally {
+delete messageDonePromises[currentIndex];
+inFlight--;
+// Release blocked callback if we now have capacity
+if (blockedCallback && inFlight < maxConcurrency) {
+const pendingCb = blockedCallback;
+blockedCallback = null;
+pendingCb();
+}
+// Trigger flush completion if all done
+if (inFlight === 0 && flushBlocked) {
+flushBlocked.resolve();
+}
+}
+},
+async flush(cb) {
+// Wait for all in-flight operations to complete
+if (inFlight > 0) {
+flushBlocked = pDefer();
+await flushBlocked;
+}
 try {
-// Push null (complete) to all
+// Push null (complete) to all workers
 for (const worker of workers) {
 worker.postMessage(null);
 }
-console.log(`transformMultiThreaded.
+console.log(`transformMultiThreaded.flush is waiting for all workers to be done`);
 await Promise.all(workerDonePromises);
-console.log(`transformMultiThreaded.
+console.log(`transformMultiThreaded.flush all workers done`);
 cb();
 }
 catch (err) {
 cb(err);
 }
 },
-}, async function transformMapFn(chunk, _, cb) {
-// Freezing the index, because it may change due to concurrency
-const currentIndex = ++index;
-// Create the unresolved promise (to avait)
-messageDonePromises[currentIndex] = pDefer();
-const worker = workers[currentIndex % poolSize]; // round-robin
-worker.postMessage({
-index: currentIndex,
-payload: chunk,
-});
-try {
-// awaiting for result
-const out = await messageDonePromises[currentIndex];
-// console.log('awaited!')
-// return the result
-cb(null, out);
-}
-catch (err) {
-// Currently we only support ErrorMode.SUPPRESS
-// Error is logged and output continues
-console.error(err);
-cb(); // emit nothing in case of an error
-}
-// clean up
-delete messageDonePromises[currentIndex];
 });
 }
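For illustration, a minimal TypeScript sketch (not package code) of the backpressure pattern introduced above: an inFlight counter, a parked transform callback, and a promise that gates flush (the package uses pDefer for this). The work() mapper here is a hypothetical stand-in for the worker round-trip:

import { Transform, type TransformCallback } from 'node:stream'

export function concurrentMapTransform<IN, OUT>(
  work: (chunk: IN) => Promise<OUT>,
  maxConcurrency = 4,
): Transform {
  let inFlight = 0
  let blockedCallback: TransformCallback | null = null
  let flushResolve: (() => void) | null = null

  return new Transform({
    objectMode: true,
    async transform(chunk: IN, _enc, cb) {
      inFlight++
      // Request the next chunk right away while below the limit, otherwise park the callback
      if (inFlight < maxConcurrency) cb()
      else blockedCallback = cb
      try {
        this.push(await work(chunk))
      } catch (err) {
        // ErrorMode.SUPPRESS-style: log the error and keep the stream going
        console.error(err)
      } finally {
        inFlight--
        // A slot freed up: release the parked callback so upstream resumes
        if (blockedCallback && inFlight < maxConcurrency) {
          const pendingCb = blockedCallback
          blockedCallback = null
          pendingCb()
        }
        // Nothing left in flight: let flush() complete
        if (inFlight === 0 && flushResolve) flushResolve()
      }
    },
    async flush(cb) {
      // Keep the stream open until every in-flight chunk has been pushed
      if (inFlight > 0) {
        await new Promise<void>(resolve => (flushResolve = resolve))
      }
      cb()
    },
  })
}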
package/dist/zip/zip.util.d.ts
CHANGED
@@ -1,4 +1,5 @@
 import type { ZlibOptions, ZstdOptions } from 'node:zlib';
+import type { Integer } from '@naturalcycles/js-lib/types';
 export declare function decompressZstdOrInflateToString(buf: Buffer): Promise<string>;
 /**
 * Detects if Buffer is zstd-compressed.
@@ -29,7 +30,9 @@ export declare function gunzipBuffer(buf: Buffer, options?: ZlibOptions): Promis
 */
 export declare function gzipString(s: string, options?: ZlibOptions): Promise<Buffer<ArrayBuffer>>;
 export declare function gunzipToString(buf: Buffer, options?: ZlibOptions): Promise<string>;
-export declare function zstdCompress(input: Buffer | string,
+export declare function zstdCompress(input: Buffer | string, level?: Integer, // defaults to 3
+options?: ZstdOptions): Promise<Buffer<ArrayBuffer>>;
+export declare function zstdLevelToOptions(level: Integer | undefined, opt?: ZstdOptions): ZstdOptions;
 export declare function zstdDecompressToString(input: Buffer, options?: ZstdOptions): Promise<string>;
 export declare function zstdDecompress(input: Buffer, options?: ZstdOptions): Promise<Buffer<ArrayBuffer>>;
 export declare function isZstdBuffer(input: Buffer): boolean;
package/dist/zip/zip.util.js
CHANGED
@@ -59,8 +59,20 @@ export async function gzipString(s, options) {
 export async function gunzipToString(buf, options) {
 return (await gunzipBuffer(buf, options)).toString();
 }
-export async function zstdCompress(input,
-
+export async function zstdCompress(input, level, // defaults to 3
+options = {}) {
+return await zstdCompressAsync(input, zstdLevelToOptions(level, options));
+}
+export function zstdLevelToOptions(level, opt = {}) {
+if (!level)
+return opt;
+return {
+...opt,
+params: {
+...opt.params,
+[zlib.constants.ZSTD_c_compressionLevel]: level,
+},
+};
 }
 export async function zstdDecompressToString(input, options = {}) {
 return (await zstdDecompressAsync(input, options)).toString();
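For illustration, a usage sketch of the new level argument (not package code; the root import path is an assumption, since the exports map is not shown in this diff):

// Import path is assumed for the sketch
import { zstdCompress, zstdDecompressToString, zstdLevelToOptions } from '@naturalcycles/nodejs-lib'

// No level (or 0) leaves the options untouched, so zstd's own default level 3 applies
zstdLevelToOptions(undefined, { chunkSize: 64 * 1024 })
// => { chunkSize: 65536 }

// A non-zero level is merged into ZstdOptions.params
zstdLevelToOptions(19)
// => { params: { [zlib.constants.ZSTD_c_compressionLevel]: 19 } }

const buf = await zstdCompress('hello world', 19)
await zstdDecompressToString(buf) // => 'hello world'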
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
 "name": "@naturalcycles/nodejs-lib",
 "type": "module",
-"version": "15.
+"version": "15.72.0",
 "dependencies": {
 "@naturalcycles/js-lib": "^15",
 "@types/js-yaml": "^4",
@@ -14,13 +14,11 @@
 "js-yaml": "^4",
 "jsonwebtoken": "^9",
 "lru-cache": "^11",
-"through2-concurrent": "^2",
 "tinyglobby": "^0.2",
 "tslib": "^2",
 "yargs": "^18"
 },
 "devDependencies": {
-"@types/through2-concurrent": "^2",
 "@naturalcycles/dev-lib": "18.4.2"
 },
 "exports": {
package/src/stream/index.ts
CHANGED
@@ -16,7 +16,6 @@ export * from './transform/transformFork.js'
 export * from './transform/transformLimit.js'
 export * from './transform/transformLogProgress.js'
 export * from './transform/transformMap.js'
-export * from './transform/transformMap2.js'
 export * from './transform/transformMapSimple.js'
 export * from './transform/transformMapSync.js'
 export * from './transform/transformNoOp.js'
package/src/stream/pipeline.ts
CHANGED
@@ -24,6 +24,7 @@ import {
 type SKIP,
 } from '@naturalcycles/js-lib/types'
 import { fs2 } from '../fs/fs2.js'
+import { zstdLevelToOptions } from '../zip/zip.util.js'
 import { createReadStreamAsNDJson } from './ndjson/createReadStreamAsNDJson.js'
 import { transformJsonParse } from './ndjson/transformJsonParse.js'
 import { transformToNDJson } from './ndjson/transformToNDJson.js'
@@ -45,7 +46,6 @@ import {
 type TransformLogProgressOptions,
 } from './transform/transformLogProgress.js'
 import { transformMap, type TransformMapOptions } from './transform/transformMap.js'
-import { transformMap2, type TransformMap2Options } from './transform/transformMap2.js'
 import {
 transformMapSimple,
 type TransformMapSimpleOptions,
@@ -185,7 +185,7 @@ export class Pipeline<T = unknown> {
 return this
 }
 
-
+map<TO>(
 mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>,
 opt?: TransformMapOptions<T, TO>,
 ): Pipeline<TO> {
@@ -198,19 +198,6 @@ export class Pipeline<T = unknown> {
 return this as any
 }
 
-map<TO>(
-mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>,
-opt?: TransformMap2Options<T, TO>,
-): Pipeline<TO> {
-this.transforms.push(
-transformMap2(mapper, {
-...opt,
-signal: this.abortableSignal,
-}),
-)
-return this as any
-}
-
 mapSync<TO>(
 mapper: IndexedMapper<T, TO | typeof SKIP | typeof END>,
 opt?: TransformMapSyncOptions,
@@ -231,7 +218,7 @@ export class Pipeline<T = unknown> {
 
 filter(asyncPredicate: AsyncPredicate<T>, opt?: TransformMapOptions): this {
 this.transforms.push(
-
+transformMap(v => v, {
 asyncPredicate,
 ...opt,
 signal: this.abortableSignal,
@@ -363,13 +350,12 @@ export class Pipeline<T = unknown> {
 return this as any
 }
 
-zstdCompress(
-this
-
-
-
-
-)
+zstdCompress(
+this: Pipeline<Uint8Array>,
+level?: Integer, // defaults to 3
+opt?: ZstdOptions,
+): Pipeline<Uint8Array> {
+this.transforms.push(createZstdCompress(zstdLevelToOptions(level, opt)))
 this.objectMode = false
 return this as any
 }
@@ -398,21 +384,25 @@ export class Pipeline<T = unknown> {
 await this.run()
 }
 
-
+/**
+* level corresponds to zstd compression level (if filename ends with .zst),
+* or gzip compression level (if filename ends with .gz).
+* Default levels are:
+* gzip: 6
+* zlib: 3 (optimized for throughput, not size, may be larger than gzip at its default level)
+*/
+async toNDJsonFile(outputFilePath: string, level?: Integer): Promise<void> {
 fs2.ensureFile(outputFilePath)
 this.transforms.push(transformToNDJson())
 if (outputFilePath.endsWith('.gz')) {
 this.transforms.push(
 createGzip({
+level,
 // chunkSize: 64 * 1024, // no observed speedup
 }),
 )
 } else if (outputFilePath.endsWith('.zst')) {
-this.transforms.push(
-createZstdCompress({
-// chunkSize: 64 * 1024, // no observed speedup
-}),
-)
+this.transforms.push(createZstdCompress(zstdLevelToOptions(level)))
 }
 this.destination = fs2.createWriteStream(outputFilePath, {
 // highWaterMark: 64 * 1024, // no observed speedup
@@ -425,29 +415,12 @@ export class Pipeline<T = unknown> {
 await this.run()
 }
 
-async forEachLegacy(
-fn: AsyncIndexedMapper<T, void>,
-opt: TransformMapOptions<T, void> & TransformLogProgressOptions<T> = {},
-): Promise<void> {
-this.transforms.push(
-transformMap2(fn, {
-predicate: opt.logEvery ? _passthroughPredicate : undefined, // for the logger to work
-...opt,
-signal: this.abortableSignal,
-}),
-)
-if (opt.logEvery) {
-this.transforms.push(transformLogProgress(opt))
-}
-await this.run()
-}
-
 async forEach(
 fn: AsyncIndexedMapper<T, void>,
-opt:
+opt: TransformMapOptions<T, void> & TransformLogProgressOptions<T> = {},
 ): Promise<void> {
 this.transforms.push(
-
+transformMap(fn, {
 predicate: opt.logEvery ? _passthroughPredicate : undefined, // for the logger to work
 ...opt,
 signal: this.abortableSignal,
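For illustration, a standalone sketch (not package code) of the compression selection that toNDJsonFile now performs; zstdLevelToOptions here mirrors the helper added in zip.util.ts, and createZstdCompress requires a Node version that ships zstd support:

import * as zlib from 'node:zlib'
import type { Transform } from 'node:stream'

// Mirrors the package helper: translate a plain level into ZstdOptions.params
function zstdLevelToOptions(level?: number, opt: zlib.ZstdOptions = {}): zlib.ZstdOptions {
  if (!level) return opt
  return { ...opt, params: { ...opt.params, [zlib.constants.ZSTD_c_compressionLevel]: level } }
}

// Pick the compression Transform from the output file extension, as toNDJsonFile does
function compressorForFile(outputFilePath: string, level?: number): Transform | undefined {
  if (outputFilePath.endsWith('.gz')) return zlib.createGzip({ level }) // gzip defaults to level 6
  if (outputFilePath.endsWith('.zst')) return zlib.createZstdCompress(zstdLevelToOptions(level)) // zstd defaults to level 3
  return undefined // plain .ndjson output: no compression stage
}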
package/src/stream/transform/transformFilter.ts
CHANGED

@@ -2,7 +2,7 @@ import { Transform } from 'node:stream'
 import type { AsyncPredicate, Predicate } from '@naturalcycles/js-lib/types'
 import type { TransformOptions, TransformTyped } from '../stream.model.js'
 import type { TransformMapOptions } from './transformMap.js'
-import {
+import { transformMap } from './transformMap.js'
 
 /**
 * Just a convenience wrapper around `transformMap` that has built-in predicate filtering support.
@@ -11,7 +11,7 @@ export function transformFilter<IN = any>(
 asyncPredicate: AsyncPredicate<IN>,
 opt: TransformMapOptions = {},
 ): TransformTyped<IN, IN> {
-return
+return transformMap(v => v, {
 asyncPredicate,
 ...opt,
 })
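For illustration, a usage sketch of the rewired transformFilter (the import path is an assumption; the subpath export is not shown in this diff). It is now transformMap with an identity mapper plus asyncPredicate, i.e. an async-aware filter for object-mode streams:

import { Readable } from 'node:stream'
// Import path is assumed for the sketch
import { transformFilter } from '@naturalcycles/nodejs-lib/stream'

// Keep only even numbers; the predicate may be async (e.g. a lookup or validation call)
const filtered = Readable.from([1, 2, 3, 4, 5]).pipe(
  transformFilter<number>(async n => n % 2 === 0),
)

const evens: number[] = []
for await (const n of filtered) evens.push(n)
// evens => [2, 4]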