@naturalcycles/nodejs-lib 15.70.1 → 15.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
+ import { Transform } from 'node:stream';
1
2
  import { Worker } from 'node:worker_threads';
2
3
  import { _range } from '@naturalcycles/js-lib/array/range.js';
3
4
  import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
4
- import through2Concurrent from 'through2-concurrent';
5
5
  const workerProxyFilePath = `${import.meta.dirname}/workerClassProxy.js`;
6
6
  /**
7
7
  * Spawns a pool of Workers (threads).
@@ -21,6 +21,10 @@ export function transformMultiThreaded(opt) {
21
21
  const workerDonePromises = [];
22
22
  const messageDonePromises = {};
23
23
  let index = -1; // input chunk index, will start from 0
24
+ // Concurrency control
25
+ let inFlight = 0;
26
+ let blockedCallback = null;
27
+ let flushBlocked = null;
24
28
  const workers = _range(0, poolSize).map(workerIndex => {
25
29
  workerDonePromises.push(pDefer());
26
30
  const worker = new Worker(workerProxyFilePath, {
@@ -30,20 +34,14 @@ export function transformMultiThreaded(opt) {
30
34
  ...workerData,
31
35
  },
32
36
  });
33
- // const {threadId} = worker
34
- // console.log({threadId})
35
37
  worker.on('error', err => {
36
38
  console.error(`Worker ${workerIndex} error`, err);
37
39
  workerDonePromises[workerIndex].reject(err);
38
40
  });
39
41
  worker.on('exit', _exitCode => {
40
- // console.log(`Worker ${index} exit: ${exitCode}`)
41
42
  workerDonePromises[workerIndex].resolve(undefined);
42
43
  });
43
44
  worker.on('message', (out) => {
44
- // console.log(`Message from Worker ${workerIndex}:`, out)
45
- // console.log(Object.keys(messageDonePromises))
46
- // tr.push(out.payload)
47
45
  if (out.error) {
48
46
  messageDonePromises[out.index].reject(out.error);
49
47
  }
@@ -53,48 +51,70 @@ export function transformMultiThreaded(opt) {
53
51
  });
54
52
  return worker;
55
53
  });
56
- return through2Concurrent.obj({
57
- maxConcurrency,
58
- highWaterMark,
59
- async final(cb) {
54
+ return new Transform({
55
+ objectMode: true,
56
+ readableHighWaterMark: highWaterMark,
57
+ writableHighWaterMark: highWaterMark,
58
+ async transform(chunk, _, cb) {
59
+ const currentIndex = ++index;
60
+ inFlight++;
61
+ // Apply backpressure if at capacity, otherwise request more input
62
+ if (inFlight < maxConcurrency) {
63
+ cb();
64
+ }
65
+ else {
66
+ blockedCallback = cb;
67
+ }
68
+ // Create the unresolved promise (to await)
69
+ messageDonePromises[currentIndex] = pDefer();
70
+ const worker = workers[currentIndex % poolSize]; // round-robin
71
+ worker.postMessage({
72
+ index: currentIndex,
73
+ payload: chunk,
74
+ });
75
+ try {
76
+ const out = await messageDonePromises[currentIndex];
77
+ this.push(out);
78
+ }
79
+ catch (err) {
80
+ // Currently we only support ErrorMode.SUPPRESS
81
+ // Error is logged and output continues
82
+ console.error(err);
83
+ }
84
+ finally {
85
+ delete messageDonePromises[currentIndex];
86
+ inFlight--;
87
+ // Release blocked callback if we now have capacity
88
+ if (blockedCallback && inFlight < maxConcurrency) {
89
+ const pendingCb = blockedCallback;
90
+ blockedCallback = null;
91
+ pendingCb();
92
+ }
93
+ // Trigger flush completion if all done
94
+ if (inFlight === 0 && flushBlocked) {
95
+ flushBlocked.resolve();
96
+ }
97
+ }
98
+ },
99
+ async flush(cb) {
100
+ // Wait for all in-flight operations to complete
101
+ if (inFlight > 0) {
102
+ flushBlocked = pDefer();
103
+ await flushBlocked;
104
+ }
60
105
  try {
61
- // Push null (complete) to all sub-streams
106
+ // Push null (complete) to all workers
62
107
  for (const worker of workers) {
63
108
  worker.postMessage(null);
64
109
  }
65
- console.log(`transformMultiThreaded.final is waiting for all chains to be done`);
110
+ console.log(`transformMultiThreaded.flush is waiting for all workers to be done`);
66
111
  await Promise.all(workerDonePromises);
67
- console.log(`transformMultiThreaded.final all chains done`);
112
+ console.log(`transformMultiThreaded.flush all workers done`);
68
113
  cb();
69
114
  }
70
115
  catch (err) {
71
116
  cb(err);
72
117
  }
73
118
  },
74
- }, async function transformMapFn(chunk, _, cb) {
75
- // Freezing the index, because it may change due to concurrency
76
- const currentIndex = ++index;
77
- // Create the unresolved promise (to avait)
78
- messageDonePromises[currentIndex] = pDefer();
79
- const worker = workers[currentIndex % poolSize]; // round-robin
80
- worker.postMessage({
81
- index: currentIndex,
82
- payload: chunk,
83
- });
84
- try {
85
- // awaiting for result
86
- const out = await messageDonePromises[currentIndex];
87
- // console.log('awaited!')
88
- // return the result
89
- cb(null, out);
90
- }
91
- catch (err) {
92
- // Currently we only support ErrorMode.SUPPRESS
93
- // Error is logged and output continues
94
- console.error(err);
95
- cb(); // emit nothing in case of an error
96
- }
97
- // clean up
98
- delete messageDonePromises[currentIndex];
99
119
  });
100
120
  }
@@ -1,4 +1,5 @@
1
1
  import type { ZlibOptions, ZstdOptions } from 'node:zlib';
2
+ import type { Integer } from '@naturalcycles/js-lib/types';
2
3
  export declare function decompressZstdOrInflateToString(buf: Buffer): Promise<string>;
3
4
  /**
4
5
  * Detects if Buffer is zstd-compressed.
@@ -29,7 +30,9 @@ export declare function gunzipBuffer(buf: Buffer, options?: ZlibOptions): Promis
29
30
  */
30
31
  export declare function gzipString(s: string, options?: ZlibOptions): Promise<Buffer<ArrayBuffer>>;
31
32
  export declare function gunzipToString(buf: Buffer, options?: ZlibOptions): Promise<string>;
32
- export declare function zstdCompress(input: Buffer | string, options?: ZstdOptions): Promise<Buffer<ArrayBuffer>>;
33
+ export declare function zstdCompress(input: Buffer | string, level?: Integer, // defaults to 3
34
+ options?: ZstdOptions): Promise<Buffer<ArrayBuffer>>;
35
+ export declare function zstdLevelToOptions(level: Integer | undefined, opt?: ZstdOptions): ZstdOptions;
33
36
  export declare function zstdDecompressToString(input: Buffer, options?: ZstdOptions): Promise<string>;
34
37
  export declare function zstdDecompress(input: Buffer, options?: ZstdOptions): Promise<Buffer<ArrayBuffer>>;
35
38
  export declare function isZstdBuffer(input: Buffer): boolean;
@@ -59,8 +59,20 @@ export async function gzipString(s, options) {
59
59
  export async function gunzipToString(buf, options) {
60
60
  return (await gunzipBuffer(buf, options)).toString();
61
61
  }
62
- export async function zstdCompress(input, options = {}) {
63
- return await zstdCompressAsync(input, options);
62
+ export async function zstdCompress(input, level, // defaults to 3
63
+ options = {}) {
64
+ return await zstdCompressAsync(input, zstdLevelToOptions(level, options));
65
+ }
66
+ export function zstdLevelToOptions(level, opt = {}) {
67
+ if (!level)
68
+ return opt;
69
+ return {
70
+ ...opt,
71
+ params: {
72
+ ...opt.params,
73
+ [zlib.constants.ZSTD_c_compressionLevel]: level,
74
+ },
75
+ };
64
76
  }
65
77
  export async function zstdDecompressToString(input, options = {}) {
66
78
  return (await zstdDecompressAsync(input, options)).toString();
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@naturalcycles/nodejs-lib",
3
3
  "type": "module",
4
- "version": "15.70.1",
4
+ "version": "15.72.0",
5
5
  "dependencies": {
6
6
  "@naturalcycles/js-lib": "^15",
7
7
  "@types/js-yaml": "^4",
@@ -14,13 +14,11 @@
14
14
  "js-yaml": "^4",
15
15
  "jsonwebtoken": "^9",
16
16
  "lru-cache": "^11",
17
- "through2-concurrent": "^2",
18
17
  "tinyglobby": "^0.2",
19
18
  "tslib": "^2",
20
19
  "yargs": "^18"
21
20
  },
22
21
  "devDependencies": {
23
- "@types/through2-concurrent": "^2",
24
22
  "@naturalcycles/dev-lib": "18.4.2"
25
23
  },
26
24
  "exports": {
@@ -16,7 +16,6 @@ export * from './transform/transformFork.js'
16
16
  export * from './transform/transformLimit.js'
17
17
  export * from './transform/transformLogProgress.js'
18
18
  export * from './transform/transformMap.js'
19
- export * from './transform/transformMap2.js'
20
19
  export * from './transform/transformMapSimple.js'
21
20
  export * from './transform/transformMapSync.js'
22
21
  export * from './transform/transformNoOp.js'
@@ -24,6 +24,7 @@ import {
24
24
  type SKIP,
25
25
  } from '@naturalcycles/js-lib/types'
26
26
  import { fs2 } from '../fs/fs2.js'
27
+ import { zstdLevelToOptions } from '../zip/zip.util.js'
27
28
  import { createReadStreamAsNDJson } from './ndjson/createReadStreamAsNDJson.js'
28
29
  import { transformJsonParse } from './ndjson/transformJsonParse.js'
29
30
  import { transformToNDJson } from './ndjson/transformToNDJson.js'
@@ -45,7 +46,6 @@ import {
45
46
  type TransformLogProgressOptions,
46
47
  } from './transform/transformLogProgress.js'
47
48
  import { transformMap, type TransformMapOptions } from './transform/transformMap.js'
48
- import { transformMap2, type TransformMap2Options } from './transform/transformMap2.js'
49
49
  import {
50
50
  transformMapSimple,
51
51
  type TransformMapSimpleOptions,
@@ -185,7 +185,7 @@ export class Pipeline<T = unknown> {
185
185
  return this
186
186
  }
187
187
 
188
- mapLegacy<TO>(
188
+ map<TO>(
189
189
  mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>,
190
190
  opt?: TransformMapOptions<T, TO>,
191
191
  ): Pipeline<TO> {
@@ -198,19 +198,6 @@ export class Pipeline<T = unknown> {
198
198
  return this as any
199
199
  }
200
200
 
201
- map<TO>(
202
- mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>,
203
- opt?: TransformMap2Options<T, TO>,
204
- ): Pipeline<TO> {
205
- this.transforms.push(
206
- transformMap2(mapper, {
207
- ...opt,
208
- signal: this.abortableSignal,
209
- }),
210
- )
211
- return this as any
212
- }
213
-
214
201
  mapSync<TO>(
215
202
  mapper: IndexedMapper<T, TO | typeof SKIP | typeof END>,
216
203
  opt?: TransformMapSyncOptions,
@@ -231,7 +218,7 @@ export class Pipeline<T = unknown> {
231
218
 
232
219
  filter(asyncPredicate: AsyncPredicate<T>, opt?: TransformMapOptions): this {
233
220
  this.transforms.push(
234
- transformMap2(v => v, {
221
+ transformMap(v => v, {
235
222
  asyncPredicate,
236
223
  ...opt,
237
224
  signal: this.abortableSignal,
@@ -363,13 +350,12 @@ export class Pipeline<T = unknown> {
363
350
  return this as any
364
351
  }
365
352
 
366
- zstdCompress(this: Pipeline<Uint8Array>, opt?: ZstdOptions): Pipeline<Uint8Array> {
367
- this.transforms.push(
368
- createZstdCompress({
369
- // chunkSize: 64 * 1024, // no observed speedup
370
- ...opt,
371
- }),
372
- )
353
+ zstdCompress(
354
+ this: Pipeline<Uint8Array>,
355
+ level?: Integer, // defaults to 3
356
+ opt?: ZstdOptions,
357
+ ): Pipeline<Uint8Array> {
358
+ this.transforms.push(createZstdCompress(zstdLevelToOptions(level, opt)))
373
359
  this.objectMode = false
374
360
  return this as any
375
361
  }
@@ -398,21 +384,25 @@ export class Pipeline<T = unknown> {
398
384
  await this.run()
399
385
  }
400
386
 
401
- async toNDJsonFile(outputFilePath: string): Promise<void> {
387
+ /**
388
+ * level corresponds to zstd compression level (if filename ends with .zst),
389
+ * or gzip compression level (if filename ends with .gz).
390
+ * Default levels are:
391
+ * gzip: 6
392
+ * zstd: 3 (optimized for throughput, not size, may be larger than gzip at its default level)
393
+ */
394
+ async toNDJsonFile(outputFilePath: string, level?: Integer): Promise<void> {
402
395
  fs2.ensureFile(outputFilePath)
403
396
  this.transforms.push(transformToNDJson())
404
397
  if (outputFilePath.endsWith('.gz')) {
405
398
  this.transforms.push(
406
399
  createGzip({
400
+ level,
407
401
  // chunkSize: 64 * 1024, // no observed speedup
408
402
  }),
409
403
  )
410
404
  } else if (outputFilePath.endsWith('.zst')) {
411
- this.transforms.push(
412
- createZstdCompress({
413
- // chunkSize: 64 * 1024, // no observed speedup
414
- }),
415
- )
405
+ this.transforms.push(createZstdCompress(zstdLevelToOptions(level)))
416
406
  }
417
407
  this.destination = fs2.createWriteStream(outputFilePath, {
418
408
  // highWaterMark: 64 * 1024, // no observed speedup
@@ -425,29 +415,12 @@ export class Pipeline<T = unknown> {
425
415
  await this.run()
426
416
  }
427
417
 
428
- async forEachLegacy(
429
- fn: AsyncIndexedMapper<T, void>,
430
- opt: TransformMapOptions<T, void> & TransformLogProgressOptions<T> = {},
431
- ): Promise<void> {
432
- this.transforms.push(
433
- transformMap2(fn, {
434
- predicate: opt.logEvery ? _passthroughPredicate : undefined, // for the logger to work
435
- ...opt,
436
- signal: this.abortableSignal,
437
- }),
438
- )
439
- if (opt.logEvery) {
440
- this.transforms.push(transformLogProgress(opt))
441
- }
442
- await this.run()
443
- }
444
-
445
418
  async forEach(
446
419
  fn: AsyncIndexedMapper<T, void>,
447
- opt: TransformMap2Options<T, void> & TransformLogProgressOptions<T> = {},
420
+ opt: TransformMapOptions<T, void> & TransformLogProgressOptions<T> = {},
448
421
  ): Promise<void> {
449
422
  this.transforms.push(
450
- transformMap2(fn, {
423
+ transformMap(fn, {
451
424
  predicate: opt.logEvery ? _passthroughPredicate : undefined, // for the logger to work
452
425
  ...opt,
453
426
  signal: this.abortableSignal,
@@ -2,7 +2,7 @@ import { Transform } from 'node:stream'
2
2
  import type { AsyncPredicate, Predicate } from '@naturalcycles/js-lib/types'
3
3
  import type { TransformOptions, TransformTyped } from '../stream.model.js'
4
4
  import type { TransformMapOptions } from './transformMap.js'
5
- import { transformMap2 } from './transformMap2.js'
5
+ import { transformMap } from './transformMap.js'
6
6
 
7
7
  /**
8
8
  * Just a convenience wrapper around `transformMap` that has built-in predicate filtering support.
@@ -11,7 +11,7 @@ export function transformFilter<IN = any>(
11
11
  asyncPredicate: AsyncPredicate<IN>,
12
12
  opt: TransformMapOptions = {},
13
13
  ): TransformTyped<IN, IN> {
14
- return transformMap2(v => v, {
14
+ return transformMap(v => v, {
15
15
  asyncPredicate,
16
16
  ...opt,
17
17
  })