@naturalcycles/nodejs-lib 15.69.1 → 15.70.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,11 +62,8 @@ export declare class Pipeline<T = unknown> {
62
62
  flatten<TO>(this: Pipeline<readonly TO[]>): Pipeline<TO>;
63
63
  flattenIfNeeded(): Pipeline<T extends readonly (infer TO)[] ? TO : T>;
64
64
  logProgress(opt?: TransformLogProgressOptions): this;
65
- map<TO>(mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>, opt?: TransformMapOptions<T, TO>): Pipeline<TO>;
66
- /**
67
- * @experimental if proven to be stable - will replace transformMap
68
- */
69
- map2<TO>(mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>, opt?: TransformMap2Options<T, TO>): Pipeline<TO>;
65
+ mapLegacy<TO>(mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>, opt?: TransformMapOptions<T, TO>): Pipeline<TO>;
66
+ map<TO>(mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>, opt?: TransformMap2Options<T, TO>): Pipeline<TO>;
70
67
  mapSync<TO>(mapper: IndexedMapper<T, TO | typeof SKIP | typeof END>, opt?: TransformMapSyncOptions): Pipeline<TO>;
71
68
  mapSimple<TO>(mapper: IndexedMapper<T, TO>, opt?: TransformMapSimpleOptions): Pipeline<TO>;
72
69
  filter(asyncPredicate: AsyncPredicate<T>, opt?: TransformMapOptions): this;
@@ -108,8 +105,8 @@ export declare class Pipeline<T = unknown> {
108
105
  toFile(outputFilePath: string): Promise<void>;
109
106
  toNDJsonFile(outputFilePath: string): Promise<void>;
110
107
  to(destination: WritableTyped<T>): Promise<void>;
111
- forEach(fn: AsyncIndexedMapper<T, void>, opt?: TransformMapOptions<T, void> & TransformLogProgressOptions<T>): Promise<void>;
112
- forEach2(fn: AsyncIndexedMapper<T, void>, opt?: TransformMap2Options<T, void> & TransformLogProgressOptions<T>): Promise<void>;
108
+ forEachLegacy(fn: AsyncIndexedMapper<T, void>, opt?: TransformMapOptions<T, void> & TransformLogProgressOptions<T>): Promise<void>;
109
+ forEach(fn: AsyncIndexedMapper<T, void>, opt?: TransformMap2Options<T, void> & TransformLogProgressOptions<T>): Promise<void>;
113
110
  forEachSync(fn: IndexedMapper<T, void>, opt?: TransformMapSyncOptions<T, void> & TransformLogProgressOptions<T>): Promise<void>;
114
111
  run(): Promise<void>;
115
112
  }
@@ -128,17 +128,14 @@ export class Pipeline {
128
128
  this.transforms.push(transformLogProgress(opt));
129
129
  return this;
130
130
  }
131
- map(mapper, opt) {
131
+ mapLegacy(mapper, opt) {
132
132
  this.transforms.push(transformMap(mapper, {
133
133
  ...opt,
134
134
  signal: this.abortableSignal,
135
135
  }));
136
136
  return this;
137
137
  }
138
- /**
139
- * @experimental if proven to be stable - will replace transformMap
140
- */
141
- map2(mapper, opt) {
138
+ map(mapper, opt) {
142
139
  this.transforms.push(transformMap2(mapper, {
143
140
  ...opt,
144
141
  signal: this.abortableSignal,
@@ -157,7 +154,7 @@ export class Pipeline {
157
154
  return this;
158
155
  }
159
156
  filter(asyncPredicate, opt) {
160
- this.transforms.push(transformMap(v => v, {
157
+ this.transforms.push(transformMap2(v => v, {
161
158
  asyncPredicate,
162
159
  ...opt,
163
160
  signal: this.abortableSignal,
@@ -313,8 +310,8 @@ export class Pipeline {
313
310
  this.destination = destination;
314
311
  await this.run();
315
312
  }
316
- async forEach(fn, opt = {}) {
317
- this.transforms.push(transformMap(fn, {
313
+ async forEachLegacy(fn, opt = {}) {
314
+ this.transforms.push(transformMap2(fn, {
318
315
  predicate: opt.logEvery ? _passthroughPredicate : undefined, // for the logger to work
319
316
  ...opt,
320
317
  signal: this.abortableSignal,
@@ -324,7 +321,7 @@ export class Pipeline {
324
321
  }
325
322
  await this.run();
326
323
  }
327
- async forEach2(fn, opt = {}) {
324
+ async forEach(fn, opt = {}) {
328
325
  this.transforms.push(transformMap2(fn, {
329
326
  predicate: opt.logEvery ? _passthroughPredicate : undefined, // for the logger to work
330
327
  ...opt,
@@ -1,10 +1,10 @@
1
1
  import { Transform } from 'node:stream';
2
- import { transformMap } from './transformMap.js';
2
+ import { transformMap2 } from './transformMap2.js';
3
3
  /**
4
4
  * Just a convenience wrapper around `transformMap` that has built-in predicate filtering support.
5
5
  */
6
6
  export function transformFilter(asyncPredicate, opt = {}) {
7
- return transformMap(v => v, {
7
+ return transformMap2(v => v, {
8
8
  asyncPredicate,
9
9
  ...opt,
10
10
  });
@@ -6,6 +6,7 @@ import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
6
6
  import { END, SKIP, } from '@naturalcycles/js-lib/types';
7
7
  import { yellow } from '../../colors/colors.js';
8
8
  import { PIPELINE_GRACEFUL_ABORT } from '../stream.util.js';
9
+ const WARMUP_CHECK_INTERVAL_MS = 1000;
9
10
  /**
10
11
  * Like transformMap, but with native concurrency control (no through2-concurrent dependency)
11
12
  * and support for gradual warmup.
@@ -13,7 +14,7 @@ import { PIPELINE_GRACEFUL_ABORT } from '../stream.util.js';
13
14
  * @experimental
14
15
  */
15
16
  export function transformMap2(mapper, opt = {}) {
16
- const { concurrency = 16, warmupSeconds = 0, predicate, asyncPredicate, errorMode = ErrorMode.THROW_IMMEDIATELY, onError, onDone, metric = 'stream', signal, objectMode = true, highWaterMark = 64, } = opt;
17
+ const { concurrency: maxConcurrency = 16, warmupSeconds = 0, predicate, asyncPredicate, errorMode = ErrorMode.THROW_IMMEDIATELY, onError, onDone, metric = 'stream', signal, objectMode = true, highWaterMark = 64, } = opt;
17
18
  const warmupMs = warmupSeconds * 1000;
18
19
  const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel);
19
20
  // Stats
@@ -24,12 +25,14 @@ export function transformMap2(mapper, opt = {}) {
24
25
  let ok = true;
25
26
  let errors = 0;
26
27
  const collectedErrors = [];
27
- // Concurrency control
28
- let warmupComplete = warmupSeconds <= 0 || concurrency <= 1;
28
+ // Concurrency control - single counter, single callback for backpressure
29
29
  let inFlight = 0;
30
- const waiters = [];
31
- // Track pending operations for proper flush
32
- let pendingOperations = 0;
30
+ let blockedCallback = null;
31
+ let flushBlocked = null;
32
+ // Warmup - cached concurrency to reduce Date.now() syscalls
33
+ let warmupComplete = warmupSeconds <= 0 || maxConcurrency <= 1;
34
+ let concurrency = warmupComplete ? maxConcurrency : 1;
35
+ let lastWarmupCheck = 0;
33
36
  return new Transform({
34
37
  objectMode,
35
38
  readableHighWaterMark: highWaterMark,
@@ -38,47 +41,35 @@ export function transformMap2(mapper, opt = {}) {
38
41
  // Initialize start time on first item
39
42
  if (started === 0) {
40
43
  started = Date.now();
44
+ lastWarmupCheck = started;
41
45
  }
42
- // Stop processing if isSettled
43
46
  if (isSettled)
44
47
  return cb();
45
48
  const currentIndex = ++index;
46
- const currentConcurrency = getCurrentConcurrency();
47
- // Wait for a slot if at capacity
48
- if (inFlight >= currentConcurrency) {
49
- const waiter = pDefer();
50
- waiters.push(waiter);
51
- await waiter;
49
+ inFlight++;
50
+ if (!warmupComplete) {
51
+ updateConcurrency();
52
+ }
53
+ // Apply backpressure if at capacity, otherwise request more input
54
+ if (inFlight < concurrency) {
55
+ cb();
52
56
  }
53
57
  else {
54
- inFlight++;
58
+ blockedCallback = cb;
55
59
  }
56
- // Signal that we're ready for more input
57
- cb();
58
- // Track this operation
59
- pendingOperations++;
60
- // Process the item asynchronously
61
60
  try {
62
61
  const res = await mapper(chunk, currentIndex);
63
- if (isSettled) {
64
- release();
65
- pendingOperations--;
62
+ if (isSettled)
66
63
  return;
67
- }
68
64
  if (res === END) {
69
65
  isSettled = true;
70
66
  logger.log(`transformMap2 END received at index ${currentIndex}`);
71
67
  _assert(signal, 'signal is required when using END');
72
68
  signal.abort(new Error(PIPELINE_GRACEFUL_ABORT));
73
- release();
74
- pendingOperations--;
75
69
  return;
76
70
  }
77
- if (res === SKIP) {
78
- release();
79
- pendingOperations--;
71
+ if (res === SKIP)
80
72
  return;
81
- }
82
73
  let shouldPush = true;
83
74
  if (predicate) {
84
75
  shouldPush = predicate(res, currentIndex);
@@ -104,31 +95,33 @@ export function transformMap2(mapper, opt = {}) {
104
95
  if (errorMode === ErrorMode.THROW_IMMEDIATELY) {
105
96
  isSettled = true;
106
97
  ok = false;
107
- // Call onDone before destroying, since flush won't be called
108
98
  await callOnDone();
109
99
  this.destroy(_anyToError(err));
100
+ return;
110
101
  }
111
- else if (errorMode === ErrorMode.THROW_AGGREGATED) {
102
+ if (errorMode === ErrorMode.THROW_AGGREGATED) {
112
103
  collectedErrors.push(_anyToError(err));
113
104
  }
114
105
  }
115
106
  finally {
116
- release();
117
- pendingOperations--;
107
+ inFlight--;
108
+ // Release blocked callback if we now have capacity
109
+ if (blockedCallback && inFlight < concurrency) {
110
+ const pendingCb = blockedCallback;
111
+ blockedCallback = null;
112
+ pendingCb();
113
+ }
114
+ // Trigger flush completion if all done
115
+ if (inFlight === 0 && flushBlocked) {
116
+ flushBlocked.resolve();
117
+ }
118
118
  }
119
119
  },
120
120
  async flush(cb) {
121
- // Wait for all pending operations to complete
122
- // Polling is simple and race-condition-free
123
- // Timeout prevents infinite loop if something goes wrong
124
- const flushStart = Date.now();
125
- const flushTimeoutMs = 60_000;
126
- while (pendingOperations > 0) {
127
- await new Promise(resolve => setImmediate(resolve));
128
- if (Date.now() - flushStart > flushTimeoutMs) {
129
- logger.error(`transformMap2 flush timeout: ${pendingOperations} operations still pending after ${flushTimeoutMs}ms`);
130
- break;
131
- }
121
+ // Wait for all in-flight operations to complete
122
+ if (inFlight > 0) {
123
+ flushBlocked = pDefer();
124
+ await flushBlocked;
132
125
  }
133
126
  logErrorStats(true);
134
127
  await callOnDone();
@@ -140,25 +133,20 @@ export function transformMap2(mapper, opt = {}) {
140
133
  }
141
134
  },
142
135
  });
143
- function getCurrentConcurrency() {
144
- if (warmupComplete)
145
- return concurrency;
146
- const elapsed = Date.now() - started;
136
+ function updateConcurrency() {
137
+ const now = Date.now();
138
+ if (now - lastWarmupCheck < WARMUP_CHECK_INTERVAL_MS)
139
+ return;
140
+ lastWarmupCheck = now;
141
+ const elapsed = now - started;
147
142
  if (elapsed >= warmupMs) {
148
143
  warmupComplete = true;
144
+ concurrency = maxConcurrency;
149
145
  logger.log(`transformMap2: warmup complete in ${_since(started)}`);
150
- return concurrency;
146
+ return;
151
147
  }
152
148
  const progress = elapsed / warmupMs;
153
- return Math.max(1, Math.floor(1 + (concurrency - 1) * progress));
154
- }
155
- function release() {
156
- inFlight--;
157
- const currentConcurrency = getCurrentConcurrency();
158
- while (waiters.length && inFlight < currentConcurrency) {
159
- inFlight++;
160
- waiters.shift().resolve();
161
- }
149
+ concurrency = Math.max(1, Math.floor(1 + (maxConcurrency - 1) * progress));
162
150
  }
163
151
  function logErrorStats(final = false) {
164
152
  if (!errors)
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@naturalcycles/nodejs-lib",
3
3
  "type": "module",
4
- "version": "15.69.1",
4
+ "version": "15.70.1",
5
5
  "dependencies": {
6
6
  "@naturalcycles/js-lib": "^15",
7
7
  "@types/js-yaml": "^4",
@@ -185,7 +185,7 @@ export class Pipeline<T = unknown> {
185
185
  return this
186
186
  }
187
187
 
188
- map<TO>(
188
+ mapLegacy<TO>(
189
189
  mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>,
190
190
  opt?: TransformMapOptions<T, TO>,
191
191
  ): Pipeline<TO> {
@@ -198,10 +198,7 @@ export class Pipeline<T = unknown> {
198
198
  return this as any
199
199
  }
200
200
 
201
- /**
202
- * @experimental if proven to be stable - will replace transformMap
203
- */
204
- map2<TO>(
201
+ map<TO>(
205
202
  mapper: AbortableAsyncMapper<T, TO | typeof SKIP | typeof END>,
206
203
  opt?: TransformMap2Options<T, TO>,
207
204
  ): Pipeline<TO> {
@@ -234,7 +231,7 @@ export class Pipeline<T = unknown> {
234
231
 
235
232
  filter(asyncPredicate: AsyncPredicate<T>, opt?: TransformMapOptions): this {
236
233
  this.transforms.push(
237
- transformMap(v => v, {
234
+ transformMap2(v => v, {
238
235
  asyncPredicate,
239
236
  ...opt,
240
237
  signal: this.abortableSignal,
@@ -428,12 +425,12 @@ export class Pipeline<T = unknown> {
428
425
  await this.run()
429
426
  }
430
427
 
431
- async forEach(
428
+ async forEachLegacy(
432
429
  fn: AsyncIndexedMapper<T, void>,
433
430
  opt: TransformMapOptions<T, void> & TransformLogProgressOptions<T> = {},
434
431
  ): Promise<void> {
435
432
  this.transforms.push(
436
- transformMap(fn, {
433
+ transformMap2(fn, {
437
434
  predicate: opt.logEvery ? _passthroughPredicate : undefined, // for the logger to work
438
435
  ...opt,
439
436
  signal: this.abortableSignal,
@@ -445,7 +442,7 @@ export class Pipeline<T = unknown> {
445
442
  await this.run()
446
443
  }
447
444
 
448
- async forEach2(
445
+ async forEach(
449
446
  fn: AsyncIndexedMapper<T, void>,
450
447
  opt: TransformMap2Options<T, void> & TransformLogProgressOptions<T> = {},
451
448
  ): Promise<void> {
@@ -2,7 +2,7 @@ import { Transform } from 'node:stream'
2
2
  import type { AsyncPredicate, Predicate } from '@naturalcycles/js-lib/types'
3
3
  import type { TransformOptions, TransformTyped } from '../stream.model.js'
4
4
  import type { TransformMapOptions } from './transformMap.js'
5
- import { transformMap } from './transformMap.js'
5
+ import { transformMap2 } from './transformMap2.js'
6
6
 
7
7
  /**
8
8
  * Just a convenience wrapper around `transformMap` that has built-in predicate filtering support.
@@ -11,7 +11,7 @@ export function transformFilter<IN = any>(
11
11
  asyncPredicate: AsyncPredicate<IN>,
12
12
  opt: TransformMapOptions = {},
13
13
  ): TransformTyped<IN, IN> {
14
- return transformMap(v => v, {
14
+ return transformMap2(v => v, {
15
15
  asyncPredicate,
16
16
  ...opt,
17
17
  })
@@ -84,6 +84,8 @@ export interface TransformMap2Options<IN = any, OUT = IN> extends TransformOptio
84
84
  signal?: AbortableSignal
85
85
  }
86
86
 
87
+ const WARMUP_CHECK_INTERVAL_MS = 1000
88
+
87
89
  /**
88
90
  * Like transformMap, but with native concurrency control (no through2-concurrent dependency)
89
91
  * and support for gradual warmup.
@@ -95,7 +97,7 @@ export function transformMap2<IN = any, OUT = IN>(
95
97
  opt: TransformMap2Options<IN, OUT> = {},
96
98
  ): TransformTyped<IN, OUT> {
97
99
  const {
98
- concurrency = 16,
100
+ concurrency: maxConcurrency = 16,
99
101
  warmupSeconds = 0,
100
102
  predicate,
101
103
  asyncPredicate,
@@ -120,13 +122,15 @@ export function transformMap2<IN = any, OUT = IN>(
120
122
  let errors = 0
121
123
  const collectedErrors: Error[] = []
122
124
 
123
- // Concurrency control
124
- let warmupComplete = warmupSeconds <= 0 || concurrency <= 1
125
+ // Concurrency control - single counter, single callback for backpressure
125
126
  let inFlight = 0
126
- const waiters: DeferredPromise[] = []
127
+ let blockedCallback: (() => void) | null = null
128
+ let flushBlocked: DeferredPromise | null = null
127
129
 
128
- // Track pending operations for proper flush
129
- let pendingOperations = 0
130
+ // Warmup - cached concurrency to reduce Date.now() syscalls
131
+ let warmupComplete = warmupSeconds <= 0 || maxConcurrency <= 1
132
+ let concurrency = warmupComplete ? maxConcurrency : 1
133
+ let lastWarmupCheck = 0
130
134
 
131
135
  return new Transform({
132
136
  objectMode,
@@ -136,54 +140,38 @@ export function transformMap2<IN = any, OUT = IN>(
136
140
  // Initialize start time on first item
137
141
  if (started === 0) {
138
142
  started = Date.now() as UnixTimestampMillis
143
+ lastWarmupCheck = started
139
144
  }
140
145
 
141
- // Stop processing if isSettled
142
146
  if (isSettled) return cb()
143
147
 
144
148
  const currentIndex = ++index
145
- const currentConcurrency = getCurrentConcurrency()
149
+ inFlight++
150
+ if (!warmupComplete) {
151
+ updateConcurrency()
152
+ }
146
153
 
147
- // Wait for a slot if at capacity
148
- if (inFlight >= currentConcurrency) {
149
- const waiter = pDefer()
150
- waiters.push(waiter)
151
- await waiter
154
+ // Apply backpressure if at capacity, otherwise request more input
155
+ if (inFlight < concurrency) {
156
+ cb()
152
157
  } else {
153
- inFlight++
158
+ blockedCallback = cb
154
159
  }
155
160
 
156
- // Signal that we're ready for more input
157
- cb()
158
-
159
- // Track this operation
160
- pendingOperations++
161
-
162
- // Process the item asynchronously
163
161
  try {
164
162
  const res: OUT | typeof SKIP | typeof END = await mapper(chunk, currentIndex)
165
163
 
166
- if (isSettled) {
167
- release()
168
- pendingOperations--
169
- return
170
- }
164
+ if (isSettled) return
171
165
 
172
166
  if (res === END) {
173
167
  isSettled = true
174
168
  logger.log(`transformMap2 END received at index ${currentIndex}`)
175
169
  _assert(signal, 'signal is required when using END')
176
170
  signal.abort(new Error(PIPELINE_GRACEFUL_ABORT))
177
- release()
178
- pendingOperations--
179
171
  return
180
172
  }
181
173
 
182
- if (res === SKIP) {
183
- release()
184
- pendingOperations--
185
- return
186
- }
174
+ if (res === SKIP) return
187
175
 
188
176
  let shouldPush = true
189
177
  if (predicate) {
@@ -210,31 +198,34 @@ export function transformMap2<IN = any, OUT = IN>(
210
198
  if (errorMode === ErrorMode.THROW_IMMEDIATELY) {
211
199
  isSettled = true
212
200
  ok = false
213
- // Call onDone before destroying, since flush won't be called
214
201
  await callOnDone()
215
202
  this.destroy(_anyToError(err))
216
- } else if (errorMode === ErrorMode.THROW_AGGREGATED) {
203
+ return
204
+ }
205
+ if (errorMode === ErrorMode.THROW_AGGREGATED) {
217
206
  collectedErrors.push(_anyToError(err))
218
207
  }
219
208
  } finally {
220
- release()
221
- pendingOperations--
209
+ inFlight--
210
+
211
+ // Release blocked callback if we now have capacity
212
+ if (blockedCallback && inFlight < concurrency) {
213
+ const pendingCb = blockedCallback
214
+ blockedCallback = null
215
+ pendingCb()
216
+ }
217
+
218
+ // Trigger flush completion if all done
219
+ if (inFlight === 0 && flushBlocked) {
220
+ flushBlocked.resolve()
221
+ }
222
222
  }
223
223
  },
224
224
  async flush(cb) {
225
- // Wait for all pending operations to complete
226
- // Polling is simple and race-condition-free
227
- // Timeout prevents infinite loop if something goes wrong
228
- const flushStart = Date.now()
229
- const flushTimeoutMs = 60_000
230
- while (pendingOperations > 0) {
231
- await new Promise(resolve => setImmediate(resolve))
232
- if (Date.now() - flushStart > flushTimeoutMs) {
233
- logger.error(
234
- `transformMap2 flush timeout: ${pendingOperations} operations still pending after ${flushTimeoutMs}ms`,
235
- )
236
- break
237
- }
225
+ // Wait for all in-flight operations to complete
226
+ if (inFlight > 0) {
227
+ flushBlocked = pDefer()
228
+ await flushBlocked
238
229
  }
239
230
 
240
231
  logErrorStats(true)
@@ -253,27 +244,21 @@ export function transformMap2<IN = any, OUT = IN>(
253
244
  },
254
245
  })
255
246
 
256
- function getCurrentConcurrency(): number {
257
- if (warmupComplete) return concurrency
247
+ function updateConcurrency(): void {
248
+ const now = Date.now()
249
+ if (now - lastWarmupCheck < WARMUP_CHECK_INTERVAL_MS) return
250
+ lastWarmupCheck = now
258
251
 
259
- const elapsed = Date.now() - started
252
+ const elapsed = now - started
260
253
  if (elapsed >= warmupMs) {
261
254
  warmupComplete = true
255
+ concurrency = maxConcurrency
262
256
  logger.log(`transformMap2: warmup complete in ${_since(started)}`)
263
- return concurrency
257
+ return
264
258
  }
265
259
 
266
260
  const progress = elapsed / warmupMs
267
- return Math.max(1, Math.floor(1 + (concurrency - 1) * progress))
268
- }
269
-
270
- function release(): void {
271
- inFlight--
272
- const currentConcurrency = getCurrentConcurrency()
273
- while (waiters.length && inFlight < currentConcurrency) {
274
- inFlight++
275
- waiters.shift()!.resolve()
276
- }
261
+ concurrency = Math.max(1, Math.floor(1 + (maxConcurrency - 1) * progress))
277
262
  }
278
263
 
279
264
  function logErrorStats(final = false): void {