@naturalcycles/nodejs-lib 15.18.0 → 15.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@ export * from './readable/readableToArray.js';
14
14
  export * from './stream.model.js';
15
15
  export * from './transform/transformChunk.js';
16
16
  export * from './transform/transformFilter.js';
17
+ export * from './transform/transformFlatten.js';
17
18
  export * from './transform/transformLimit.js';
18
19
  export * from './transform/transformLogProgress.js';
19
20
  export * from './transform/transformMap.js';
@@ -14,6 +14,7 @@ export * from './readable/readableToArray.js';
14
14
  export * from './stream.model.js';
15
15
  export * from './transform/transformChunk.js';
16
16
  export * from './transform/transformFilter.js';
17
+ export * from './transform/transformFlatten.js';
17
18
  export * from './transform/transformLimit.js';
18
19
  export * from './transform/transformLogProgress.js';
19
20
  export * from './transform/transformMap.js';
@@ -9,12 +9,6 @@ export interface NDJSONMapOptions<IN = any, OUT = IN> extends TransformMapOption
9
9
  * @default 100_000
10
10
  */
11
11
  logEveryOutput?: number;
12
- /**
13
- * Defaults to `true` for ndjsonMap
14
- *
15
- * @default true
16
- */
17
- flattenArrayOutput?: boolean;
18
12
  }
19
13
  /**
20
14
  * Unzips input file automatically, if it ends with `.gz`.
@@ -1,5 +1,5 @@
1
1
  import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js';
2
- import { createReadStreamAsNDJSON, createWriteStreamAsNDJSON, } from '../index.js';
2
+ import { createReadStreamAsNDJSON, createWriteStreamAsNDJSON, transformFlatten, } from '../index.js';
3
3
  import { _pipeline, transformLimit, transformLogProgress, transformMap } from '../index.js';
4
4
  /**
5
5
  * Unzips input file automatically, if it ends with `.gz`.
@@ -16,10 +16,10 @@ export async function ndjsonMap(mapper, opt) {
16
16
  readable,
17
17
  transformLogProgress({ metric: 'read', ...opt }),
18
18
  transformMap(mapper, {
19
- flattenArrayOutput: true,
20
19
  errorMode: ErrorMode.SUPPRESS,
21
20
  ...opt,
22
21
  }),
22
+ transformFlatten(),
23
23
  transformLimit({ limit: limitOutput, sourceReadable: readable }),
24
24
  transformLogProgress({ metric: 'saved', logEvery: logEveryOutput }),
25
25
  ...createWriteStreamAsNDJSON(outputFilePath),
@@ -1,5 +1,5 @@
1
1
  import { inspect } from 'node:util';
2
- import { _mb } from '@naturalcycles/js-lib';
2
+ import { _hc, _mb } from '@naturalcycles/js-lib';
3
3
  import { _since, localTime } from '@naturalcycles/js-lib/datetime';
4
4
  import { SimpleMovingAverage } from '@naturalcycles/js-lib/math';
5
5
  import { boldWhite, dimGrey, hasColors, white, yellow } from '../colors/colors.js';
@@ -108,11 +108,8 @@ export class ProgressLogger {
108
108
  }
109
109
  }
110
110
  if (tenx) {
111
- let perHour = Math.round((batchedProgress * 1000 * 60 * 60) / (now - this.started)) || 0;
112
- if (perHour > 900) {
113
- perHour = Math.round(perHour / 1000) + 'K';
114
- }
115
- logger.log(`${dimGrey(localTime.now().toPretty())} ${white(metric)} took ${yellow(_since(this.started))} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`);
111
+ const perHour = _hc((batchedProgress * 1000 * 60 * 60) / (now - this.started));
112
+ logger.log(`${dimGrey(localTime.now().toPretty())} ${white(metric)} took ${yellow(_since(this.started))} so far to process ${yellow(_hc(batchedProgress))} rows, ~${yellow(perHour)}/hour`);
116
113
  }
117
114
  else if (final) {
118
115
  logger.log(`${boldWhite(metric)} took ${yellow(_since(this.started))} to process ${yellow(batchedProgress)} rows with total RPS of ${yellow(rpsTotal)}`);
@@ -0,0 +1,2 @@
1
+ import type { TransformTyped } from '../stream.model.js';
2
+ export declare function transformFlatten<T>(): TransformTyped<T[], T>;
@@ -0,0 +1,18 @@
1
+ import { Transform } from 'node:stream';
2
+ export function transformFlatten() {
3
+ return new Transform({
4
+ objectMode: true,
5
+ transform(chunk, _, cb) {
6
+ if (!Array.isArray(chunk)) {
7
+ // As a safety precaution, to not crash the pipeline - push as is
8
+ this.push(chunk);
9
+ }
10
+ else {
11
+ for (const item of chunk) {
12
+ this.push(item);
13
+ }
14
+ }
15
+ cb(); // acknowledge
16
+ },
17
+ });
18
+ }
@@ -3,12 +3,6 @@ import type { CommonLogger } from '@naturalcycles/js-lib/log';
3
3
  import { type AbortableAsyncMapper, type AsyncPredicate, END, type Promisable, SKIP, type StringMap, type UnixTimestampMillis } from '@naturalcycles/js-lib/types';
4
4
  import type { TransformTyped } from '../stream.model.js';
5
5
  export interface TransformMapOptions<IN = any, OUT = IN> {
6
- /**
7
- * Set true to support "multiMap" - possibility to return [] and emit 1 result for each item in the array.
8
- *
9
- * @default false
10
- */
11
- flattenArrayOutput?: boolean;
12
6
  /**
13
7
  * Predicate to filter outgoing results (after mapper).
14
8
  * Allows to not emit all results.
@@ -1,7 +1,6 @@
1
1
  import { _hc } from '@naturalcycles/js-lib';
2
2
  import { _since } from '@naturalcycles/js-lib/datetime/time.util.js';
3
3
  import { _anyToError, ErrorMode } from '@naturalcycles/js-lib/error';
4
- import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
5
4
  import { _stringify } from '@naturalcycles/js-lib/string/stringify.js';
6
5
  import { END, SKIP, } from '@naturalcycles/js-lib/types';
7
6
  import through2Concurrent from 'through2-concurrent';
@@ -23,7 +22,7 @@ import { pipelineClose } from '../stream.util.js';
23
22
  */
24
23
  export function transformMap(mapper, opt = {}) {
25
24
  const { concurrency = 16, predicate, // we now default to "no predicate" (meaning pass-everything)
26
- errorMode = ErrorMode.THROW_IMMEDIATELY, flattenArrayOutput, onError, onDone, metric = 'stream', logger = console, } = opt;
25
+ errorMode = ErrorMode.THROW_IMMEDIATELY, onError, onDone, metric = 'stream', logger = console, } = opt;
27
26
  const started = Date.now();
28
27
  let index = -1;
29
28
  let countOut = 0;
@@ -77,38 +76,23 @@ export function transformMap(mapper, opt = {}) {
77
76
  const currentIndex = ++index;
78
77
  try {
79
78
  const res = await mapper(chunk, currentIndex);
80
- // todo: consider retiring flattenArrayOutput from here
81
- // and implementing it as a separate .flat transform/operator
82
- const resInput = (flattenArrayOutput && Array.isArray(res) ? res : [res]);
83
- if (predicate) {
84
- await pMap(resInput, async (r) => {
85
- if (r === END) {
86
- isSettled = true; // will be checked later
87
- return END;
88
- }
89
- if (r === SKIP)
90
- return;
91
- if (await predicate(r, currentIndex)) {
92
- countOut++;
93
- this.push(r);
94
- }
95
- });
96
- }
97
- else {
98
- for (const r of resInput) {
99
- if (r === END) {
100
- isSettled = true; // will be checked later
101
- break;
102
- }
103
- if (r === SKIP)
104
- continue;
105
- countOut++;
106
- this.push(r);
107
- }
108
- }
109
- if (isSettled) {
79
+ // Check for isSettled again, as it may happen while mapper was running
80
+ if (isSettled)
81
+ return cb();
82
+ if (res === END) {
83
+ isSettled = true;
110
84
  logger.log(`transformMap END received at index ${currentIndex}`);
111
85
  pipelineClose('transformMap', this, this.sourceReadable, this.streamDone, logger);
86
+ return cb();
87
+ }
88
+ if (res === SKIP) {
89
+ // do nothing, don't push
90
+ return cb();
91
+ }
92
+ if (!predicate || ((await predicate(res, currentIndex)) && !isSettled)) {
93
+ // isSettled could have happened in parallel, hence the extra check
94
+ countOut++;
95
+ this.push(res);
112
96
  }
113
97
  cb(); // done processing
114
98
  }
@@ -10,11 +10,6 @@ export interface TransformMapSyncOptions<IN = any, OUT = IN> {
10
10
  * @default true
11
11
  */
12
12
  objectMode?: boolean;
13
- /**
14
- * @default false
15
- * Set true to support "multiMap" - possibility to return [] and emit 1 result for each item in the array.
16
- */
17
- flattenArrayOutput?: boolean;
18
13
  /**
19
14
  * Predicate to filter outgoing results (after mapper).
20
15
  * Allows to not emit all results.
@@ -11,7 +11,7 @@ export class TransformMapSync extends AbortableTransform {
11
11
  */
12
12
  export function transformMapSync(mapper, opt = {}) {
13
13
  const { predicate, // defaults to "no predicate" (pass everything)
14
- errorMode = ErrorMode.THROW_IMMEDIATELY, flattenArrayOutput = false, onError, onDone, metric = 'stream', objectMode = true, logger = console, } = opt;
14
+ errorMode = ErrorMode.THROW_IMMEDIATELY, onError, onDone, metric = 'stream', objectMode = true, logger = console, } = opt;
15
15
  const started = Date.now();
16
16
  let index = -1;
17
17
  let countOut = 0;
@@ -29,21 +29,19 @@ export function transformMapSync(mapper, opt = {}) {
29
29
  try {
30
30
  // map and pass through
31
31
  const v = mapper(chunk, currentIndex);
32
- // todo: consider retiring flattenArrayOutput option
33
- const vInput = (flattenArrayOutput && Array.isArray(v) ? v : [v]);
34
- for (const r of vInput) {
35
- if (r === END) {
36
- isSettled = true; // will be checked later
37
- break;
38
- }
39
- if (r !== SKIP && (!predicate || predicate(r, currentIndex))) {
40
- countOut++;
41
- this.push(r);
42
- }
43
- }
44
- if (isSettled) {
32
+ if (v === END) {
33
+ isSettled = true; // will be checked later
45
34
  logger.log(`transformMapSync END received at index ${currentIndex}`);
46
35
  pipelineClose('transformMapSync', this, this.sourceReadable, this.streamDone, logger);
36
+ return cb();
37
+ }
38
+ if (v === SKIP) {
39
+ // do nothing, don't push
40
+ return cb();
41
+ }
42
+ if (!predicate || predicate(v, currentIndex)) {
43
+ countOut++;
44
+ this.push(v);
47
45
  }
48
46
  cb(); // done processing
49
47
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@naturalcycles/nodejs-lib",
3
3
  "type": "module",
4
- "version": "15.18.0",
4
+ "version": "15.19.0",
5
5
  "dependencies": {
6
6
  "@naturalcycles/js-lib": "^15",
7
7
  "@types/js-yaml": "^4",
@@ -14,6 +14,7 @@ export * from './readable/readableToArray.js'
14
14
  export * from './stream.model.js'
15
15
  export * from './transform/transformChunk.js'
16
16
  export * from './transform/transformFilter.js'
17
+ export * from './transform/transformFlatten.js'
17
18
  export * from './transform/transformLimit.js'
18
19
  export * from './transform/transformLogProgress.js'
19
20
  export * from './transform/transformMap.js'
@@ -3,6 +3,7 @@ import type { AbortableAsyncMapper } from '@naturalcycles/js-lib/types'
3
3
  import {
4
4
  createReadStreamAsNDJSON,
5
5
  createWriteStreamAsNDJSON,
6
+ transformFlatten,
6
7
  type TransformLogProgressOptions,
7
8
  type TransformMapOptions,
8
9
  } from '../index.js'
@@ -21,13 +22,6 @@ export interface NDJSONMapOptions<IN = any, OUT = IN>
21
22
  * @default 100_000
22
23
  */
23
24
  logEveryOutput?: number
24
-
25
- /**
26
- * Defaults to `true` for ndjsonMap
27
- *
28
- * @default true
29
- */
30
- flattenArrayOutput?: boolean
31
25
  }
32
26
 
33
27
  /**
@@ -53,10 +47,10 @@ export async function ndjsonMap<IN = any, OUT = any>(
53
47
  readable,
54
48
  transformLogProgress({ metric: 'read', ...opt }),
55
49
  transformMap(mapper, {
56
- flattenArrayOutput: true,
57
50
  errorMode: ErrorMode.SUPPRESS,
58
51
  ...opt,
59
52
  }),
53
+ transformFlatten(),
60
54
  transformLimit({ limit: limitOutput, sourceReadable: readable }),
61
55
  transformLogProgress({ metric: 'saved', logEvery: logEveryOutput }),
62
56
  ...createWriteStreamAsNDJSON(outputFilePath),
@@ -1,6 +1,6 @@
1
1
  import type { InspectOptions } from 'node:util'
2
2
  import { inspect } from 'node:util'
3
- import { _mb } from '@naturalcycles/js-lib'
3
+ import { _hc, _mb } from '@naturalcycles/js-lib'
4
4
  import { _since, localTime } from '@naturalcycles/js-lib/datetime'
5
5
  import type { CommonLogger } from '@naturalcycles/js-lib/log'
6
6
  import { SimpleMovingAverage } from '@naturalcycles/js-lib/math'
@@ -284,16 +284,12 @@ export class ProgressLogger<T> implements Disposable {
284
284
  }
285
285
 
286
286
  if (tenx) {
287
- let perHour: number | string =
288
- Math.round((batchedProgress * 1000 * 60 * 60) / (now - this.started)) || 0
289
- if (perHour > 900) {
290
- perHour = Math.round(perHour / 1000) + 'K'
291
- }
287
+ const perHour = _hc((batchedProgress * 1000 * 60 * 60) / (now - this.started))
292
288
 
293
289
  logger.log(
294
290
  `${dimGrey(localTime.now().toPretty())} ${white(metric)} took ${yellow(
295
291
  _since(this.started),
296
- )} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`,
292
+ )} so far to process ${yellow(_hc(batchedProgress))} rows, ~${yellow(perHour)}/hour`,
297
293
  )
298
294
  } else if (final) {
299
295
  logger.log(
@@ -0,0 +1,19 @@
1
+ import { Transform } from 'node:stream'
2
+ import type { TransformTyped } from '../stream.model.js'
3
+
4
+ export function transformFlatten<T>(): TransformTyped<T[], T> {
5
+ return new Transform({
6
+ objectMode: true,
7
+ transform(chunk: T[], _, cb) {
8
+ if (!Array.isArray(chunk)) {
9
+ // As a safety precaution, to not crash the pipeline - push as is
10
+ this.push(chunk)
11
+ } else {
12
+ for (const item of chunk) {
13
+ this.push(item)
14
+ }
15
+ }
16
+ cb() // acknowledge
17
+ },
18
+ })
19
+ }
@@ -2,7 +2,6 @@ import { _hc } from '@naturalcycles/js-lib'
2
2
  import { _since } from '@naturalcycles/js-lib/datetime/time.util.js'
3
3
  import { _anyToError, ErrorMode } from '@naturalcycles/js-lib/error'
4
4
  import type { CommonLogger } from '@naturalcycles/js-lib/log'
5
- import { pMap } from '@naturalcycles/js-lib/promise/pMap.js'
6
5
  import { _stringify } from '@naturalcycles/js-lib/string/stringify.js'
7
6
  import {
8
7
  type AbortableAsyncMapper,
@@ -20,13 +19,6 @@ import type { TransformTyped } from '../stream.model.js'
20
19
  import { pipelineClose } from '../stream.util.js'
21
20
 
22
21
  export interface TransformMapOptions<IN = any, OUT = IN> {
23
- /**
24
- * Set true to support "multiMap" - possibility to return [] and emit 1 result for each item in the array.
25
- *
26
- * @default false
27
- */
28
- flattenArrayOutput?: boolean
29
-
30
22
  /**
31
23
  * Predicate to filter outgoing results (after mapper).
32
24
  * Allows to not emit all results.
@@ -133,7 +125,6 @@ export function transformMap<IN = any, OUT = IN>(
133
125
  concurrency = 16,
134
126
  predicate, // we now default to "no predicate" (meaning pass-everything)
135
127
  errorMode = ErrorMode.THROW_IMMEDIATELY,
136
- flattenArrayOutput,
137
128
  onError,
138
129
  onDone,
139
130
  metric = 'stream',
@@ -204,41 +195,25 @@ export function transformMap<IN = any, OUT = IN>(
204
195
 
205
196
  try {
206
197
  const res: OUT | typeof SKIP | typeof END = await mapper(chunk, currentIndex)
207
- // todo: consider retiring flattenArrayOutput from here
208
- // and implementing it as a separate .flat transform/operator
209
- const resInput = (flattenArrayOutput && Array.isArray(res) ? res : [res]) as (
210
- | OUT
211
- | typeof SKIP
212
- | typeof END
213
- )[]
214
-
215
- if (predicate) {
216
- await pMap(resInput, async r => {
217
- if (r === END) {
218
- isSettled = true // will be checked later
219
- return END
220
- }
221
- if (r === SKIP) return
222
- if (await predicate(r, currentIndex)) {
223
- countOut++
224
- this.push(r)
225
- }
226
- })
227
- } else {
228
- for (const r of resInput) {
229
- if (r === END) {
230
- isSettled = true // will be checked later
231
- break
232
- }
233
- if (r === SKIP) continue
234
- countOut++
235
- this.push(r)
236
- }
237
- }
198
+ // Check for isSettled again, as it may happen while mapper was running
199
+ if (isSettled) return cb()
238
200
 
239
- if (isSettled) {
201
+ if (res === END) {
202
+ isSettled = true
240
203
  logger.log(`transformMap END received at index ${currentIndex}`)
241
204
  pipelineClose('transformMap', this, this.sourceReadable, this.streamDone, logger)
205
+ return cb()
206
+ }
207
+
208
+ if (res === SKIP) {
209
+ // do nothing, don't push
210
+ return cb()
211
+ }
212
+
213
+ if (!predicate || ((await predicate(res, currentIndex)) && !isSettled)) {
214
+ // isSettled could have happened in parallel, hence the extra check
215
+ countOut++
216
+ this.push(res)
242
217
  }
243
218
 
244
219
  cb() // done processing
@@ -14,12 +14,6 @@ export interface TransformMapSyncOptions<IN = any, OUT = IN> {
14
14
  */
15
15
  objectMode?: boolean
16
16
 
17
- /**
18
- * @default false
19
- * Set true to support "multiMap" - possibility to return [] and emit 1 result for each item in the array.
20
- */
21
- flattenArrayOutput?: boolean
22
-
23
17
  /**
24
18
  * Predicate to filter outgoing results (after mapper).
25
19
  * Allows to not emit all results.
@@ -75,7 +69,6 @@ export function transformMapSync<IN = any, OUT = IN>(
75
69
  const {
76
70
  predicate, // defaults to "no predicate" (pass everything)
77
71
  errorMode = ErrorMode.THROW_IMMEDIATELY,
78
- flattenArrayOutput = false,
79
72
  onError,
80
73
  onDone,
81
74
  metric = 'stream',
@@ -102,27 +95,22 @@ export function transformMapSync<IN = any, OUT = IN>(
102
95
  try {
103
96
  // map and pass through
104
97
  const v = mapper(chunk, currentIndex)
105
- // todo: consider retiring flattenArrayOutput option
106
- const vInput = (flattenArrayOutput && Array.isArray(v) ? v : [v]) as (
107
- | OUT
108
- | typeof SKIP
109
- | typeof END
110
- )[]
111
-
112
- for (const r of vInput) {
113
- if (r === END) {
114
- isSettled = true // will be checked later
115
- break
116
- }
117
- if (r !== SKIP && (!predicate || predicate(r, currentIndex))) {
118
- countOut++
119
- this.push(r)
120
- }
121
- }
122
98
 
123
- if (isSettled) {
99
+ if (v === END) {
100
+ isSettled = true // will be checked later
124
101
  logger.log(`transformMapSync END received at index ${currentIndex}`)
125
102
  pipelineClose('transformMapSync', this, this.sourceReadable, this.streamDone, logger)
103
+ return cb()
104
+ }
105
+
106
+ if (v === SKIP) {
107
+ // do nothing, don't push
108
+ return cb()
109
+ }
110
+
111
+ if (!predicate || predicate(v, currentIndex)) {
112
+ countOut++
113
+ this.push(v)
126
114
  }
127
115
 
128
116
  cb() // done processing