@naturalcycles/nodejs-lib 13.9.1 → 13.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/dist/csv/csvReader.js +1 -1
  2. package/dist/index.d.ts +1 -2
  3. package/dist/index.js +1 -2
  4. package/dist/stream/progressLogger.d.ts +152 -0
  5. package/dist/stream/progressLogger.js +133 -0
  6. package/dist/stream/readable/readableCreate.d.ts +1 -1
  7. package/dist/stream/readable/readableCreate.js +2 -2
  8. package/dist/stream/readable/readableForEach.d.ts +2 -0
  9. package/dist/stream/readable/readableForEach.js +2 -0
  10. package/dist/stream/readable/readableToArray.d.ts +3 -0
  11. package/dist/stream/readable/readableToArray.js +11 -5
  12. package/dist/stream/stream.model.d.ts +24 -2
  13. package/dist/stream/transform/transformLogProgress.d.ts +2 -105
  14. package/dist/stream/transform/transformLogProgress.js +4 -82
  15. package/dist/stream/transform/transformTee.d.ts +1 -1
  16. package/package.json +1 -1
  17. package/src/csv/csvReader.ts +1 -1
  18. package/src/index.ts +1 -2
  19. package/src/stream/progressLogger.ts +324 -0
  20. package/src/stream/readable/readableCreate.ts +2 -2
  21. package/src/stream/readable/readableForEach.ts +2 -0
  22. package/src/stream/readable/readableToArray.ts +11 -7
  23. package/src/stream/stream.model.ts +46 -4
  24. package/src/stream/transform/transformLogProgress.ts +7 -253
  25. package/src/stream/transform/transformTee.ts +1 -1
  26. package/dist/stream/readable/readableMap.d.ts +0 -3
  27. package/dist/stream/readable/readableMap.js +0 -31
  28. package/dist/stream/readable/readableMapToArray.d.ts +0 -10
  29. package/dist/stream/readable/readableMapToArray.js +0 -16
  30. package/src/stream/readable/readableMap.ts +0 -34
  31. package/src/stream/readable/readableMapToArray.ts +0 -22
package/src/stream/transform/transformLogProgress.ts
@@ -1,155 +1,10 @@
  import { Transform } from 'node:stream'
- import { inspect, InspectOptions } from 'node:util'
- import {
-   SimpleMovingAverage,
-   _mb,
-   _since,
-   AnyObject,
-   CommonLogger,
-   localTimeNow,
- } from '@naturalcycles/js-lib'
- import { hasColors, boldWhite, dimGrey, white, yellow } from '../../colors/colors'
- import { SizeStack } from '../sizeStack'
+ import { progressLogger, ProgressLoggerCfg } from '../progressLogger'
  import { TransformOptions, TransformTyped } from '../stream.model'

- export interface TransformLogProgressOptions<IN = any> extends TransformOptions {
-   /**
-    * Progress metric
-    *
-    * @default `progress`
-    */
-   metric?: string
-
-   /**
-    * Include `heapUsed` in log.
-    *
-    * @default false
-    */
-   heapUsed?: boolean
-
-   /**
-    * Include `heapTotal` in log.
-    *
-    * @default false
-    */
-   heapTotal?: boolean
-
-   /**
-    * Include `rss` in log.
-    *
-    * @default true
-    */
-   rss?: boolean
-
-   /**
-    * Incude Peak RSS in log.
-    *
-    * @default true
-    */
-   peakRSS?: boolean
-
-   /**
-    * Include `external` in log.
-    *
-    * @default false
-    */
-   external?: boolean
-
-   /**
-    * Include `arrayBuffers` in log.
-    *
-    * @default false
-    */
-   arrayBuffers?: boolean
-
-   /**
-    * Log (rss - heapTotal)
-    * For convenience of debugging "out-of-heap" memory size.
-    *
-    * @default false
-    */
-   rssMinusHeap?: boolean
-
-   /**
-    * Log "rows per second"
-    *
-    * @default true
-    */
-   logRPS?: boolean
-
-   /**
-    * Set to false to disable logging progress
-    *
-    * @default true
-    */
-   logProgress?: boolean
-
-   /**
-    * Log progress event Nth record that is _processed_ (went through mapper).
-    * Set to 0 to disable logging.
-    *
-    * @default 1000
-    */
-   logEvery?: number
-
-   logger?: CommonLogger
-
-   /**
-    * Function to return extra properties to the "progress object".
-    *
-    * chunk is undefined for "final" stats, otherwise is defined.
-    */
-   extra?: (chunk: IN | undefined, index: number) => AnyObject
-
-   /**
-    * If specified - will multiply the counter by this number.
-    * Useful e.g when using `transformBuffer({ batchSize: 500 })`, so
-    * it'll accurately represent the number of processed entries (not batches).
-    *
-    * Defaults to 1.
-    */
-   batchSize?: number
-
-   /**
-    * Experimental logging of item (shunk) sizes, when json-stringified.
-    *
-    * Defaults to false.
-    *
-    * @experimental
-    */
-   logSizes?: boolean
-
-   /**
-    * How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
-    * Defaults to 100_000.
-    * Cannot be Infinity.
-    */
-   logSizesBuffer?: number
-
-   /**
-    * Works in addition to `logSizes`. Adds "zipped sizes".
-    *
-    * @experimental
-    */
-   logZippedSizes?: boolean
- }
-
- interface LogItem extends AnyObject {
-   heapUsed?: number
-   heapTotal?: number
-   rss?: number
-   peakRSS?: number
-   rssMinusHeap?: number
-   external?: number
-   arrayBuffers?: number
-   rps10?: number
-   rpsTotal?: number
- }
-
- const inspectOpt: InspectOptions = {
-   colors: hasColors,
-   breakLength: 300,
- }
+ export interface TransformLogProgressOptions<IN = any>
+   extends ProgressLoggerCfg<IN>,
+     TransformOptions {}

  /**
   * Pass-through transform that optionally logs progress.
@@ -157,119 +12,18 @@ const inspectOpt: InspectOptions = {
  export function transformLogProgress<IN = any>(
    opt: TransformLogProgressOptions = {},
  ): TransformTyped<IN, IN> {
-   const {
-     metric = 'progress',
-     heapTotal: logHeapTotal = false,
-     heapUsed: logHeapUsed = false,
-     rss: logRss = true,
-     peakRSS: logPeakRSS = true,
-     logRPS = true,
-     logEvery = 1000,
-     logSizes = false,
-     logSizesBuffer = 100_000,
-     logZippedSizes = false,
-     batchSize = 1,
-     extra,
-     logger = console,
-   } = opt
-   const logProgress = opt.logProgress !== false && logEvery !== 0 // true by default
-   const logEvery10 = logEvery * 10
-
-   const started = Date.now()
-   let lastSecondStarted = Date.now()
-   const sma = new SimpleMovingAverage(10) // over last 10 seconds
-   let processedLastSecond = 0
-   let progress = 0
-   let peakRSS = 0
-
-   const sizes = logSizes ? new SizeStack('json', logSizesBuffer) : undefined
-   const sizesZipped = logZippedSizes ? new SizeStack('json.gz', logSizesBuffer) : undefined
-
-   logStats() // initial
+   const progress = progressLogger(opt)

    return new Transform({
      objectMode: true,
      ...opt,
      transform(chunk: IN, _, cb) {
-       progress++
-       processedLastSecond++
-
-       if (sizes) {
-         // Check it, cause gzipping might be delayed here..
-         void SizeStack.countItem(chunk, logger, sizes, sizesZipped)
-       }
-
-       if (logProgress && progress % logEvery === 0) {
-         logStats(chunk, false, progress % logEvery10 === 0)
-       }
-
+       progress.log(chunk)
        cb(null, chunk) // pass-through
      },
      final(cb) {
-       logStats(undefined, true)
-
+       progress.done()
        cb()
      },
    })
-
-   function logStats(chunk?: IN, final = false, tenx = false): void {
-     if (!logProgress) return
-
-     const mem = process.memoryUsage()
-
-     const now = Date.now()
-     const batchedProgress = progress * batchSize
-     const lastRPS = (processedLastSecond * batchSize) / ((now - lastSecondStarted) / 1000) || 0
-     const rpsTotal = Math.round(batchedProgress / ((now - started) / 1000)) || 0
-     lastSecondStarted = now
-     processedLastSecond = 0
-
-     const rps10 = Math.round(sma.pushGetAvg(lastRPS))
-     if (mem.rss > peakRSS) peakRSS = mem.rss
-
-     const o: LogItem = {
-       [final ? `${metric}_final` : metric]: batchedProgress,
-     }
-
-     if (extra) Object.assign(o, extra(chunk, progress))
-     if (logHeapUsed) o.heapUsed = _mb(mem.heapUsed)
-     if (logHeapTotal) o.heapTotal = _mb(mem.heapTotal)
-     if (logRss) o.rss = _mb(mem.rss)
-     if (logPeakRSS) o.peakRSS = _mb(peakRSS)
-     if (opt.rssMinusHeap) o.rssMinusHeap = _mb(mem.rss - mem.heapTotal)
-     if (opt.external) o.external = _mb(mem.external)
-     if (opt.arrayBuffers) o.arrayBuffers = _mb(mem.arrayBuffers || 0)
-
-     if (logRPS) Object.assign(o, { rps10, rpsTotal })
-
-     logger.log(inspect(o, inspectOpt))
-
-     if (sizes?.items.length) {
-       logger.log(sizes.getStats())
-
-       if (sizesZipped?.items.length) {
-         logger.log(sizesZipped.getStats())
-       }
-     }
-
-     if (tenx) {
-       let perHour: number | string =
-         Math.round((batchedProgress * 1000 * 60 * 60) / (now - started)) || 0
-       if (perHour > 900) {
-         perHour = Math.round(perHour / 1000) + 'K'
-       }
-
-       logger.log(
-         `${dimGrey(localTimeNow().toPretty())} ${white(metric)} took ${yellow(
-           _since(started),
-         )} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`,
-       )
-     } else if (final) {
-       logger.log(
-         `${boldWhite(metric)} took ${yellow(_since(started))} to process ${yellow(
-           batchedProgress,
-         )} rows with total RPS of ${yellow(rpsTotal)}`,
-       )
-     }
-   }
  }
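
Note on the refactor above: the caller-facing API is unchanged — the transform still passes every chunk through and logs progress — but the counting and logging now live in the new progressLogger. A minimal usage sketch under that assumption; the sample data, option values and the main() wrapper are illustrative, not from the package, and it assumes transformLogProgress and writablePushToArray are exported from the package root as before:

import { Readable } from 'node:stream'
import { _pipeline, transformLogProgress, writablePushToArray } from '@naturalcycles/nodejs-lib'

async function main(): Promise<void> {
  const rows: { id: number }[] = []

  await _pipeline([
    // Any object-mode Readable works as a source
    Readable.from([{ id: 1 }, { id: 2 }, { id: 3 }]),
    // Options formerly declared on TransformLogProgressOptions (metric, logEvery, rss, ...)
    // are assumed to keep their meaning now that they come from ProgressLoggerCfg
    transformLogProgress({ metric: 'rows', logEvery: 2 }),
    writablePushToArray(rows),
  ])

  console.log(rows.length) // 3
}

void main()

The progressLogger(opt) object used inside the transform exposes log(chunk) and done(), so the same progress reporting can presumably be reused outside of a stream context as well.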
package/src/stream/transform/transformTee.ts
@@ -14,7 +14,7 @@ type AnyStream = NodeJS.WritableStream | NodeJS.ReadWriteStream
   *
   * @experimental
   */
- export function transformTee<T>(streams: AnyStream[]): TransformTyped<T> {
+ export function transformTee<T>(streams: AnyStream[]): TransformTyped<T, T> {
    const readable = readableCreate<T>()

    const secondPipelinePromise = _pipeline([readable, ...streams])
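
For orientation: transformTee passes each chunk down the main pipeline while also feeding it into a second pipeline built from the given streams (the readableCreate + _pipeline wiring above). A hedged usage sketch — it assumes transformTee is exported from the package root like the other stream helpers, and the data is made up:

import { Readable } from 'node:stream'
import { _pipeline, transformTee, writablePushToArray } from '@naturalcycles/nodejs-lib'

async function main(): Promise<void> {
  const mainCopy: number[] = []
  const teedCopy: number[] = []

  await _pipeline([
    Readable.from([1, 2, 3]),
    // Chunks continue down this pipeline and are also pushed into the
    // secondary pipeline built from the array of streams
    transformTee<number>([writablePushToArray(teedCopy)]),
    writablePushToArray(mainCopy),
  ])

  console.log(mainCopy, teedCopy) // both should end up as [1, 2, 3]
}

void main()

The type change itself (TransformTyped<T> to TransformTyped<T, T>) only makes the pass-through nature explicit: output chunks have the same type as input chunks.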
package/dist/stream/readable/readableMap.d.ts
@@ -1,3 +0,0 @@
- import { AbortableAsyncMapper } from '@naturalcycles/js-lib';
- import { ReadableTyped } from '../stream.model';
- export declare function readableMap<IN, OUT>(readable: ReadableTyped<IN>, mapper: AbortableAsyncMapper<IN, OUT>): ReadableTyped<OUT>;
package/dist/stream/readable/readableMap.js
@@ -1,31 +0,0 @@
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.readableMap = void 0;
- const node_stream_1 = require("node:stream");
- const js_lib_1 = require("@naturalcycles/js-lib");
- function readableMap(readable, mapper) {
-     let i = -1;
-     const stream = readable
-         .on('error', err => stream.emit('error', err))
-         .pipe(new node_stream_1.Transform({
-         objectMode: true,
-         async transform(chunk, _enc, cb) {
-             try {
-                 const r = await mapper(chunk, ++i);
-                 if (r === js_lib_1.SKIP) {
-                     cb();
-                 }
-                 else {
-                     // _assert(r !== END, `readableMap END not supported`)
-                     cb(null, r);
-                 }
-             }
-             catch (err) {
-                 console.error(err);
-                 cb(err);
-             }
-         },
-     }));
-     return stream;
- }
- exports.readableMap = readableMap;
package/dist/stream/readable/readableMapToArray.d.ts
@@ -1,10 +0,0 @@
- import { AsyncMapper } from '@naturalcycles/js-lib';
- import { ReadableTyped } from '../stream.model';
- import { TransformMapOptions } from '../transform/transformMap';
- /**
-  * Map Readable items to array of results (in memory),
-  * passing each result via `transformMap`.
-  *
-  * Warning! All results are stored in memory (no backpressure).
-  */
- export declare function readableMapToArray<IN, OUT = IN>(stream: ReadableTyped<IN>, mapper?: AsyncMapper<IN, OUT>, opt?: TransformMapOptions<IN, OUT>): Promise<OUT[]>;
package/dist/stream/readable/readableMapToArray.js
@@ -1,16 +0,0 @@
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.readableMapToArray = void 0;
- const index_1 = require("../../index");
- /**
-  * Map Readable items to array of results (in memory),
-  * passing each result via `transformMap`.
-  *
-  * Warning! All results are stored in memory (no backpressure).
-  */
- async function readableMapToArray(stream, mapper = item => item, opt) {
-     const res = [];
-     await (0, index_1._pipeline)([stream, (0, index_1.transformMap)(mapper, opt), (0, index_1.writablePushToArray)(res)]);
-     return res;
- }
- exports.readableMapToArray = readableMapToArray;
package/src/stream/readable/readableMap.ts
@@ -1,34 +0,0 @@
- import { Transform } from 'node:stream'
- import { AbortableAsyncMapper, SKIP } from '@naturalcycles/js-lib'
- import { ReadableTyped } from '../stream.model'
-
- export function readableMap<IN, OUT>(
-   readable: ReadableTyped<IN>,
-   mapper: AbortableAsyncMapper<IN, OUT>,
- ): ReadableTyped<OUT> {
-   let i = -1
-
-   const stream: ReadableTyped<OUT> = readable
-     .on('error', err => stream.emit('error', err))
-     .pipe(
-       new Transform({
-         objectMode: true,
-         async transform(chunk, _enc, cb) {
-           try {
-             const r = await mapper(chunk, ++i)
-             if (r === SKIP) {
-               cb()
-             } else {
-               // _assert(r !== END, `readableMap END not supported`)
-               cb(null, r)
-             }
-           } catch (err) {
-             console.error(err)
-             cb(err as Error)
-           }
-         },
-       }),
-     )
-
-   return stream
- }
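
readableMap, removed above in both compiled and source form, wrapped a Readable in a mapping Transform with SKIP support. One way to cover that use case with helpers that remain exported is to do the mapping inside the pipeline via transformMap — a sketch, not an official migration path, assuming transformMap at this version accepts the same kind of async mapper and honors the SKIP token; the data and mapper are made up:

import { Readable } from 'node:stream'
import { SKIP } from '@naturalcycles/js-lib'
import { _pipeline, transformMap, writablePushToArray } from '@naturalcycles/nodejs-lib'

async function main(): Promise<void> {
  const out: string[] = []

  await _pipeline([
    Readable.from([1, 2, 3, 4]),
    // Assumption: transformMap drops a chunk when the mapper returns SKIP,
    // which is what readableMap did
    transformMap<number, string>(async n => (n % 2 === 0 ? SKIP : `row_${n}`)),
    writablePushToArray(out),
  ])

  console.log(out) // ['row_1', 'row_3']
}

void main()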
package/src/stream/readable/readableMapToArray.ts
@@ -1,22 +0,0 @@
- import { AsyncMapper } from '@naturalcycles/js-lib'
- import { transformMap, writablePushToArray, _pipeline } from '../../index'
- import { ReadableTyped } from '../stream.model'
- import { TransformMapOptions } from '../transform/transformMap'
-
- /**
-  * Map Readable items to array of results (in memory),
-  * passing each result via `transformMap`.
-  *
-  * Warning! All results are stored in memory (no backpressure).
-  */
- export async function readableMapToArray<IN, OUT = IN>(
-   stream: ReadableTyped<IN>,
-   mapper: AsyncMapper<IN, OUT> = item => item as any,
-   opt?: TransformMapOptions<IN, OUT>,
- ): Promise<OUT[]> {
-   const res: OUT[] = []
-
-   await _pipeline([stream, transformMap(mapper, opt), writablePushToArray(res)])
-
-   return res
- }
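
The removed readableMapToArray was a thin wrapper over _pipeline + transformMap + writablePushToArray, all of which stay in the package, so callers can inline those three lines or re-create the helper locally. A sketch of such a local helper, assuming ReadableTyped and TransformMapOptions are still re-exported from the package root (same in-memory caveat as before):

import { AsyncMapper } from '@naturalcycles/js-lib'
import {
  _pipeline,
  transformMap,
  writablePushToArray,
  ReadableTyped,
  TransformMapOptions,
} from '@naturalcycles/nodejs-lib'

/**
 * Local re-creation of the removed helper.
 * Warning (unchanged): all results are stored in memory, no backpressure.
 */
export async function readableMapToArray<IN, OUT = IN>(
  stream: ReadableTyped<IN>,
  mapper: AsyncMapper<IN, OUT> = item => item as any,
  opt?: TransformMapOptions<IN, OUT>,
): Promise<OUT[]> {
  const res: OUT[] = []
  await _pipeline([stream, transformMap(mapper, opt), writablePushToArray(res)])
  return res
}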