@naturalcycles/nodejs-lib 13.9.1 → 13.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,7 +35,7 @@ function csvStringParse(str, cfg = {}) {
35
35
  exports.csvStringParse = csvStringParse;
36
36
  function csvStringToArray(str) {
37
37
  const objPattern = new RegExp('(,|\\r?\\n|\\r|^)(?:"([^"]*(?:""[^"]*)*)"|([^,\\r\\n]*))', 'gi');
38
- let matches = null;
38
+ let matches;
39
39
  const arr = [[]];
40
40
  while ((matches = objPattern.exec(str))) {
41
41
  if (matches[1].length && matches[1] !== ',') {
package/dist/index.d.ts CHANGED
@@ -35,6 +35,7 @@ export * from './stream/readable/readableMap';
35
35
  export * from './stream/readable/readableMapToArray';
36
36
  export * from './stream/readable/readableToArray';
37
37
  export * from './stream/stream.model';
38
+ export * from './stream/progressLogger';
38
39
  export * from './stream/transform/transformBuffer';
39
40
  export * from './stream/transform/transformFilter';
40
41
  export * from './stream/transform/transformLimit';
package/dist/index.js CHANGED
@@ -39,6 +39,7 @@ tslib_1.__exportStar(require("./stream/readable/readableMap"), exports);
39
39
  tslib_1.__exportStar(require("./stream/readable/readableMapToArray"), exports);
40
40
  tslib_1.__exportStar(require("./stream/readable/readableToArray"), exports);
41
41
  tslib_1.__exportStar(require("./stream/stream.model"), exports);
42
+ tslib_1.__exportStar(require("./stream/progressLogger"), exports);
42
43
  tslib_1.__exportStar(require("./stream/transform/transformBuffer"), exports);
43
44
  tslib_1.__exportStar(require("./stream/transform/transformFilter"), exports);
44
45
  tslib_1.__exportStar(require("./stream/transform/transformLimit"), exports);
@@ -0,0 +1,145 @@
1
+ import { AnyObject, CommonLogger } from '@naturalcycles/js-lib';
2
+ export interface ProgressLoggerCfg<IN = any> {
3
+ /**
4
+ * Progress metric
5
+ *
6
+ * @default `progress`
7
+ */
8
+ metric?: string;
9
+ /**
10
+ * Include `heapUsed` in log.
11
+ *
12
+ * @default false
13
+ */
14
+ heapUsed?: boolean;
15
+ /**
16
+ * Include `heapTotal` in log.
17
+ *
18
+ * @default false
19
+ */
20
+ heapTotal?: boolean;
21
+ /**
22
+ * Include `rss` in log.
23
+ *
24
+ * @default true
25
+ */
26
+ rss?: boolean;
27
+ /**
28
+ * Include Peak RSS in log.
29
+ *
30
+ * @default true
31
+ */
32
+ peakRSS?: boolean;
33
+ /**
34
+ * Include `external` in log.
35
+ *
36
+ * @default false
37
+ */
38
+ external?: boolean;
39
+ /**
40
+ * Include `arrayBuffers` in log.
41
+ *
42
+ * @default false
43
+ */
44
+ arrayBuffers?: boolean;
45
+ /**
46
+ * Log (rss - heapTotal)
47
+ * For convenience of debugging "out-of-heap" memory size.
48
+ *
49
+ * @default false
50
+ */
51
+ rssMinusHeap?: boolean;
52
+ /**
53
+ * Log "rows per second"
54
+ *
55
+ * @default true
56
+ */
57
+ logRPS?: boolean;
58
+ /**
59
+ * Set to false to disable logging progress
60
+ *
61
+ * @default true
62
+ */
63
+ logProgress?: boolean;
64
+ /**
65
+ * Log progress every Nth record that is _processed_ (went through mapper).
66
+ * Set to 0 to disable logging.
67
+ *
68
+ * @default 1000
69
+ */
70
+ logEvery?: number;
71
+ logger?: CommonLogger;
72
+ /**
73
+ * Function to return extra properties to the "progress object".
74
+ *
75
+ * chunk is undefined for "final" stats, otherwise is defined.
76
+ */
77
+ extra?: (chunk: IN | undefined, index: number) => AnyObject;
78
+ /**
79
+ * If specified - will multiply the counter by this number.
80
+ * Useful e.g. when using `transformBuffer({ batchSize: 500 })`, so
81
+ * it'll accurately represent the number of processed entries (not batches).
82
+ *
83
+ * Defaults to 1.
84
+ */
85
+ batchSize?: number;
86
+ /**
87
+ * Experimental logging of item (chunk) sizes, when json-stringified.
88
+ *
89
+ * Defaults to false.
90
+ *
91
+ * @experimental
92
+ */
93
+ logSizes?: boolean;
94
+ /**
95
+ * How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
96
+ * Defaults to 100_000.
97
+ * Cannot be Infinity.
98
+ */
99
+ logSizesBuffer?: number;
100
+ /**
101
+ * Works in addition to `logSizes`. Adds "zipped sizes".
102
+ *
103
+ * @experimental
104
+ */
105
+ logZippedSizes?: boolean;
106
+ }
107
+ export interface ProgressLogItem extends AnyObject {
108
+ heapUsed?: number;
109
+ heapTotal?: number;
110
+ rss?: number;
111
+ peakRSS?: number;
112
+ rssMinusHeap?: number;
113
+ external?: number;
114
+ arrayBuffers?: number;
115
+ rps10?: number;
116
+ rpsTotal?: number;
117
+ }
118
+ export declare class ProgressLogger<IN> implements Disposable {
119
+ constructor(cfg?: ProgressLoggerCfg<IN>);
120
+ cfg: ProgressLoggerCfg<IN> & {
121
+ logEvery: number;
122
+ logSizesBuffer: number;
123
+ batchSize: number;
124
+ metric: string;
125
+ logger: CommonLogger;
126
+ };
127
+ private started;
128
+ private lastSecondStarted;
129
+ private sma;
130
+ private logEvery10;
131
+ private processedLastSecond;
132
+ private progress;
133
+ private peakRSS;
134
+ private sizes?;
135
+ private sizesZipped?;
136
+ private start;
137
+ log(chunk?: IN): void;
138
+ done(): void;
139
+ [Symbol.dispose](): void;
140
+ private logStats;
141
+ }
142
+ /**
143
+ * Create new ProgressLogger.
144
+ */
145
+ export declare function progressLogger<IN>(cfg?: ProgressLoggerCfg<IN>): ProgressLogger<IN>;
@@ -0,0 +1,120 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.progressLogger = exports.ProgressLogger = void 0;
4
+ const node_util_1 = require("node:util");
5
+ const js_lib_1 = require("@naturalcycles/js-lib");
6
+ const colors_1 = require("../colors/colors");
7
+ const sizeStack_1 = require("./sizeStack");
8
+ const inspectOpt = {
9
+ colors: colors_1.hasColors,
10
+ breakLength: 300,
11
+ };
12
+ class ProgressLogger {
13
+ constructor(cfg = {}) {
14
+ this.cfg = {
15
+ metric: 'progress',
16
+ rss: true,
17
+ peakRSS: true,
18
+ logRPS: true,
19
+ logEvery: 1000,
20
+ logSizesBuffer: 100_000,
21
+ batchSize: 1,
22
+ logger: console,
23
+ logProgress: cfg.logProgress !== false && cfg.logEvery !== 0,
24
+ ...cfg,
25
+ };
26
+ this.logEvery10 = this.cfg.logEvery * 10;
27
+ this.start();
28
+ this.logStats(); // initial
29
+ }
30
+ start() {
31
+ this.started = Date.now();
32
+ this.lastSecondStarted = Date.now();
33
+ this.sma = new js_lib_1.SimpleMovingAverage(10);
34
+ this.processedLastSecond = 0;
35
+ this.progress = 0;
36
+ this.peakRSS = 0;
37
+ this.sizes = this.cfg.logSizes ? new sizeStack_1.SizeStack('json', this.cfg.logSizesBuffer) : undefined;
38
+ this.sizesZipped = this.cfg.logZippedSizes
39
+ ? new sizeStack_1.SizeStack('json.gz', this.cfg.logSizesBuffer)
40
+ : undefined;
41
+ }
42
+ log(chunk) {
43
+ this.progress++;
44
+ this.processedLastSecond++;
45
+ if (this.sizes) {
46
+ // Check it, cause gzipping might be delayed here..
47
+ void sizeStack_1.SizeStack.countItem(chunk, this.cfg.logger, this.sizes, this.sizesZipped);
48
+ }
49
+ if (this.cfg.logProgress && this.progress % this.cfg.logEvery === 0) {
50
+ this.logStats(chunk, false, this.progress % this.logEvery10 === 0);
51
+ }
52
+ }
53
+ done() {
54
+ this.logStats(undefined, true);
55
+ }
56
+ [Symbol.dispose]() {
57
+ this.done();
58
+ }
59
+ logStats(chunk, final = false, tenx = false) {
60
+ if (!this.cfg.logProgress)
61
+ return;
62
+ const { metric, extra, batchSize, heapUsed: logHeapUsed, heapTotal: logHeapTotal, rss: logRss, peakRSS: logPeakRss, rssMinusHeap, external, arrayBuffers, logRPS, logger, } = this.cfg;
63
+ const mem = process.memoryUsage();
64
+ const now = Date.now();
65
+ const batchedProgress = this.progress * batchSize;
66
+ const lastRPS = (this.processedLastSecond * batchSize) / ((now - this.lastSecondStarted) / 1000) || 0;
67
+ const rpsTotal = Math.round(batchedProgress / ((now - this.started) / 1000)) || 0;
68
+ this.lastSecondStarted = now;
69
+ this.processedLastSecond = 0;
70
+ const rps10 = Math.round(this.sma.pushGetAvg(lastRPS));
71
+ if (mem.rss > this.peakRSS)
72
+ this.peakRSS = mem.rss;
73
+ const o = {
74
+ [final ? `${this.cfg.metric}_final` : this.cfg.metric]: batchedProgress,
75
+ };
76
+ if (extra)
77
+ Object.assign(o, extra(chunk, this.progress));
78
+ if (logHeapUsed)
79
+ o.heapUsed = (0, js_lib_1._mb)(mem.heapUsed);
80
+ if (logHeapTotal)
81
+ o.heapTotal = (0, js_lib_1._mb)(mem.heapTotal);
82
+ if (logRss)
83
+ o.rss = (0, js_lib_1._mb)(mem.rss);
84
+ if (logPeakRss)
85
+ o.peakRSS = (0, js_lib_1._mb)(this.peakRSS);
86
+ if (rssMinusHeap)
87
+ o.rssMinusHeap = (0, js_lib_1._mb)(mem.rss - mem.heapTotal);
88
+ if (external)
89
+ o.external = (0, js_lib_1._mb)(mem.external);
90
+ if (arrayBuffers)
91
+ o.arrayBuffers = (0, js_lib_1._mb)(mem.arrayBuffers || 0);
92
+ if (logRPS)
93
+ Object.assign(o, { rps10, rpsTotal });
94
+ logger.log((0, node_util_1.inspect)(o, inspectOpt));
95
+ if (this.sizes?.items.length) {
96
+ logger.log(this.sizes.getStats());
97
+ if (this.sizesZipped?.items.length) {
98
+ logger.log(this.sizesZipped.getStats());
99
+ }
100
+ }
101
+ if (tenx) {
102
+ let perHour = Math.round((batchedProgress * 1000 * 60 * 60) / (now - this.started)) || 0;
103
+ if (perHour > 900) {
104
+ perHour = Math.round(perHour / 1000) + 'K';
105
+ }
106
+ logger.log(`${(0, colors_1.dimGrey)((0, js_lib_1.localTimeNow)().toPretty())} ${(0, colors_1.white)(metric)} took ${(0, colors_1.yellow)((0, js_lib_1._since)(this.started))} so far to process ${(0, colors_1.yellow)(batchedProgress)} rows, ~${(0, colors_1.yellow)(perHour)}/hour`);
107
+ }
108
+ else if (final) {
109
+ logger.log(`${(0, colors_1.boldWhite)(metric)} took ${(0, colors_1.yellow)((0, js_lib_1._since)(this.started))} to process ${(0, colors_1.yellow)(batchedProgress)} rows with total RPS of ${(0, colors_1.yellow)(rpsTotal)}`);
110
+ }
111
+ }
112
+ }
113
+ exports.ProgressLogger = ProgressLogger;
114
+ /**
115
+ * Create new ProgressLogger.
116
+ */
117
+ function progressLogger(cfg = {}) {
118
+ return new ProgressLogger(cfg);
119
+ }
120
+ exports.progressLogger = progressLogger;
@@ -5,6 +5,8 @@ import { TransformMapOptions } from '../transform/transformMap';
5
5
  * Convenience function to do `.forEach` over a Readable.
6
6
  * Typed! (unlike default Readable).
7
7
  *
8
+ * Try native readable.forEach() instead!
9
+ *
8
10
  * @experimental
9
11
  */
10
12
  export declare function readableForEach<T>(readable: ReadableTyped<T>, mapper: AbortableAsyncMapper<T, void>, opt?: TransformMapOptions<T, void>): Promise<void>;
@@ -8,6 +8,8 @@ const transformMap_1 = require("../transform/transformMap");
8
8
  * Convenience function to do `.forEach` over a Readable.
9
9
  * Typed! (unlike default Readable).
10
10
  *
11
+ * Try native readable.forEach() instead!
12
+ *
11
13
  * @experimental
12
14
  */
13
15
  async function readableForEach(readable, mapper, opt = {}) {
@@ -6,5 +6,7 @@ import { TransformMapOptions } from '../transform/transformMap';
6
6
  * passing each result via `transformMap`.
7
7
  *
8
8
  * Warning! All results are stored in memory (no backpressure).
9
+ *
10
+ * Try native readable.toArray instead!
9
11
  */
10
12
  export declare function readableMapToArray<IN, OUT = IN>(stream: ReadableTyped<IN>, mapper?: AsyncMapper<IN, OUT>, opt?: TransformMapOptions<IN, OUT>): Promise<OUT[]>;
@@ -7,6 +7,8 @@ const index_1 = require("../../index");
7
7
  * passing each result via `transformMap`.
8
8
  *
9
9
  * Warning! All results are stored in memory (no backpressure).
10
+ *
11
+ * Try native readable.toArray instead!
10
12
  */
11
13
  async function readableMapToArray(stream, mapper = item => item, opt) {
12
14
  const res = [];
@@ -1,109 +1,6 @@
1
- import { AnyObject, CommonLogger } from '@naturalcycles/js-lib';
1
+ import { ProgressLoggerCfg } from '../progressLogger';
2
2
  import { TransformOptions, TransformTyped } from '../stream.model';
3
- export interface TransformLogProgressOptions<IN = any> extends TransformOptions {
4
- /**
5
- * Progress metric
6
- *
7
- * @default `progress`
8
- */
9
- metric?: string;
10
- /**
11
- * Include `heapUsed` in log.
12
- *
13
- * @default false
14
- */
15
- heapUsed?: boolean;
16
- /**
17
- * Include `heapTotal` in log.
18
- *
19
- * @default false
20
- */
21
- heapTotal?: boolean;
22
- /**
23
- * Include `rss` in log.
24
- *
25
- * @default true
26
- */
27
- rss?: boolean;
28
- /**
29
- * Include Peak RSS in log.
30
- *
31
- * @default true
32
- */
33
- peakRSS?: boolean;
34
- /**
35
- * Include `external` in log.
36
- *
37
- * @default false
38
- */
39
- external?: boolean;
40
- /**
41
- * Include `arrayBuffers` in log.
42
- *
43
- * @default false
44
- */
45
- arrayBuffers?: boolean;
46
- /**
47
- * Log (rss - heapTotal)
48
- * For convenience of debugging "out-of-heap" memory size.
49
- *
50
- * @default false
51
- */
52
- rssMinusHeap?: boolean;
53
- /**
54
- * Log "rows per second"
55
- *
56
- * @default true
57
- */
58
- logRPS?: boolean;
59
- /**
60
- * Set to false to disable logging progress
61
- *
62
- * @default true
63
- */
64
- logProgress?: boolean;
65
- /**
66
- * Log progress every Nth record that is _processed_ (went through mapper).
67
- * Set to 0 to disable logging.
68
- *
69
- * @default 1000
70
- */
71
- logEvery?: number;
72
- logger?: CommonLogger;
73
- /**
74
- * Function to return extra properties to the "progress object".
75
- *
76
- * chunk is undefined for "final" stats, otherwise is defined.
77
- */
78
- extra?: (chunk: IN | undefined, index: number) => AnyObject;
79
- /**
80
- * If specified - will multiply the counter by this number.
81
- * Useful e.g. when using `transformBuffer({ batchSize: 500 })`, so
82
- * it'll accurately represent the number of processed entries (not batches).
83
- *
84
- * Defaults to 1.
85
- */
86
- batchSize?: number;
87
- /**
88
- * Experimental logging of item (chunk) sizes, when json-stringified.
89
- *
90
- * Defaults to false.
91
- *
92
- * @experimental
93
- */
94
- logSizes?: boolean;
95
- /**
96
- * How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
97
- * Defaults to 100_000.
98
- * Cannot be Infinity.
99
- */
100
- logSizesBuffer?: number;
101
- /**
102
- * Works in addition to `logSizes`. Adds "zipped sizes".
103
- *
104
- * @experimental
105
- */
106
- logZippedSizes?: boolean;
3
+ export interface TransformLogProgressOptions<IN = any> extends ProgressLoggerCfg<IN>, TransformOptions {
107
4
  }
108
5
  /**
109
6
  * Pass-through transform that optionally logs progress.
@@ -2,101 +2,23 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.transformLogProgress = void 0;
4
4
  const node_stream_1 = require("node:stream");
5
- const node_util_1 = require("node:util");
6
- const js_lib_1 = require("@naturalcycles/js-lib");
7
- const colors_1 = require("../../colors/colors");
8
- const sizeStack_1 = require("../sizeStack");
9
- const inspectOpt = {
10
- colors: colors_1.hasColors,
11
- breakLength: 300,
12
- };
5
+ const progressLogger_1 = require("../progressLogger");
13
6
  /**
14
7
  * Pass-through transform that optionally logs progress.
15
8
  */
16
9
  function transformLogProgress(opt = {}) {
17
- const { metric = 'progress', heapTotal: logHeapTotal = false, heapUsed: logHeapUsed = false, rss: logRss = true, peakRSS: logPeakRSS = true, logRPS = true, logEvery = 1000, logSizes = false, logSizesBuffer = 100_000, logZippedSizes = false, batchSize = 1, extra, logger = console, } = opt;
18
- const logProgress = opt.logProgress !== false && logEvery !== 0; // true by default
19
- const logEvery10 = logEvery * 10;
20
- const started = Date.now();
21
- let lastSecondStarted = Date.now();
22
- const sma = new js_lib_1.SimpleMovingAverage(10); // over last 10 seconds
23
- let processedLastSecond = 0;
24
- let progress = 0;
25
- let peakRSS = 0;
26
- const sizes = logSizes ? new sizeStack_1.SizeStack('json', logSizesBuffer) : undefined;
27
- const sizesZipped = logZippedSizes ? new sizeStack_1.SizeStack('json.gz', logSizesBuffer) : undefined;
28
- logStats(); // initial
10
+ const progress = (0, progressLogger_1.progressLogger)(opt);
29
11
  return new node_stream_1.Transform({
30
12
  objectMode: true,
31
13
  ...opt,
32
14
  transform(chunk, _, cb) {
33
- progress++;
34
- processedLastSecond++;
35
- if (sizes) {
36
- // Check it, cause gzipping might be delayed here..
37
- void sizeStack_1.SizeStack.countItem(chunk, logger, sizes, sizesZipped);
38
- }
39
- if (logProgress && progress % logEvery === 0) {
40
- logStats(chunk, false, progress % logEvery10 === 0);
41
- }
15
+ progress.log(chunk);
42
16
  cb(null, chunk); // pass-through
43
17
  },
44
18
  final(cb) {
45
- logStats(undefined, true);
19
+ progress.done();
46
20
  cb();
47
21
  },
48
22
  });
49
- function logStats(chunk, final = false, tenx = false) {
50
- if (!logProgress)
51
- return;
52
- const mem = process.memoryUsage();
53
- const now = Date.now();
54
- const batchedProgress = progress * batchSize;
55
- const lastRPS = (processedLastSecond * batchSize) / ((now - lastSecondStarted) / 1000) || 0;
56
- const rpsTotal = Math.round(batchedProgress / ((now - started) / 1000)) || 0;
57
- lastSecondStarted = now;
58
- processedLastSecond = 0;
59
- const rps10 = Math.round(sma.pushGetAvg(lastRPS));
60
- if (mem.rss > peakRSS)
61
- peakRSS = mem.rss;
62
- const o = {
63
- [final ? `${metric}_final` : metric]: batchedProgress,
64
- };
65
- if (extra)
66
- Object.assign(o, extra(chunk, progress));
67
- if (logHeapUsed)
68
- o.heapUsed = (0, js_lib_1._mb)(mem.heapUsed);
69
- if (logHeapTotal)
70
- o.heapTotal = (0, js_lib_1._mb)(mem.heapTotal);
71
- if (logRss)
72
- o.rss = (0, js_lib_1._mb)(mem.rss);
73
- if (logPeakRSS)
74
- o.peakRSS = (0, js_lib_1._mb)(peakRSS);
75
- if (opt.rssMinusHeap)
76
- o.rssMinusHeap = (0, js_lib_1._mb)(mem.rss - mem.heapTotal);
77
- if (opt.external)
78
- o.external = (0, js_lib_1._mb)(mem.external);
79
- if (opt.arrayBuffers)
80
- o.arrayBuffers = (0, js_lib_1._mb)(mem.arrayBuffers || 0);
81
- if (logRPS)
82
- Object.assign(o, { rps10, rpsTotal });
83
- logger.log((0, node_util_1.inspect)(o, inspectOpt));
84
- if (sizes?.items.length) {
85
- logger.log(sizes.getStats());
86
- if (sizesZipped?.items.length) {
87
- logger.log(sizesZipped.getStats());
88
- }
89
- }
90
- if (tenx) {
91
- let perHour = Math.round((batchedProgress * 1000 * 60 * 60) / (now - started)) || 0;
92
- if (perHour > 900) {
93
- perHour = Math.round(perHour / 1000) + 'K';
94
- }
95
- logger.log(`${(0, colors_1.dimGrey)((0, js_lib_1.localTimeNow)().toPretty())} ${(0, colors_1.white)(metric)} took ${(0, colors_1.yellow)((0, js_lib_1._since)(started))} so far to process ${(0, colors_1.yellow)(batchedProgress)} rows, ~${(0, colors_1.yellow)(perHour)}/hour`);
96
- }
97
- else if (final) {
98
- logger.log(`${(0, colors_1.boldWhite)(metric)} took ${(0, colors_1.yellow)((0, js_lib_1._since)(started))} to process ${(0, colors_1.yellow)(batchedProgress)} rows with total RPS of ${(0, colors_1.yellow)(rpsTotal)}`);
99
- }
100
- }
101
23
  }
102
24
  exports.transformLogProgress = transformLogProgress;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@naturalcycles/nodejs-lib",
3
- "version": "13.9.1",
3
+ "version": "13.10.0",
4
4
  "scripts": {
5
5
  "prepare": "husky",
6
6
  "docs-serve": "vuepress dev docs",
@@ -57,7 +57,7 @@ export function csvStringParse<T extends AnyObject = any>(
57
57
 
58
58
  export function csvStringToArray(str: string): string[][] {
59
59
  const objPattern = new RegExp('(,|\\r?\\n|\\r|^)(?:"([^"]*(?:""[^"]*)*)"|([^,\\r\\n]*))', 'gi')
60
- let matches = null
60
+ let matches: RegExpExecArray | null
61
61
  const arr: any[][] = [[]]
62
62
 
63
63
  while ((matches = objPattern.exec(str))) {
package/src/index.ts CHANGED
@@ -45,6 +45,7 @@ export * from './stream/readable/readableMap'
45
45
  export * from './stream/readable/readableMapToArray'
46
46
  export * from './stream/readable/readableToArray'
47
47
  export * from './stream/stream.model'
48
+ export * from './stream/progressLogger'
48
49
  export * from './stream/transform/transformBuffer'
49
50
  export * from './stream/transform/transformFilter'
50
51
  export * from './stream/transform/transformLimit'
@@ -0,0 +1,309 @@
1
+ import { inspect, InspectOptions } from 'node:util'
2
+ import {
3
+ _mb,
4
+ _since,
5
+ AnyObject,
6
+ CommonLogger,
7
+ localTimeNow,
8
+ SimpleMovingAverage,
9
+ UnixTimestampMillisNumber,
10
+ } from '@naturalcycles/js-lib'
11
+ import { boldWhite, dimGrey, hasColors, white, yellow } from '../colors/colors'
12
+ import { SizeStack } from './sizeStack'
13
+
14
+ export interface ProgressLoggerCfg<IN = any> {
15
+ /**
16
+ * Progress metric
17
+ *
18
+ * @default `progress`
19
+ */
20
+ metric?: string
21
+
22
+ /**
23
+ * Include `heapUsed` in log.
24
+ *
25
+ * @default false
26
+ */
27
+ heapUsed?: boolean
28
+
29
+ /**
30
+ * Include `heapTotal` in log.
31
+ *
32
+ * @default false
33
+ */
34
+ heapTotal?: boolean
35
+
36
+ /**
37
+ * Include `rss` in log.
38
+ *
39
+ * @default true
40
+ */
41
+ rss?: boolean
42
+
43
+ /**
44
+ * Include Peak RSS in log.
45
+ *
46
+ * @default true
47
+ */
48
+ peakRSS?: boolean
49
+
50
+ /**
51
+ * Include `external` in log.
52
+ *
53
+ * @default false
54
+ */
55
+ external?: boolean
56
+
57
+ /**
58
+ * Include `arrayBuffers` in log.
59
+ *
60
+ * @default false
61
+ */
62
+ arrayBuffers?: boolean
63
+
64
+ /**
65
+ * Log (rss - heapTotal)
66
+ * For convenience of debugging "out-of-heap" memory size.
67
+ *
68
+ * @default false
69
+ */
70
+ rssMinusHeap?: boolean
71
+
72
+ /**
73
+ * Log "rows per second"
74
+ *
75
+ * @default true
76
+ */
77
+ logRPS?: boolean
78
+
79
+ /**
80
+ * Set to false to disable logging progress
81
+ *
82
+ * @default true
83
+ */
84
+ logProgress?: boolean
85
+
86
+ /**
87
+ * Log progress every Nth record that is _processed_ (went through mapper).
88
+ * Set to 0 to disable logging.
89
+ *
90
+ * @default 1000
91
+ */
92
+ logEvery?: number
93
+
94
+ logger?: CommonLogger
95
+
96
+ /**
97
+ * Function to return extra properties to the "progress object".
98
+ *
99
+ * chunk is undefined for "final" stats, otherwise is defined.
100
+ */
101
+ extra?: (chunk: IN | undefined, index: number) => AnyObject
102
+
103
+ /**
104
+ * If specified - will multiply the counter by this number.
105
+ * Useful e.g. when using `transformBuffer({ batchSize: 500 })`, so
106
+ * it'll accurately represent the number of processed entries (not batches).
107
+ *
108
+ * Defaults to 1.
109
+ */
110
+ batchSize?: number
111
+
112
+ /**
113
+ * Experimental logging of item (chunk) sizes, when json-stringified.
114
+ *
115
+ * Defaults to false.
116
+ *
117
+ * @experimental
118
+ */
119
+ logSizes?: boolean
120
+
121
+ /**
122
+ * How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
123
+ * Defaults to 100_000.
124
+ * Cannot be Infinity.
125
+ */
126
+ logSizesBuffer?: number
127
+
128
+ /**
129
+ * Works in addition to `logSizes`. Adds "zipped sizes".
130
+ *
131
+ * @experimental
132
+ */
133
+ logZippedSizes?: boolean
134
+ }
135
+
136
+ export interface ProgressLogItem extends AnyObject {
137
+ heapUsed?: number
138
+ heapTotal?: number
139
+ rss?: number
140
+ peakRSS?: number
141
+ rssMinusHeap?: number
142
+ external?: number
143
+ arrayBuffers?: number
144
+ rps10?: number
145
+ rpsTotal?: number
146
+ }
147
+
148
+ const inspectOpt: InspectOptions = {
149
+ colors: hasColors,
150
+ breakLength: 300,
151
+ }
152
+
153
+ export class ProgressLogger<IN> implements Disposable {
154
+ constructor(cfg: ProgressLoggerCfg<IN> = {}) {
155
+ this.cfg = {
156
+ metric: 'progress',
157
+ rss: true,
158
+ peakRSS: true,
159
+ logRPS: true,
160
+ logEvery: 1000,
161
+ logSizesBuffer: 100_000,
162
+ batchSize: 1,
163
+ logger: console,
164
+ logProgress: cfg.logProgress !== false && cfg.logEvery !== 0,
165
+ ...cfg,
166
+ }
167
+ this.logEvery10 = this.cfg.logEvery * 10
168
+
169
+ this.start()
170
+ this.logStats() // initial
171
+ }
172
+
173
+ cfg!: ProgressLoggerCfg<IN> & {
174
+ logEvery: number
175
+ logSizesBuffer: number
176
+ batchSize: number
177
+ metric: string
178
+ logger: CommonLogger
179
+ }
180
+
181
+ private started!: UnixTimestampMillisNumber
182
+ private lastSecondStarted!: UnixTimestampMillisNumber
183
+ private sma!: SimpleMovingAverage
184
+ private logEvery10!: number
185
+ private processedLastSecond!: number
186
+ private progress!: number
187
+ private peakRSS!: number
188
+ private sizes?: SizeStack
189
+ private sizesZipped?: SizeStack
190
+
191
+ private start(): void {
192
+ this.started = Date.now()
193
+ this.lastSecondStarted = Date.now()
194
+ this.sma = new SimpleMovingAverage(10)
195
+ this.processedLastSecond = 0
196
+ this.progress = 0
197
+ this.peakRSS = 0
198
+ this.sizes = this.cfg.logSizes ? new SizeStack('json', this.cfg.logSizesBuffer) : undefined
199
+ this.sizesZipped = this.cfg.logZippedSizes
200
+ ? new SizeStack('json.gz', this.cfg.logSizesBuffer)
201
+ : undefined
202
+ }
203
+
204
+ log(chunk?: IN): void {
205
+ this.progress++
206
+ this.processedLastSecond++
207
+
208
+ if (this.sizes) {
209
+ // Check it, cause gzipping might be delayed here..
210
+ void SizeStack.countItem(chunk, this.cfg.logger, this.sizes, this.sizesZipped)
211
+ }
212
+
213
+ if (this.cfg.logProgress && this.progress % this.cfg.logEvery === 0) {
214
+ this.logStats(chunk, false, this.progress % this.logEvery10 === 0)
215
+ }
216
+ }
217
+
218
+ done(): void {
219
+ this.logStats(undefined, true)
220
+ }
221
+
222
+ [Symbol.dispose](): void {
223
+ this.done()
224
+ }
225
+
226
+ private logStats(chunk?: IN, final = false, tenx = false): void {
227
+ if (!this.cfg.logProgress) return
228
+
229
+ const {
230
+ metric,
231
+ extra,
232
+ batchSize,
233
+ heapUsed: logHeapUsed,
234
+ heapTotal: logHeapTotal,
235
+ rss: logRss,
236
+ peakRSS: logPeakRss,
237
+ rssMinusHeap,
238
+ external,
239
+ arrayBuffers,
240
+ logRPS,
241
+ logger,
242
+ } = this.cfg
243
+
244
+ const mem = process.memoryUsage()
245
+
246
+ const now = Date.now()
247
+ const batchedProgress = this.progress * batchSize
248
+ const lastRPS =
249
+ (this.processedLastSecond * batchSize) / ((now - this.lastSecondStarted) / 1000) || 0
250
+ const rpsTotal = Math.round(batchedProgress / ((now - this.started) / 1000)) || 0
251
+ this.lastSecondStarted = now
252
+ this.processedLastSecond = 0
253
+
254
+ const rps10 = Math.round(this.sma.pushGetAvg(lastRPS))
255
+ if (mem.rss > this.peakRSS) this.peakRSS = mem.rss
256
+
257
+ const o: ProgressLogItem = {
258
+ [final ? `${this.cfg.metric}_final` : this.cfg.metric]: batchedProgress,
259
+ }
260
+
261
+ if (extra) Object.assign(o, extra(chunk, this.progress))
262
+ if (logHeapUsed) o.heapUsed = _mb(mem.heapUsed)
263
+ if (logHeapTotal) o.heapTotal = _mb(mem.heapTotal)
264
+ if (logRss) o.rss = _mb(mem.rss)
265
+ if (logPeakRss) o.peakRSS = _mb(this.peakRSS)
266
+ if (rssMinusHeap) o.rssMinusHeap = _mb(mem.rss - mem.heapTotal)
267
+ if (external) o.external = _mb(mem.external)
268
+ if (arrayBuffers) o.arrayBuffers = _mb(mem.arrayBuffers || 0)
269
+
270
+ if (logRPS) Object.assign(o, { rps10, rpsTotal })
271
+
272
+ logger.log(inspect(o, inspectOpt))
273
+
274
+ if (this.sizes?.items.length) {
275
+ logger.log(this.sizes.getStats())
276
+
277
+ if (this.sizesZipped?.items.length) {
278
+ logger.log(this.sizesZipped.getStats())
279
+ }
280
+ }
281
+
282
+ if (tenx) {
283
+ let perHour: number | string =
284
+ Math.round((batchedProgress * 1000 * 60 * 60) / (now - this.started)) || 0
285
+ if (perHour > 900) {
286
+ perHour = Math.round(perHour / 1000) + 'K'
287
+ }
288
+
289
+ logger.log(
290
+ `${dimGrey(localTimeNow().toPretty())} ${white(metric)} took ${yellow(
291
+ _since(this.started),
292
+ )} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`,
293
+ )
294
+ } else if (final) {
295
+ logger.log(
296
+ `${boldWhite(metric)} took ${yellow(_since(this.started))} to process ${yellow(
297
+ batchedProgress,
298
+ )} rows with total RPS of ${yellow(rpsTotal)}`,
299
+ )
300
+ }
301
+ }
302
+ }
303
+
304
+ /**
305
+ * Create new ProgressLogger.
306
+ */
307
+ export function progressLogger<IN>(cfg: ProgressLoggerCfg<IN> = {}): ProgressLogger<IN> {
308
+ return new ProgressLogger(cfg)
309
+ }
@@ -6,6 +6,8 @@ import { transformMap, TransformMapOptions } from '../transform/transformMap'
6
6
  * Convenience function to do `.forEach` over a Readable.
7
7
  * Typed! (unlike default Readable).
8
8
  *
9
+ * Try native readable.forEach() instead!
10
+ *
9
11
  * @experimental
10
12
  */
11
13
  export async function readableForEach<T>(
@@ -8,6 +8,8 @@ import { TransformMapOptions } from '../transform/transformMap'
8
8
  * passing each result via `transformMap`.
9
9
  *
10
10
  * Warning! All results are stored in memory (no backpressure).
11
+ *
12
+ * Try native readable.toArray instead!
11
13
  */
12
14
  export async function readableMapToArray<IN, OUT = IN>(
13
15
  stream: ReadableTyped<IN>,
@@ -1,155 +1,10 @@
1
1
  import { Transform } from 'node:stream'
2
- import { inspect, InspectOptions } from 'node:util'
3
- import {
4
- SimpleMovingAverage,
5
- _mb,
6
- _since,
7
- AnyObject,
8
- CommonLogger,
9
- localTimeNow,
10
- } from '@naturalcycles/js-lib'
11
- import { hasColors, boldWhite, dimGrey, white, yellow } from '../../colors/colors'
12
- import { SizeStack } from '../sizeStack'
2
+ import { progressLogger, ProgressLoggerCfg } from '../progressLogger'
13
3
  import { TransformOptions, TransformTyped } from '../stream.model'
14
4
 
15
- export interface TransformLogProgressOptions<IN = any> extends TransformOptions {
16
- /**
17
- * Progress metric
18
- *
19
- * @default `progress`
20
- */
21
- metric?: string
22
-
23
- /**
24
- * Include `heapUsed` in log.
25
- *
26
- * @default false
27
- */
28
- heapUsed?: boolean
29
-
30
- /**
31
- * Include `heapTotal` in log.
32
- *
33
- * @default false
34
- */
35
- heapTotal?: boolean
36
-
37
- /**
38
- * Include `rss` in log.
39
- *
40
- * @default true
41
- */
42
- rss?: boolean
43
-
44
- /**
45
- * Incude Peak RSS in log.
46
- *
47
- * @default true
48
- */
49
- peakRSS?: boolean
50
-
51
- /**
52
- * Include `external` in log.
53
- *
54
- * @default false
55
- */
56
- external?: boolean
57
-
58
- /**
59
- * Include `arrayBuffers` in log.
60
- *
61
- * @default false
62
- */
63
- arrayBuffers?: boolean
64
-
65
- /**
66
- * Log (rss - heapTotal)
67
- * For convenience of debugging "out-of-heap" memory size.
68
- *
69
- * @default false
70
- */
71
- rssMinusHeap?: boolean
72
-
73
- /**
74
- * Log "rows per second"
75
- *
76
- * @default true
77
- */
78
- logRPS?: boolean
79
-
80
- /**
81
- * Set to false to disable logging progress
82
- *
83
- * @default true
84
- */
85
- logProgress?: boolean
86
-
87
- /**
88
- * Log progress every Nth record that is _processed_ (went through mapper).
89
- * Set to 0 to disable logging.
90
- *
91
- * @default 1000
92
- */
93
- logEvery?: number
94
-
95
- logger?: CommonLogger
96
-
97
- /**
98
- * Function to return extra properties to the "progress object".
99
- *
100
- * chunk is undefined for "final" stats, otherwise is defined.
101
- */
102
- extra?: (chunk: IN | undefined, index: number) => AnyObject
103
-
104
- /**
105
- * If specified - will multiply the counter by this number.
106
- * Useful e.g. when using `transformBuffer({ batchSize: 500 })`, so
107
- * it'll accurately represent the number of processed entries (not batches).
108
- *
109
- * Defaults to 1.
110
- */
111
- batchSize?: number
112
-
113
- /**
114
- * Experimental logging of item (chunk) sizes, when json-stringified.
115
- *
116
- * Defaults to false.
117
- *
118
- * @experimental
119
- */
120
- logSizes?: boolean
121
-
122
- /**
123
- * How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
124
- * Defaults to 100_000.
125
- * Cannot be Infinity.
126
- */
127
- logSizesBuffer?: number
128
-
129
- /**
130
- * Works in addition to `logSizes`. Adds "zipped sizes".
131
- *
132
- * @experimental
133
- */
134
- logZippedSizes?: boolean
135
- }
136
-
137
- interface LogItem extends AnyObject {
138
- heapUsed?: number
139
- heapTotal?: number
140
- rss?: number
141
- peakRSS?: number
142
- rssMinusHeap?: number
143
- external?: number
144
- arrayBuffers?: number
145
- rps10?: number
146
- rpsTotal?: number
147
- }
148
-
149
- const inspectOpt: InspectOptions = {
150
- colors: hasColors,
151
- breakLength: 300,
152
- }
5
+ export interface TransformLogProgressOptions<IN = any>
6
+ extends ProgressLoggerCfg<IN>,
7
+ TransformOptions {}
153
8
 
154
9
  /**
155
10
  * Pass-through transform that optionally logs progress.
@@ -157,119 +12,18 @@ const inspectOpt: InspectOptions = {
157
12
  export function transformLogProgress<IN = any>(
158
13
  opt: TransformLogProgressOptions = {},
159
14
  ): TransformTyped<IN, IN> {
160
- const {
161
- metric = 'progress',
162
- heapTotal: logHeapTotal = false,
163
- heapUsed: logHeapUsed = false,
164
- rss: logRss = true,
165
- peakRSS: logPeakRSS = true,
166
- logRPS = true,
167
- logEvery = 1000,
168
- logSizes = false,
169
- logSizesBuffer = 100_000,
170
- logZippedSizes = false,
171
- batchSize = 1,
172
- extra,
173
- logger = console,
174
- } = opt
175
- const logProgress = opt.logProgress !== false && logEvery !== 0 // true by default
176
- const logEvery10 = logEvery * 10
177
-
178
- const started = Date.now()
179
- let lastSecondStarted = Date.now()
180
- const sma = new SimpleMovingAverage(10) // over last 10 seconds
181
- let processedLastSecond = 0
182
- let progress = 0
183
- let peakRSS = 0
184
-
185
- const sizes = logSizes ? new SizeStack('json', logSizesBuffer) : undefined
186
- const sizesZipped = logZippedSizes ? new SizeStack('json.gz', logSizesBuffer) : undefined
187
-
188
- logStats() // initial
15
+ const progress = progressLogger(opt)
189
16
 
190
17
  return new Transform({
191
18
  objectMode: true,
192
19
  ...opt,
193
20
  transform(chunk: IN, _, cb) {
194
- progress++
195
- processedLastSecond++
196
-
197
- if (sizes) {
198
- // Check it, cause gzipping might be delayed here..
199
- void SizeStack.countItem(chunk, logger, sizes, sizesZipped)
200
- }
201
-
202
- if (logProgress && progress % logEvery === 0) {
203
- logStats(chunk, false, progress % logEvery10 === 0)
204
- }
205
-
21
+ progress.log(chunk)
206
22
  cb(null, chunk) // pass-through
207
23
  },
208
24
  final(cb) {
209
- logStats(undefined, true)
210
-
25
+ progress.done()
211
26
  cb()
212
27
  },
213
28
  })
214
-
215
- function logStats(chunk?: IN, final = false, tenx = false): void {
216
- if (!logProgress) return
217
-
218
- const mem = process.memoryUsage()
219
-
220
- const now = Date.now()
221
- const batchedProgress = progress * batchSize
222
- const lastRPS = (processedLastSecond * batchSize) / ((now - lastSecondStarted) / 1000) || 0
223
- const rpsTotal = Math.round(batchedProgress / ((now - started) / 1000)) || 0
224
- lastSecondStarted = now
225
- processedLastSecond = 0
226
-
227
- const rps10 = Math.round(sma.pushGetAvg(lastRPS))
228
- if (mem.rss > peakRSS) peakRSS = mem.rss
229
-
230
- const o: LogItem = {
231
- [final ? `${metric}_final` : metric]: batchedProgress,
232
- }
233
-
234
- if (extra) Object.assign(o, extra(chunk, progress))
235
- if (logHeapUsed) o.heapUsed = _mb(mem.heapUsed)
236
- if (logHeapTotal) o.heapTotal = _mb(mem.heapTotal)
237
- if (logRss) o.rss = _mb(mem.rss)
238
- if (logPeakRSS) o.peakRSS = _mb(peakRSS)
239
- if (opt.rssMinusHeap) o.rssMinusHeap = _mb(mem.rss - mem.heapTotal)
240
- if (opt.external) o.external = _mb(mem.external)
241
- if (opt.arrayBuffers) o.arrayBuffers = _mb(mem.arrayBuffers || 0)
242
-
243
- if (logRPS) Object.assign(o, { rps10, rpsTotal })
244
-
245
- logger.log(inspect(o, inspectOpt))
246
-
247
- if (sizes?.items.length) {
248
- logger.log(sizes.getStats())
249
-
250
- if (sizesZipped?.items.length) {
251
- logger.log(sizesZipped.getStats())
252
- }
253
- }
254
-
255
- if (tenx) {
256
- let perHour: number | string =
257
- Math.round((batchedProgress * 1000 * 60 * 60) / (now - started)) || 0
258
- if (perHour > 900) {
259
- perHour = Math.round(perHour / 1000) + 'K'
260
- }
261
-
262
- logger.log(
263
- `${dimGrey(localTimeNow().toPretty())} ${white(metric)} took ${yellow(
264
- _since(started),
265
- )} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`,
266
- )
267
- } else if (final) {
268
- logger.log(
269
- `${boldWhite(metric)} took ${yellow(_since(started))} to process ${yellow(
270
- batchedProgress,
271
- )} rows with total RPS of ${yellow(rpsTotal)}`,
272
- )
273
- }
274
- }
275
29
  }