@naturalcycles/nodejs-lib 13.9.0 → 13.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/csv/csvReader.js +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/stream/progressLogger.d.ts +145 -0
- package/dist/stream/progressLogger.js +120 -0
- package/dist/stream/readable/readableForEach.d.ts +2 -0
- package/dist/stream/readable/readableForEach.js +2 -0
- package/dist/stream/readable/readableMapToArray.d.ts +2 -0
- package/dist/stream/readable/readableMapToArray.js +2 -0
- package/dist/stream/transform/transformLogProgress.d.ts +2 -105
- package/dist/stream/transform/transformLogProgress.js +4 -82
- package/package.json +3 -3
- package/src/csv/csvReader.ts +1 -1
- package/src/index.ts +1 -0
- package/src/stream/progressLogger.ts +309 -0
- package/src/stream/readable/readableForEach.ts +2 -0
- package/src/stream/readable/readableMapToArray.ts +2 -0
- package/src/stream/transform/transformLogProgress.ts +7 -253
package/dist/csv/csvReader.js
CHANGED
|
@@ -35,7 +35,7 @@ function csvStringParse(str, cfg = {}) {
|
|
|
35
35
|
exports.csvStringParse = csvStringParse;
|
|
36
36
|
function csvStringToArray(str) {
|
|
37
37
|
const objPattern = new RegExp('(,|\\r?\\n|\\r|^)(?:"([^"]*(?:""[^"]*)*)"|([^,\\r\\n]*))', 'gi');
|
|
38
|
-
let matches
|
|
38
|
+
let matches;
|
|
39
39
|
const arr = [[]];
|
|
40
40
|
while ((matches = objPattern.exec(str))) {
|
|
41
41
|
if (matches[1].length && matches[1] !== ',') {
|
package/dist/index.d.ts
CHANGED
|
@@ -35,6 +35,7 @@ export * from './stream/readable/readableMap';
|
|
|
35
35
|
export * from './stream/readable/readableMapToArray';
|
|
36
36
|
export * from './stream/readable/readableToArray';
|
|
37
37
|
export * from './stream/stream.model';
|
|
38
|
+
export * from './stream/progressLogger';
|
|
38
39
|
export * from './stream/transform/transformBuffer';
|
|
39
40
|
export * from './stream/transform/transformFilter';
|
|
40
41
|
export * from './stream/transform/transformLimit';
|
package/dist/index.js
CHANGED
|
@@ -39,6 +39,7 @@ tslib_1.__exportStar(require("./stream/readable/readableMap"), exports);
|
|
|
39
39
|
tslib_1.__exportStar(require("./stream/readable/readableMapToArray"), exports);
|
|
40
40
|
tslib_1.__exportStar(require("./stream/readable/readableToArray"), exports);
|
|
41
41
|
tslib_1.__exportStar(require("./stream/stream.model"), exports);
|
|
42
|
+
tslib_1.__exportStar(require("./stream/progressLogger"), exports);
|
|
42
43
|
tslib_1.__exportStar(require("./stream/transform/transformBuffer"), exports);
|
|
43
44
|
tslib_1.__exportStar(require("./stream/transform/transformFilter"), exports);
|
|
44
45
|
tslib_1.__exportStar(require("./stream/transform/transformLimit"), exports);
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { AnyObject, CommonLogger } from '@naturalcycles/js-lib';
|
|
2
|
+
export interface ProgressLoggerCfg<IN = any> {
|
|
3
|
+
/**
|
|
4
|
+
* Progress metric
|
|
5
|
+
*
|
|
6
|
+
* @default `progress`
|
|
7
|
+
*/
|
|
8
|
+
metric?: string;
|
|
9
|
+
/**
|
|
10
|
+
* Include `heapUsed` in log.
|
|
11
|
+
*
|
|
12
|
+
* @default false
|
|
13
|
+
*/
|
|
14
|
+
heapUsed?: boolean;
|
|
15
|
+
/**
|
|
16
|
+
* Include `heapTotal` in log.
|
|
17
|
+
*
|
|
18
|
+
* @default false
|
|
19
|
+
*/
|
|
20
|
+
heapTotal?: boolean;
|
|
21
|
+
/**
|
|
22
|
+
* Include `rss` in log.
|
|
23
|
+
*
|
|
24
|
+
* @default true
|
|
25
|
+
*/
|
|
26
|
+
rss?: boolean;
|
|
27
|
+
/**
|
|
28
|
+
* Include Peak RSS in log.
|
|
29
|
+
*
|
|
30
|
+
* @default true
|
|
31
|
+
*/
|
|
32
|
+
peakRSS?: boolean;
|
|
33
|
+
/**
|
|
34
|
+
* Include `external` in log.
|
|
35
|
+
*
|
|
36
|
+
* @default false
|
|
37
|
+
*/
|
|
38
|
+
external?: boolean;
|
|
39
|
+
/**
|
|
40
|
+
* Include `arrayBuffers` in log.
|
|
41
|
+
*
|
|
42
|
+
* @default false
|
|
43
|
+
*/
|
|
44
|
+
arrayBuffers?: boolean;
|
|
45
|
+
/**
|
|
46
|
+
* Log (rss - heapTotal)
|
|
47
|
+
* For convenience of debugging "out-of-heap" memory size.
|
|
48
|
+
*
|
|
49
|
+
* @default false
|
|
50
|
+
*/
|
|
51
|
+
rssMinusHeap?: boolean;
|
|
52
|
+
/**
|
|
53
|
+
* Log "rows per second"
|
|
54
|
+
*
|
|
55
|
+
* @default true
|
|
56
|
+
*/
|
|
57
|
+
logRPS?: boolean;
|
|
58
|
+
/**
|
|
59
|
+
* Set to false to disable logging progress
|
|
60
|
+
*
|
|
61
|
+
* @default true
|
|
62
|
+
*/
|
|
63
|
+
logProgress?: boolean;
|
|
64
|
+
/**
|
|
65
|
+
* Log progress event Nth record that is _processed_ (went through mapper).
|
|
66
|
+
* Set to 0 to disable logging.
|
|
67
|
+
*
|
|
68
|
+
* @default 1000
|
|
69
|
+
*/
|
|
70
|
+
logEvery?: number;
|
|
71
|
+
logger?: CommonLogger;
|
|
72
|
+
/**
|
|
73
|
+
* Function to return extra properties to the "progress object".
|
|
74
|
+
*
|
|
75
|
+
* chunk is undefined for "final" stats, otherwise is defined.
|
|
76
|
+
*/
|
|
77
|
+
extra?: (chunk: IN | undefined, index: number) => AnyObject;
|
|
78
|
+
/**
|
|
79
|
+
* If specified - will multiply the counter by this number.
|
|
80
|
+
* Useful e.g when using `transformBuffer({ batchSize: 500 })`, so
|
|
81
|
+
* it'll accurately represent the number of processed entries (not batches).
|
|
82
|
+
*
|
|
83
|
+
* Defaults to 1.
|
|
84
|
+
*/
|
|
85
|
+
batchSize?: number;
|
|
86
|
+
/**
|
|
87
|
+
* Experimental logging of item (chunk) sizes, when json-stringified.
|
|
88
|
+
*
|
|
89
|
+
* Defaults to false.
|
|
90
|
+
*
|
|
91
|
+
* @experimental
|
|
92
|
+
*/
|
|
93
|
+
logSizes?: boolean;
|
|
94
|
+
/**
|
|
95
|
+
* How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
|
|
96
|
+
* Defaults to 100_000.
|
|
97
|
+
* Cannot be Infinity.
|
|
98
|
+
*/
|
|
99
|
+
logSizesBuffer?: number;
|
|
100
|
+
/**
|
|
101
|
+
* Works in addition to `logSizes`. Adds "zipped sizes".
|
|
102
|
+
*
|
|
103
|
+
* @experimental
|
|
104
|
+
*/
|
|
105
|
+
logZippedSizes?: boolean;
|
|
106
|
+
}
|
|
107
|
+
export interface ProgressLogItem extends AnyObject {
|
|
108
|
+
heapUsed?: number;
|
|
109
|
+
heapTotal?: number;
|
|
110
|
+
rss?: number;
|
|
111
|
+
peakRSS?: number;
|
|
112
|
+
rssMinusHeap?: number;
|
|
113
|
+
external?: number;
|
|
114
|
+
arrayBuffers?: number;
|
|
115
|
+
rps10?: number;
|
|
116
|
+
rpsTotal?: number;
|
|
117
|
+
}
|
|
118
|
+
export declare class ProgressLogger<IN> implements Disposable {
|
|
119
|
+
constructor(cfg?: ProgressLoggerCfg<IN>);
|
|
120
|
+
cfg: ProgressLoggerCfg<IN> & {
|
|
121
|
+
logEvery: number;
|
|
122
|
+
logSizesBuffer: number;
|
|
123
|
+
batchSize: number;
|
|
124
|
+
metric: string;
|
|
125
|
+
logger: CommonLogger;
|
|
126
|
+
};
|
|
127
|
+
private started;
|
|
128
|
+
private lastSecondStarted;
|
|
129
|
+
private sma;
|
|
130
|
+
private logEvery10;
|
|
131
|
+
private processedLastSecond;
|
|
132
|
+
private progress;
|
|
133
|
+
private peakRSS;
|
|
134
|
+
private sizes?;
|
|
135
|
+
private sizesZipped?;
|
|
136
|
+
private start;
|
|
137
|
+
log(chunk?: IN): void;
|
|
138
|
+
done(): void;
|
|
139
|
+
[Symbol.dispose](): void;
|
|
140
|
+
private logStats;
|
|
141
|
+
}
|
|
142
|
+
/**
|
|
143
|
+
* Create new ProgressLogger.
|
|
144
|
+
*/
|
|
145
|
+
export declare function progressLogger<IN>(cfg?: ProgressLoggerCfg<IN>): ProgressLogger<IN>;
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.progressLogger = exports.ProgressLogger = void 0;
|
|
4
|
+
const node_util_1 = require("node:util");
|
|
5
|
+
const js_lib_1 = require("@naturalcycles/js-lib");
|
|
6
|
+
const colors_1 = require("../colors/colors");
|
|
7
|
+
const sizeStack_1 = require("./sizeStack");
|
|
8
|
+
const inspectOpt = {
|
|
9
|
+
colors: colors_1.hasColors,
|
|
10
|
+
breakLength: 300,
|
|
11
|
+
};
|
|
12
|
+
class ProgressLogger {
|
|
13
|
+
constructor(cfg = {}) {
|
|
14
|
+
this.cfg = {
|
|
15
|
+
metric: 'progress',
|
|
16
|
+
rss: true,
|
|
17
|
+
peakRSS: true,
|
|
18
|
+
logRPS: true,
|
|
19
|
+
logEvery: 1000,
|
|
20
|
+
logSizesBuffer: 100_000,
|
|
21
|
+
batchSize: 1,
|
|
22
|
+
logger: console,
|
|
23
|
+
logProgress: cfg.logProgress !== false && cfg.logEvery !== 0,
|
|
24
|
+
...cfg,
|
|
25
|
+
};
|
|
26
|
+
this.logEvery10 = this.cfg.logEvery * 10;
|
|
27
|
+
this.start();
|
|
28
|
+
this.logStats(); // initial
|
|
29
|
+
}
|
|
30
|
+
start() {
|
|
31
|
+
this.started = Date.now();
|
|
32
|
+
this.lastSecondStarted = Date.now();
|
|
33
|
+
this.sma = new js_lib_1.SimpleMovingAverage(10);
|
|
34
|
+
this.processedLastSecond = 0;
|
|
35
|
+
this.progress = 0;
|
|
36
|
+
this.peakRSS = 0;
|
|
37
|
+
this.sizes = this.cfg.logSizes ? new sizeStack_1.SizeStack('json', this.cfg.logSizesBuffer) : undefined;
|
|
38
|
+
this.sizesZipped = this.cfg.logZippedSizes
|
|
39
|
+
? new sizeStack_1.SizeStack('json.gz', this.cfg.logSizesBuffer)
|
|
40
|
+
: undefined;
|
|
41
|
+
}
|
|
42
|
+
log(chunk) {
|
|
43
|
+
this.progress++;
|
|
44
|
+
this.processedLastSecond++;
|
|
45
|
+
if (this.sizes) {
|
|
46
|
+
// Check it, cause gzipping might be delayed here..
|
|
47
|
+
void sizeStack_1.SizeStack.countItem(chunk, this.cfg.logger, this.sizes, this.sizesZipped);
|
|
48
|
+
}
|
|
49
|
+
if (this.cfg.logProgress && this.progress % this.cfg.logEvery === 0) {
|
|
50
|
+
this.logStats(chunk, false, this.progress % this.logEvery10 === 0);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
done() {
|
|
54
|
+
this.logStats(undefined, true);
|
|
55
|
+
}
|
|
56
|
+
[Symbol.dispose]() {
|
|
57
|
+
this.done();
|
|
58
|
+
}
|
|
59
|
+
logStats(chunk, final = false, tenx = false) {
|
|
60
|
+
if (!this.cfg.logProgress)
|
|
61
|
+
return;
|
|
62
|
+
const { metric, extra, batchSize, heapUsed: logHeapUsed, heapTotal: logHeapTotal, rss: logRss, peakRSS: logPeakRss, rssMinusHeap, external, arrayBuffers, logRPS, logger, } = this.cfg;
|
|
63
|
+
const mem = process.memoryUsage();
|
|
64
|
+
const now = Date.now();
|
|
65
|
+
const batchedProgress = this.progress * batchSize;
|
|
66
|
+
const lastRPS = (this.processedLastSecond * batchSize) / ((now - this.lastSecondStarted) / 1000) || 0;
|
|
67
|
+
const rpsTotal = Math.round(batchedProgress / ((now - this.started) / 1000)) || 0;
|
|
68
|
+
this.lastSecondStarted = now;
|
|
69
|
+
this.processedLastSecond = 0;
|
|
70
|
+
const rps10 = Math.round(this.sma.pushGetAvg(lastRPS));
|
|
71
|
+
if (mem.rss > this.peakRSS)
|
|
72
|
+
this.peakRSS = mem.rss;
|
|
73
|
+
const o = {
|
|
74
|
+
[final ? `${this.cfg.metric}_final` : this.cfg.metric]: batchedProgress,
|
|
75
|
+
};
|
|
76
|
+
if (extra)
|
|
77
|
+
Object.assign(o, extra(chunk, this.progress));
|
|
78
|
+
if (logHeapUsed)
|
|
79
|
+
o.heapUsed = (0, js_lib_1._mb)(mem.heapUsed);
|
|
80
|
+
if (logHeapTotal)
|
|
81
|
+
o.heapTotal = (0, js_lib_1._mb)(mem.heapTotal);
|
|
82
|
+
if (logRss)
|
|
83
|
+
o.rss = (0, js_lib_1._mb)(mem.rss);
|
|
84
|
+
if (logPeakRss)
|
|
85
|
+
o.peakRSS = (0, js_lib_1._mb)(this.peakRSS);
|
|
86
|
+
if (rssMinusHeap)
|
|
87
|
+
o.rssMinusHeap = (0, js_lib_1._mb)(mem.rss - mem.heapTotal);
|
|
88
|
+
if (external)
|
|
89
|
+
o.external = (0, js_lib_1._mb)(mem.external);
|
|
90
|
+
if (arrayBuffers)
|
|
91
|
+
o.arrayBuffers = (0, js_lib_1._mb)(mem.arrayBuffers || 0);
|
|
92
|
+
if (logRPS)
|
|
93
|
+
Object.assign(o, { rps10, rpsTotal });
|
|
94
|
+
logger.log((0, node_util_1.inspect)(o, inspectOpt));
|
|
95
|
+
if (this.sizes?.items.length) {
|
|
96
|
+
logger.log(this.sizes.getStats());
|
|
97
|
+
if (this.sizesZipped?.items.length) {
|
|
98
|
+
logger.log(this.sizesZipped.getStats());
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
if (tenx) {
|
|
102
|
+
let perHour = Math.round((batchedProgress * 1000 * 60 * 60) / (now - this.started)) || 0;
|
|
103
|
+
if (perHour > 900) {
|
|
104
|
+
perHour = Math.round(perHour / 1000) + 'K';
|
|
105
|
+
}
|
|
106
|
+
logger.log(`${(0, colors_1.dimGrey)((0, js_lib_1.localTimeNow)().toPretty())} ${(0, colors_1.white)(metric)} took ${(0, colors_1.yellow)((0, js_lib_1._since)(this.started))} so far to process ${(0, colors_1.yellow)(batchedProgress)} rows, ~${(0, colors_1.yellow)(perHour)}/hour`);
|
|
107
|
+
}
|
|
108
|
+
else if (final) {
|
|
109
|
+
logger.log(`${(0, colors_1.boldWhite)(metric)} took ${(0, colors_1.yellow)((0, js_lib_1._since)(this.started))} to process ${(0, colors_1.yellow)(batchedProgress)} rows with total RPS of ${(0, colors_1.yellow)(rpsTotal)}`);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
exports.ProgressLogger = ProgressLogger;
|
|
114
|
+
/**
|
|
115
|
+
* Create new ProgressLogger.
|
|
116
|
+
*/
|
|
117
|
+
function progressLogger(cfg = {}) {
|
|
118
|
+
return new ProgressLogger(cfg);
|
|
119
|
+
}
|
|
120
|
+
exports.progressLogger = progressLogger;
|
|
@@ -5,6 +5,8 @@ import { TransformMapOptions } from '../transform/transformMap';
|
|
|
5
5
|
* Convenience function to do `.forEach` over a Readable.
|
|
6
6
|
* Typed! (unlike default Readable).
|
|
7
7
|
*
|
|
8
|
+
* Try native readable.forEach() instead!
|
|
9
|
+
*
|
|
8
10
|
* @experimental
|
|
9
11
|
*/
|
|
10
12
|
export declare function readableForEach<T>(readable: ReadableTyped<T>, mapper: AbortableAsyncMapper<T, void>, opt?: TransformMapOptions<T, void>): Promise<void>;
|
|
@@ -8,6 +8,8 @@ const transformMap_1 = require("../transform/transformMap");
|
|
|
8
8
|
* Convenience function to do `.forEach` over a Readable.
|
|
9
9
|
* Typed! (unlike default Readable).
|
|
10
10
|
*
|
|
11
|
+
* Try native readable.forEach() instead!
|
|
12
|
+
*
|
|
11
13
|
* @experimental
|
|
12
14
|
*/
|
|
13
15
|
async function readableForEach(readable, mapper, opt = {}) {
|
|
@@ -6,5 +6,7 @@ import { TransformMapOptions } from '../transform/transformMap';
|
|
|
6
6
|
* passing each result via `transformMap`.
|
|
7
7
|
*
|
|
8
8
|
* Warning! All results are stored in memory (no backpressure).
|
|
9
|
+
*
|
|
10
|
+
* Try native readable.toArray instead!
|
|
9
11
|
*/
|
|
10
12
|
export declare function readableMapToArray<IN, OUT = IN>(stream: ReadableTyped<IN>, mapper?: AsyncMapper<IN, OUT>, opt?: TransformMapOptions<IN, OUT>): Promise<OUT[]>;
|
|
@@ -7,6 +7,8 @@ const index_1 = require("../../index");
|
|
|
7
7
|
* passing each result via `transformMap`.
|
|
8
8
|
*
|
|
9
9
|
* Warning! All results are stored in memory (no backpressure).
|
|
10
|
+
*
|
|
11
|
+
* Try native readable.toArray instead!
|
|
10
12
|
*/
|
|
11
13
|
async function readableMapToArray(stream, mapper = item => item, opt) {
|
|
12
14
|
const res = [];
|
|
@@ -1,109 +1,6 @@
|
|
|
1
|
-
import { AnyObject, CommonLogger } from '@naturalcycles/js-lib';
|
|
1
|
+
import { ProgressLoggerCfg } from '../progressLogger';
|
|
2
2
|
import { TransformOptions, TransformTyped } from '../stream.model';
|
|
3
|
-
export interface TransformLogProgressOptions<IN = any> extends TransformOptions {
|
|
4
|
-
/**
|
|
5
|
-
* Progress metric
|
|
6
|
-
*
|
|
7
|
-
* @default `progress`
|
|
8
|
-
*/
|
|
9
|
-
metric?: string;
|
|
10
|
-
/**
|
|
11
|
-
* Include `heapUsed` in log.
|
|
12
|
-
*
|
|
13
|
-
* @default false
|
|
14
|
-
*/
|
|
15
|
-
heapUsed?: boolean;
|
|
16
|
-
/**
|
|
17
|
-
* Include `heapTotal` in log.
|
|
18
|
-
*
|
|
19
|
-
* @default false
|
|
20
|
-
*/
|
|
21
|
-
heapTotal?: boolean;
|
|
22
|
-
/**
|
|
23
|
-
* Include `rss` in log.
|
|
24
|
-
*
|
|
25
|
-
* @default true
|
|
26
|
-
*/
|
|
27
|
-
rss?: boolean;
|
|
28
|
-
/**
|
|
29
|
-
* Incude Peak RSS in log.
|
|
30
|
-
*
|
|
31
|
-
* @default true
|
|
32
|
-
*/
|
|
33
|
-
peakRSS?: boolean;
|
|
34
|
-
/**
|
|
35
|
-
* Include `external` in log.
|
|
36
|
-
*
|
|
37
|
-
* @default false
|
|
38
|
-
*/
|
|
39
|
-
external?: boolean;
|
|
40
|
-
/**
|
|
41
|
-
* Include `arrayBuffers` in log.
|
|
42
|
-
*
|
|
43
|
-
* @default false
|
|
44
|
-
*/
|
|
45
|
-
arrayBuffers?: boolean;
|
|
46
|
-
/**
|
|
47
|
-
* Log (rss - heapTotal)
|
|
48
|
-
* For convenience of debugging "out-of-heap" memory size.
|
|
49
|
-
*
|
|
50
|
-
* @default false
|
|
51
|
-
*/
|
|
52
|
-
rssMinusHeap?: boolean;
|
|
53
|
-
/**
|
|
54
|
-
* Log "rows per second"
|
|
55
|
-
*
|
|
56
|
-
* @default true
|
|
57
|
-
*/
|
|
58
|
-
logRPS?: boolean;
|
|
59
|
-
/**
|
|
60
|
-
* Set to false to disable logging progress
|
|
61
|
-
*
|
|
62
|
-
* @default true
|
|
63
|
-
*/
|
|
64
|
-
logProgress?: boolean;
|
|
65
|
-
/**
|
|
66
|
-
* Log progress event Nth record that is _processed_ (went through mapper).
|
|
67
|
-
* Set to 0 to disable logging.
|
|
68
|
-
*
|
|
69
|
-
* @default 1000
|
|
70
|
-
*/
|
|
71
|
-
logEvery?: number;
|
|
72
|
-
logger?: CommonLogger;
|
|
73
|
-
/**
|
|
74
|
-
* Function to return extra properties to the "progress object".
|
|
75
|
-
*
|
|
76
|
-
* chunk is undefined for "final" stats, otherwise is defined.
|
|
77
|
-
*/
|
|
78
|
-
extra?: (chunk: IN | undefined, index: number) => AnyObject;
|
|
79
|
-
/**
|
|
80
|
-
* If specified - will multiply the counter by this number.
|
|
81
|
-
* Useful e.g when using `transformBuffer({ batchSize: 500 })`, so
|
|
82
|
-
* it'll accurately represent the number of processed entries (not batches).
|
|
83
|
-
*
|
|
84
|
-
* Defaults to 1.
|
|
85
|
-
*/
|
|
86
|
-
batchSize?: number;
|
|
87
|
-
/**
|
|
88
|
-
* Experimental logging of item (shunk) sizes, when json-stringified.
|
|
89
|
-
*
|
|
90
|
-
* Defaults to false.
|
|
91
|
-
*
|
|
92
|
-
* @experimental
|
|
93
|
-
*/
|
|
94
|
-
logSizes?: boolean;
|
|
95
|
-
/**
|
|
96
|
-
* How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
|
|
97
|
-
* Defaults to 100_000.
|
|
98
|
-
* Cannot be Infinity.
|
|
99
|
-
*/
|
|
100
|
-
logSizesBuffer?: number;
|
|
101
|
-
/**
|
|
102
|
-
* Works in addition to `logSizes`. Adds "zipped sizes".
|
|
103
|
-
*
|
|
104
|
-
* @experimental
|
|
105
|
-
*/
|
|
106
|
-
logZippedSizes?: boolean;
|
|
3
|
+
export interface TransformLogProgressOptions<IN = any> extends ProgressLoggerCfg<IN>, TransformOptions {
|
|
107
4
|
}
|
|
108
5
|
/**
|
|
109
6
|
* Pass-through transform that optionally logs progress.
|
|
@@ -2,101 +2,23 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.transformLogProgress = void 0;
|
|
4
4
|
const node_stream_1 = require("node:stream");
|
|
5
|
-
const node_util_1 = require("node:util");
|
|
6
|
-
const js_lib_1 = require("@naturalcycles/js-lib");
|
|
7
|
-
const colors_1 = require("../../colors/colors");
|
|
8
|
-
const sizeStack_1 = require("../sizeStack");
|
|
9
|
-
const inspectOpt = {
|
|
10
|
-
colors: colors_1.hasColors,
|
|
11
|
-
breakLength: 300,
|
|
12
|
-
};
|
|
5
|
+
const progressLogger_1 = require("../progressLogger");
|
|
13
6
|
/**
|
|
14
7
|
* Pass-through transform that optionally logs progress.
|
|
15
8
|
*/
|
|
16
9
|
function transformLogProgress(opt = {}) {
|
|
17
|
-
const
|
|
18
|
-
const logProgress = opt.logProgress !== false && logEvery !== 0; // true by default
|
|
19
|
-
const logEvery10 = logEvery * 10;
|
|
20
|
-
const started = Date.now();
|
|
21
|
-
let lastSecondStarted = Date.now();
|
|
22
|
-
const sma = new js_lib_1.SimpleMovingAverage(10); // over last 10 seconds
|
|
23
|
-
let processedLastSecond = 0;
|
|
24
|
-
let progress = 0;
|
|
25
|
-
let peakRSS = 0;
|
|
26
|
-
const sizes = logSizes ? new sizeStack_1.SizeStack('json', logSizesBuffer) : undefined;
|
|
27
|
-
const sizesZipped = logZippedSizes ? new sizeStack_1.SizeStack('json.gz', logSizesBuffer) : undefined;
|
|
28
|
-
logStats(); // initial
|
|
10
|
+
const progress = (0, progressLogger_1.progressLogger)(opt);
|
|
29
11
|
return new node_stream_1.Transform({
|
|
30
12
|
objectMode: true,
|
|
31
13
|
...opt,
|
|
32
14
|
transform(chunk, _, cb) {
|
|
33
|
-
progress++;
|
|
34
|
-
processedLastSecond++;
|
|
35
|
-
if (sizes) {
|
|
36
|
-
// Check it, cause gzipping might be delayed here..
|
|
37
|
-
void sizeStack_1.SizeStack.countItem(chunk, logger, sizes, sizesZipped);
|
|
38
|
-
}
|
|
39
|
-
if (logProgress && progress % logEvery === 0) {
|
|
40
|
-
logStats(chunk, false, progress % logEvery10 === 0);
|
|
41
|
-
}
|
|
15
|
+
progress.log(chunk);
|
|
42
16
|
cb(null, chunk); // pass-through
|
|
43
17
|
},
|
|
44
18
|
final(cb) {
|
|
45
|
-
|
|
19
|
+
progress.done();
|
|
46
20
|
cb();
|
|
47
21
|
},
|
|
48
22
|
});
|
|
49
|
-
function logStats(chunk, final = false, tenx = false) {
|
|
50
|
-
if (!logProgress)
|
|
51
|
-
return;
|
|
52
|
-
const mem = process.memoryUsage();
|
|
53
|
-
const now = Date.now();
|
|
54
|
-
const batchedProgress = progress * batchSize;
|
|
55
|
-
const lastRPS = (processedLastSecond * batchSize) / ((now - lastSecondStarted) / 1000) || 0;
|
|
56
|
-
const rpsTotal = Math.round(batchedProgress / ((now - started) / 1000)) || 0;
|
|
57
|
-
lastSecondStarted = now;
|
|
58
|
-
processedLastSecond = 0;
|
|
59
|
-
const rps10 = Math.round(sma.pushGetAvg(lastRPS));
|
|
60
|
-
if (mem.rss > peakRSS)
|
|
61
|
-
peakRSS = mem.rss;
|
|
62
|
-
const o = {
|
|
63
|
-
[final ? `${metric}_final` : metric]: batchedProgress,
|
|
64
|
-
};
|
|
65
|
-
if (extra)
|
|
66
|
-
Object.assign(o, extra(chunk, progress));
|
|
67
|
-
if (logHeapUsed)
|
|
68
|
-
o.heapUsed = (0, js_lib_1._mb)(mem.heapUsed);
|
|
69
|
-
if (logHeapTotal)
|
|
70
|
-
o.heapTotal = (0, js_lib_1._mb)(mem.heapTotal);
|
|
71
|
-
if (logRss)
|
|
72
|
-
o.rss = (0, js_lib_1._mb)(mem.rss);
|
|
73
|
-
if (logPeakRSS)
|
|
74
|
-
o.peakRSS = (0, js_lib_1._mb)(peakRSS);
|
|
75
|
-
if (opt.rssMinusHeap)
|
|
76
|
-
o.rssMinusHeap = (0, js_lib_1._mb)(mem.rss - mem.heapTotal);
|
|
77
|
-
if (opt.external)
|
|
78
|
-
o.external = (0, js_lib_1._mb)(mem.external);
|
|
79
|
-
if (opt.arrayBuffers)
|
|
80
|
-
o.arrayBuffers = (0, js_lib_1._mb)(mem.arrayBuffers || 0);
|
|
81
|
-
if (logRPS)
|
|
82
|
-
Object.assign(o, { rps10, rpsTotal });
|
|
83
|
-
logger.log((0, node_util_1.inspect)(o, inspectOpt));
|
|
84
|
-
if (sizes?.items.length) {
|
|
85
|
-
logger.log(sizes.getStats());
|
|
86
|
-
if (sizesZipped?.items.length) {
|
|
87
|
-
logger.log(sizesZipped.getStats());
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
if (tenx) {
|
|
91
|
-
let perHour = Math.round((batchedProgress * 1000 * 60 * 60) / (now - started)) || 0;
|
|
92
|
-
if (perHour > 900) {
|
|
93
|
-
perHour = Math.round(perHour / 1000) + 'K';
|
|
94
|
-
}
|
|
95
|
-
logger.log(`${(0, colors_1.dimGrey)((0, js_lib_1.localTimeNow)().toPretty())} ${(0, colors_1.white)(metric)} took ${(0, colors_1.yellow)((0, js_lib_1._since)(started))} so far to process ${(0, colors_1.yellow)(batchedProgress)} rows, ~${(0, colors_1.yellow)(perHour)}/hour`);
|
|
96
|
-
}
|
|
97
|
-
else if (final) {
|
|
98
|
-
logger.log(`${(0, colors_1.boldWhite)(metric)} took ${(0, colors_1.yellow)((0, js_lib_1._since)(started))} to process ${(0, colors_1.yellow)(batchedProgress)} rows with total RPS of ${(0, colors_1.yellow)(rpsTotal)}`);
|
|
99
|
-
}
|
|
100
|
-
}
|
|
101
23
|
}
|
|
102
24
|
exports.transformLogProgress = transformLogProgress;
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@naturalcycles/nodejs-lib",
|
|
3
|
-
"version": "13.9.0",
|
|
3
|
+
"version": "13.10.0",
|
|
4
4
|
"scripts": {
|
|
5
|
-
"prepare": "husky
|
|
5
|
+
"prepare": "husky",
|
|
6
6
|
"docs-serve": "vuepress dev docs",
|
|
7
7
|
"docs-build": "vuepress build docs",
|
|
8
8
|
"slack-this-debug": "tsn ./src/bin/slack-this.ts --channel test --msg 'Hello slack!'",
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
"@types/jsonwebtoken": "^9.0.0",
|
|
20
20
|
"@types/through2-concurrent": "^2.0.0",
|
|
21
21
|
"ajv": "^8.6.2",
|
|
22
|
-
"ajv-formats": "^
|
|
22
|
+
"ajv-formats": "^3.0.1",
|
|
23
23
|
"ajv-keywords": "^5.0.0",
|
|
24
24
|
"binary-split": "^1.0.5",
|
|
25
25
|
"chalk": "^4.0.0",
|
package/src/csv/csvReader.ts
CHANGED
|
@@ -57,7 +57,7 @@ export function csvStringParse<T extends AnyObject = any>(
|
|
|
57
57
|
|
|
58
58
|
export function csvStringToArray(str: string): string[][] {
|
|
59
59
|
const objPattern = new RegExp('(,|\\r?\\n|\\r|^)(?:"([^"]*(?:""[^"]*)*)"|([^,\\r\\n]*))', 'gi')
|
|
60
|
-
let matches
|
|
60
|
+
let matches: RegExpExecArray | null
|
|
61
61
|
const arr: any[][] = [[]]
|
|
62
62
|
|
|
63
63
|
while ((matches = objPattern.exec(str))) {
|
package/src/index.ts
CHANGED
|
@@ -45,6 +45,7 @@ export * from './stream/readable/readableMap'
|
|
|
45
45
|
export * from './stream/readable/readableMapToArray'
|
|
46
46
|
export * from './stream/readable/readableToArray'
|
|
47
47
|
export * from './stream/stream.model'
|
|
48
|
+
export * from './stream/progressLogger'
|
|
48
49
|
export * from './stream/transform/transformBuffer'
|
|
49
50
|
export * from './stream/transform/transformFilter'
|
|
50
51
|
export * from './stream/transform/transformLimit'
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
import { inspect, InspectOptions } from 'node:util'
|
|
2
|
+
import {
|
|
3
|
+
_mb,
|
|
4
|
+
_since,
|
|
5
|
+
AnyObject,
|
|
6
|
+
CommonLogger,
|
|
7
|
+
localTimeNow,
|
|
8
|
+
SimpleMovingAverage,
|
|
9
|
+
UnixTimestampMillisNumber,
|
|
10
|
+
} from '@naturalcycles/js-lib'
|
|
11
|
+
import { boldWhite, dimGrey, hasColors, white, yellow } from '../colors/colors'
|
|
12
|
+
import { SizeStack } from './sizeStack'
|
|
13
|
+
|
|
14
|
+
export interface ProgressLoggerCfg<IN = any> {
|
|
15
|
+
/**
|
|
16
|
+
* Progress metric
|
|
17
|
+
*
|
|
18
|
+
* @default `progress`
|
|
19
|
+
*/
|
|
20
|
+
metric?: string
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Include `heapUsed` in log.
|
|
24
|
+
*
|
|
25
|
+
* @default false
|
|
26
|
+
*/
|
|
27
|
+
heapUsed?: boolean
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Include `heapTotal` in log.
|
|
31
|
+
*
|
|
32
|
+
* @default false
|
|
33
|
+
*/
|
|
34
|
+
heapTotal?: boolean
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Include `rss` in log.
|
|
38
|
+
*
|
|
39
|
+
* @default true
|
|
40
|
+
*/
|
|
41
|
+
rss?: boolean
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Include Peak RSS in log.
|
|
45
|
+
*
|
|
46
|
+
* @default true
|
|
47
|
+
*/
|
|
48
|
+
peakRSS?: boolean
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Include `external` in log.
|
|
52
|
+
*
|
|
53
|
+
* @default false
|
|
54
|
+
*/
|
|
55
|
+
external?: boolean
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Include `arrayBuffers` in log.
|
|
59
|
+
*
|
|
60
|
+
* @default false
|
|
61
|
+
*/
|
|
62
|
+
arrayBuffers?: boolean
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Log (rss - heapTotal)
|
|
66
|
+
* For convenience of debugging "out-of-heap" memory size.
|
|
67
|
+
*
|
|
68
|
+
* @default false
|
|
69
|
+
*/
|
|
70
|
+
rssMinusHeap?: boolean
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Log "rows per second"
|
|
74
|
+
*
|
|
75
|
+
* @default true
|
|
76
|
+
*/
|
|
77
|
+
logRPS?: boolean
|
|
78
|
+
|
|
79
|
+
/**
|
|
80
|
+
* Set to false to disable logging progress
|
|
81
|
+
*
|
|
82
|
+
* @default true
|
|
83
|
+
*/
|
|
84
|
+
logProgress?: boolean
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Log progress event Nth record that is _processed_ (went through mapper).
|
|
88
|
+
* Set to 0 to disable logging.
|
|
89
|
+
*
|
|
90
|
+
* @default 1000
|
|
91
|
+
*/
|
|
92
|
+
logEvery?: number
|
|
93
|
+
|
|
94
|
+
logger?: CommonLogger
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Function to return extra properties to the "progress object".
|
|
98
|
+
*
|
|
99
|
+
* chunk is undefined for "final" stats, otherwise is defined.
|
|
100
|
+
*/
|
|
101
|
+
extra?: (chunk: IN | undefined, index: number) => AnyObject
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* If specified - will multiply the counter by this number.
|
|
105
|
+
* Useful e.g when using `transformBuffer({ batchSize: 500 })`, so
|
|
106
|
+
* it'll accurately represent the number of processed entries (not batches).
|
|
107
|
+
*
|
|
108
|
+
* Defaults to 1.
|
|
109
|
+
*/
|
|
110
|
+
batchSize?: number
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Experimental logging of item (chunk) sizes, when json-stringified.
|
|
114
|
+
*
|
|
115
|
+
* Defaults to false.
|
|
116
|
+
*
|
|
117
|
+
* @experimental
|
|
118
|
+
*/
|
|
119
|
+
logSizes?: boolean
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
|
|
123
|
+
* Defaults to 100_000.
|
|
124
|
+
* Cannot be Infinity.
|
|
125
|
+
*/
|
|
126
|
+
logSizesBuffer?: number
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Works in addition to `logSizes`. Adds "zipped sizes".
|
|
130
|
+
*
|
|
131
|
+
* @experimental
|
|
132
|
+
*/
|
|
133
|
+
logZippedSizes?: boolean
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
export interface ProgressLogItem extends AnyObject {
|
|
137
|
+
heapUsed?: number
|
|
138
|
+
heapTotal?: number
|
|
139
|
+
rss?: number
|
|
140
|
+
peakRSS?: number
|
|
141
|
+
rssMinusHeap?: number
|
|
142
|
+
external?: number
|
|
143
|
+
arrayBuffers?: number
|
|
144
|
+
rps10?: number
|
|
145
|
+
rpsTotal?: number
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
const inspectOpt: InspectOptions = {
|
|
149
|
+
colors: hasColors,
|
|
150
|
+
breakLength: 300,
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
export class ProgressLogger<IN> implements Disposable {
|
|
154
|
+
constructor(cfg: ProgressLoggerCfg<IN> = {}) {
|
|
155
|
+
this.cfg = {
|
|
156
|
+
metric: 'progress',
|
|
157
|
+
rss: true,
|
|
158
|
+
peakRSS: true,
|
|
159
|
+
logRPS: true,
|
|
160
|
+
logEvery: 1000,
|
|
161
|
+
logSizesBuffer: 100_000,
|
|
162
|
+
batchSize: 1,
|
|
163
|
+
logger: console,
|
|
164
|
+
logProgress: cfg.logProgress !== false && cfg.logEvery !== 0,
|
|
165
|
+
...cfg,
|
|
166
|
+
}
|
|
167
|
+
this.logEvery10 = this.cfg.logEvery * 10
|
|
168
|
+
|
|
169
|
+
this.start()
|
|
170
|
+
this.logStats() // initial
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
cfg!: ProgressLoggerCfg<IN> & {
|
|
174
|
+
logEvery: number
|
|
175
|
+
logSizesBuffer: number
|
|
176
|
+
batchSize: number
|
|
177
|
+
metric: string
|
|
178
|
+
logger: CommonLogger
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
private started!: UnixTimestampMillisNumber
|
|
182
|
+
private lastSecondStarted!: UnixTimestampMillisNumber
|
|
183
|
+
private sma!: SimpleMovingAverage
|
|
184
|
+
private logEvery10!: number
|
|
185
|
+
private processedLastSecond!: number
|
|
186
|
+
private progress!: number
|
|
187
|
+
private peakRSS!: number
|
|
188
|
+
private sizes?: SizeStack
|
|
189
|
+
private sizesZipped?: SizeStack
|
|
190
|
+
|
|
191
|
+
private start(): void {
|
|
192
|
+
this.started = Date.now()
|
|
193
|
+
this.lastSecondStarted = Date.now()
|
|
194
|
+
this.sma = new SimpleMovingAverage(10)
|
|
195
|
+
this.processedLastSecond = 0
|
|
196
|
+
this.progress = 0
|
|
197
|
+
this.peakRSS = 0
|
|
198
|
+
this.sizes = this.cfg.logSizes ? new SizeStack('json', this.cfg.logSizesBuffer) : undefined
|
|
199
|
+
this.sizesZipped = this.cfg.logZippedSizes
|
|
200
|
+
? new SizeStack('json.gz', this.cfg.logSizesBuffer)
|
|
201
|
+
: undefined
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
log(chunk?: IN): void {
|
|
205
|
+
this.progress++
|
|
206
|
+
this.processedLastSecond++
|
|
207
|
+
|
|
208
|
+
if (this.sizes) {
|
|
209
|
+
// Check it, cause gzipping might be delayed here..
|
|
210
|
+
void SizeStack.countItem(chunk, this.cfg.logger, this.sizes, this.sizesZipped)
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
if (this.cfg.logProgress && this.progress % this.cfg.logEvery === 0) {
|
|
214
|
+
this.logStats(chunk, false, this.progress % this.logEvery10 === 0)
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
done(): void {
|
|
219
|
+
this.logStats(undefined, true)
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
[Symbol.dispose](): void {
|
|
223
|
+
this.done()
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/**
 * Builds one progress snapshot and writes it to `cfg.logger`.
 *
 * Does nothing when `cfg.logProgress` is falsy. Otherwise it:
 * - computes the rate since the previous snapshot and the overall rate,
 *   then resets the "since previous snapshot" counter and timestamp;
 * - logs an object with the metric counter plus the memory/RPS fields enabled in cfg;
 * - logs item-size stats when size tracking has collected any items;
 * - logs a human-readable summary line when `tenx` or `final` is set.
 *
 * @param chunk item that triggered the snapshot (undefined for the final one); passed to `cfg.extra`.
 * @param final when true, the metric key gets a `_final` suffix and a closing summary line is logged.
 * @param tenx when true, a "so far" line with a per-hour estimate is logged
 *   (takes precedence over the `final` summary line).
 */
private logStats(chunk?: IN, final = false, tenx = false): void {
  if (!this.cfg.logProgress) return

  const {
    metric,
    extra,
    batchSize,
    heapUsed: logHeapUsed,
    heapTotal: logHeapTotal,
    rss: logRss,
    peakRSS: logPeakRss,
    rssMinusHeap,
    external,
    arrayBuffers,
    logRPS,
    logger,
  } = this.cfg

  const mem = process.memoryUsage()

  const now = Date.now()
  // Elapsed milliseconds. Either value can be 0 when two snapshots fall into the same
  // millisecond (Date.now() has ms resolution), and could be negative after a clock adjustment.
  const msSinceLastSnapshot = now - this.lastSecondStarted
  const msTotal = now - this.started

  const batchedProgress = this.progress * batchSize

  // Rates are only meaningful for a positive elapsed time. Dividing by 0 would produce
  // Infinity, which `|| 0` does NOT catch, and which would then poison the moving average
  // and be printed as "Infinity". `|| 0` still covers NaN.
  const lastRPS =
    msSinceLastSnapshot > 0
      ? (this.processedLastSecond * batchSize) / (msSinceLastSnapshot / 1000) || 0
      : 0
  const rpsTotal = msTotal > 0 ? Math.round(batchedProgress / (msTotal / 1000)) || 0 : 0

  // Start a new measuring interval
  this.lastSecondStarted = now
  this.processedLastSecond = 0

  const rps10 = Math.round(this.sma.pushGetAvg(lastRPS))
  if (mem.rss > this.peakRSS) this.peakRSS = mem.rss

  const o: ProgressLogItem = {
    [final ? `${metric}_final` : metric]: batchedProgress,
  }

  // User-provided extra fields go right after the counter, before the built-in ones
  if (extra) Object.assign(o, extra(chunk, this.progress))
  if (logHeapUsed) o.heapUsed = _mb(mem.heapUsed)
  if (logHeapTotal) o.heapTotal = _mb(mem.heapTotal)
  if (logRss) o.rss = _mb(mem.rss)
  if (logPeakRss) o.peakRSS = _mb(this.peakRSS)
  if (rssMinusHeap) o.rssMinusHeap = _mb(mem.rss - mem.heapTotal)
  if (external) o.external = _mb(mem.external)
  if (arrayBuffers) o.arrayBuffers = _mb(mem.arrayBuffers || 0)

  if (logRPS) Object.assign(o, { rps10, rpsTotal })

  logger.log(inspect(o, inspectOpt))

  if (this.sizes?.items.length) {
    logger.log(this.sizes.getStats())

    // Zipped stats are only reported together with the plain ones
    if (this.sizesZipped?.items.length) {
      logger.log(this.sizesZipped.getStats())
    }
  }

  if (tenx) {
    // Extrapolate the overall rate to items per hour; same zero-elapsed guard as above
    let perHour: number | string =
      msTotal > 0 ? Math.round((batchedProgress * 1000 * 60 * 60) / msTotal) || 0 : 0
    if (perHour > 900) {
      perHour = Math.round(perHour / 1000) + 'K'
    }

    logger.log(
      `${dimGrey(localTimeNow().toPretty())} ${white(metric)} took ${yellow(
        _since(this.started),
      )} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`,
    )
  } else if (final) {
    logger.log(
      `${boldWhite(metric)} took ${yellow(_since(this.started))} to process ${yellow(
        batchedProgress,
      )} rows with total RPS of ${yellow(rpsTotal)}`,
    )
  }
}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
/**
 * Factory function: constructs a ProgressLogger from the given config
 * (an empty config is used when none is passed) and returns it.
 */
export function progressLogger<IN>(cfg: ProgressLoggerCfg<IN> = {}): ProgressLogger<IN> {
  const instance = new ProgressLogger(cfg)
  return instance
}
|
|
@@ -6,6 +6,8 @@ import { transformMap, TransformMapOptions } from '../transform/transformMap'
|
|
|
6
6
|
* Convenience function to do `.forEach` over a Readable.
|
|
7
7
|
* Typed! (unlike default Readable).
|
|
8
8
|
*
|
|
9
|
+
* Try native readable.forEach() instead!
|
|
10
|
+
*
|
|
9
11
|
* @experimental
|
|
10
12
|
*/
|
|
11
13
|
export async function readableForEach<T>(
|
|
@@ -8,6 +8,8 @@ import { TransformMapOptions } from '../transform/transformMap'
|
|
|
8
8
|
* passing each result via `transformMap`.
|
|
9
9
|
*
|
|
10
10
|
* Warning! All results are stored in memory (no backpressure).
|
|
11
|
+
*
|
|
12
|
+
* Try native readable.toArray instead!
|
|
11
13
|
*/
|
|
12
14
|
export async function readableMapToArray<IN, OUT = IN>(
|
|
13
15
|
stream: ReadableTyped<IN>,
|
|
@@ -1,155 +1,10 @@
|
|
|
1
1
|
import { Transform } from 'node:stream'
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
SimpleMovingAverage,
|
|
5
|
-
_mb,
|
|
6
|
-
_since,
|
|
7
|
-
AnyObject,
|
|
8
|
-
CommonLogger,
|
|
9
|
-
localTimeNow,
|
|
10
|
-
} from '@naturalcycles/js-lib'
|
|
11
|
-
import { hasColors, boldWhite, dimGrey, white, yellow } from '../../colors/colors'
|
|
12
|
-
import { SizeStack } from '../sizeStack'
|
|
2
|
+
import { progressLogger, ProgressLoggerCfg } from '../progressLogger'
|
|
13
3
|
import { TransformOptions, TransformTyped } from '../stream.model'
|
|
14
4
|
|
|
15
|
-
export interface TransformLogProgressOptions<IN = any>
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
*
|
|
19
|
-
* @default `progress`
|
|
20
|
-
*/
|
|
21
|
-
metric?: string
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* Include `heapUsed` in log.
|
|
25
|
-
*
|
|
26
|
-
* @default false
|
|
27
|
-
*/
|
|
28
|
-
heapUsed?: boolean
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
* Include `heapTotal` in log.
|
|
32
|
-
*
|
|
33
|
-
* @default false
|
|
34
|
-
*/
|
|
35
|
-
heapTotal?: boolean
|
|
36
|
-
|
|
37
|
-
/**
|
|
38
|
-
* Include `rss` in log.
|
|
39
|
-
*
|
|
40
|
-
* @default true
|
|
41
|
-
*/
|
|
42
|
-
rss?: boolean
|
|
43
|
-
|
|
44
|
-
/**
|
|
45
|
-
* Include Peak RSS in log.
|
|
46
|
-
*
|
|
47
|
-
* @default true
|
|
48
|
-
*/
|
|
49
|
-
peakRSS?: boolean
|
|
50
|
-
|
|
51
|
-
/**
|
|
52
|
-
* Include `external` in log.
|
|
53
|
-
*
|
|
54
|
-
* @default false
|
|
55
|
-
*/
|
|
56
|
-
external?: boolean
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* Include `arrayBuffers` in log.
|
|
60
|
-
*
|
|
61
|
-
* @default false
|
|
62
|
-
*/
|
|
63
|
-
arrayBuffers?: boolean
|
|
64
|
-
|
|
65
|
-
/**
|
|
66
|
-
* Log (rss - heapTotal)
|
|
67
|
-
* For convenience of debugging "out-of-heap" memory size.
|
|
68
|
-
*
|
|
69
|
-
* @default false
|
|
70
|
-
*/
|
|
71
|
-
rssMinusHeap?: boolean
|
|
72
|
-
|
|
73
|
-
/**
|
|
74
|
-
* Log "rows per second"
|
|
75
|
-
*
|
|
76
|
-
* @default true
|
|
77
|
-
*/
|
|
78
|
-
logRPS?: boolean
|
|
79
|
-
|
|
80
|
-
/**
|
|
81
|
-
* Set to false to disable logging progress
|
|
82
|
-
*
|
|
83
|
-
* @default true
|
|
84
|
-
*/
|
|
85
|
-
logProgress?: boolean
|
|
86
|
-
|
|
87
|
-
/**
|
|
88
|
-
* Log progress every Nth record that is _processed_ (went through mapper).
|
|
89
|
-
* Set to 0 to disable logging.
|
|
90
|
-
*
|
|
91
|
-
* @default 1000
|
|
92
|
-
*/
|
|
93
|
-
logEvery?: number
|
|
94
|
-
|
|
95
|
-
logger?: CommonLogger
|
|
96
|
-
|
|
97
|
-
/**
|
|
98
|
-
* Function to return extra properties to the "progress object".
|
|
99
|
-
*
|
|
100
|
-
* chunk is undefined for "final" stats, otherwise is defined.
|
|
101
|
-
*/
|
|
102
|
-
extra?: (chunk: IN | undefined, index: number) => AnyObject
|
|
103
|
-
|
|
104
|
-
/**
|
|
105
|
-
* If specified - will multiply the counter by this number.
|
|
106
|
-
* Useful e.g when using `transformBuffer({ batchSize: 500 })`, so
|
|
107
|
-
* it'll accurately represent the number of processed entries (not batches).
|
|
108
|
-
*
|
|
109
|
-
* Defaults to 1.
|
|
110
|
-
*/
|
|
111
|
-
batchSize?: number
|
|
112
|
-
|
|
113
|
-
/**
|
|
114
|
-
* Experimental logging of item (chunk) sizes, when json-stringified.
|
|
115
|
-
*
|
|
116
|
-
* Defaults to false.
|
|
117
|
-
*
|
|
118
|
-
* @experimental
|
|
119
|
-
*/
|
|
120
|
-
logSizes?: boolean
|
|
121
|
-
|
|
122
|
-
/**
|
|
123
|
-
* How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
|
|
124
|
-
* Defaults to 100_000.
|
|
125
|
-
* Cannot be Infinity.
|
|
126
|
-
*/
|
|
127
|
-
logSizesBuffer?: number
|
|
128
|
-
|
|
129
|
-
/**
|
|
130
|
-
* Works in addition to `logSizes`. Adds "zipped sizes".
|
|
131
|
-
*
|
|
132
|
-
* @experimental
|
|
133
|
-
*/
|
|
134
|
-
logZippedSizes?: boolean
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
interface LogItem extends AnyObject {
|
|
138
|
-
heapUsed?: number
|
|
139
|
-
heapTotal?: number
|
|
140
|
-
rss?: number
|
|
141
|
-
peakRSS?: number
|
|
142
|
-
rssMinusHeap?: number
|
|
143
|
-
external?: number
|
|
144
|
-
arrayBuffers?: number
|
|
145
|
-
rps10?: number
|
|
146
|
-
rpsTotal?: number
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
const inspectOpt: InspectOptions = {
|
|
150
|
-
colors: hasColors,
|
|
151
|
-
breakLength: 300,
|
|
152
|
-
}
|
|
5
|
+
/**
 * Options of `transformLogProgress`.
 *
 * Declares no members of its own: it is the combination of
 * the progress logger config and the generic transform options.
 */
export interface TransformLogProgressOptions<IN = any>
  extends ProgressLoggerCfg<IN>,
    TransformOptions {}
|
|
153
8
|
|
|
154
9
|
/**
|
|
155
10
|
* Pass-through transform that optionally logs progress.
|
|
@@ -157,119 +12,18 @@ const inspectOpt: InspectOptions = {
|
|
|
157
12
|
export function transformLogProgress<IN = any>(
|
|
158
13
|
opt: TransformLogProgressOptions = {},
|
|
159
14
|
): TransformTyped<IN, IN> {
|
|
160
|
-
const
|
|
161
|
-
metric = 'progress',
|
|
162
|
-
heapTotal: logHeapTotal = false,
|
|
163
|
-
heapUsed: logHeapUsed = false,
|
|
164
|
-
rss: logRss = true,
|
|
165
|
-
peakRSS: logPeakRSS = true,
|
|
166
|
-
logRPS = true,
|
|
167
|
-
logEvery = 1000,
|
|
168
|
-
logSizes = false,
|
|
169
|
-
logSizesBuffer = 100_000,
|
|
170
|
-
logZippedSizes = false,
|
|
171
|
-
batchSize = 1,
|
|
172
|
-
extra,
|
|
173
|
-
logger = console,
|
|
174
|
-
} = opt
|
|
175
|
-
const logProgress = opt.logProgress !== false && logEvery !== 0 // true by default
|
|
176
|
-
const logEvery10 = logEvery * 10
|
|
177
|
-
|
|
178
|
-
const started = Date.now()
|
|
179
|
-
let lastSecondStarted = Date.now()
|
|
180
|
-
const sma = new SimpleMovingAverage(10) // over last 10 seconds
|
|
181
|
-
let processedLastSecond = 0
|
|
182
|
-
let progress = 0
|
|
183
|
-
let peakRSS = 0
|
|
184
|
-
|
|
185
|
-
const sizes = logSizes ? new SizeStack('json', logSizesBuffer) : undefined
|
|
186
|
-
const sizesZipped = logZippedSizes ? new SizeStack('json.gz', logSizesBuffer) : undefined
|
|
187
|
-
|
|
188
|
-
logStats() // initial
|
|
15
|
+
const progress = progressLogger(opt)
|
|
189
16
|
|
|
190
17
|
return new Transform({
|
|
191
18
|
objectMode: true,
|
|
192
19
|
...opt,
|
|
193
20
|
transform(chunk: IN, _, cb) {
|
|
194
|
-
progress
|
|
195
|
-
processedLastSecond++
|
|
196
|
-
|
|
197
|
-
if (sizes) {
|
|
198
|
-
// Check it, cause gzipping might be delayed here..
|
|
199
|
-
void SizeStack.countItem(chunk, logger, sizes, sizesZipped)
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
if (logProgress && progress % logEvery === 0) {
|
|
203
|
-
logStats(chunk, false, progress % logEvery10 === 0)
|
|
204
|
-
}
|
|
205
|
-
|
|
21
|
+
progress.log(chunk)
|
|
206
22
|
cb(null, chunk) // pass-through
|
|
207
23
|
},
|
|
208
24
|
final(cb) {
|
|
209
|
-
|
|
210
|
-
|
|
25
|
+
progress.done()
|
|
211
26
|
cb()
|
|
212
27
|
},
|
|
213
28
|
})
|
|
214
|
-
|
|
215
|
-
function logStats(chunk?: IN, final = false, tenx = false): void {
|
|
216
|
-
if (!logProgress) return
|
|
217
|
-
|
|
218
|
-
const mem = process.memoryUsage()
|
|
219
|
-
|
|
220
|
-
const now = Date.now()
|
|
221
|
-
const batchedProgress = progress * batchSize
|
|
222
|
-
const lastRPS = (processedLastSecond * batchSize) / ((now - lastSecondStarted) / 1000) || 0
|
|
223
|
-
const rpsTotal = Math.round(batchedProgress / ((now - started) / 1000)) || 0
|
|
224
|
-
lastSecondStarted = now
|
|
225
|
-
processedLastSecond = 0
|
|
226
|
-
|
|
227
|
-
const rps10 = Math.round(sma.pushGetAvg(lastRPS))
|
|
228
|
-
if (mem.rss > peakRSS) peakRSS = mem.rss
|
|
229
|
-
|
|
230
|
-
const o: LogItem = {
|
|
231
|
-
[final ? `${metric}_final` : metric]: batchedProgress,
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
if (extra) Object.assign(o, extra(chunk, progress))
|
|
235
|
-
if (logHeapUsed) o.heapUsed = _mb(mem.heapUsed)
|
|
236
|
-
if (logHeapTotal) o.heapTotal = _mb(mem.heapTotal)
|
|
237
|
-
if (logRss) o.rss = _mb(mem.rss)
|
|
238
|
-
if (logPeakRSS) o.peakRSS = _mb(peakRSS)
|
|
239
|
-
if (opt.rssMinusHeap) o.rssMinusHeap = _mb(mem.rss - mem.heapTotal)
|
|
240
|
-
if (opt.external) o.external = _mb(mem.external)
|
|
241
|
-
if (opt.arrayBuffers) o.arrayBuffers = _mb(mem.arrayBuffers || 0)
|
|
242
|
-
|
|
243
|
-
if (logRPS) Object.assign(o, { rps10, rpsTotal })
|
|
244
|
-
|
|
245
|
-
logger.log(inspect(o, inspectOpt))
|
|
246
|
-
|
|
247
|
-
if (sizes?.items.length) {
|
|
248
|
-
logger.log(sizes.getStats())
|
|
249
|
-
|
|
250
|
-
if (sizesZipped?.items.length) {
|
|
251
|
-
logger.log(sizesZipped.getStats())
|
|
252
|
-
}
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
if (tenx) {
|
|
256
|
-
let perHour: number | string =
|
|
257
|
-
Math.round((batchedProgress * 1000 * 60 * 60) / (now - started)) || 0
|
|
258
|
-
if (perHour > 900) {
|
|
259
|
-
perHour = Math.round(perHour / 1000) + 'K'
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
logger.log(
|
|
263
|
-
`${dimGrey(localTimeNow().toPretty())} ${white(metric)} took ${yellow(
|
|
264
|
-
_since(started),
|
|
265
|
-
)} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`,
|
|
266
|
-
)
|
|
267
|
-
} else if (final) {
|
|
268
|
-
logger.log(
|
|
269
|
-
`${boldWhite(metric)} took ${yellow(_since(started))} to process ${yellow(
|
|
270
|
-
batchedProgress,
|
|
271
|
-
)} rows with total RPS of ${yellow(rpsTotal)}`,
|
|
272
|
-
)
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
29
|
}
|