@naturalcycles/nodejs-lib 13.9.1 → 13.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/csv/csvReader.js +1 -1
- package/dist/index.d.ts +1 -2
- package/dist/index.js +1 -2
- package/dist/stream/progressLogger.d.ts +152 -0
- package/dist/stream/progressLogger.js +133 -0
- package/dist/stream/readable/readableCreate.d.ts +1 -1
- package/dist/stream/readable/readableCreate.js +2 -2
- package/dist/stream/readable/readableForEach.d.ts +2 -0
- package/dist/stream/readable/readableForEach.js +2 -0
- package/dist/stream/readable/readableToArray.d.ts +3 -0
- package/dist/stream/readable/readableToArray.js +11 -5
- package/dist/stream/stream.model.d.ts +24 -2
- package/dist/stream/transform/transformLogProgress.d.ts +2 -105
- package/dist/stream/transform/transformLogProgress.js +4 -82
- package/dist/stream/transform/transformTee.d.ts +1 -1
- package/package.json +1 -1
- package/src/csv/csvReader.ts +1 -1
- package/src/index.ts +1 -2
- package/src/stream/progressLogger.ts +324 -0
- package/src/stream/readable/readableCreate.ts +2 -2
- package/src/stream/readable/readableForEach.ts +2 -0
- package/src/stream/readable/readableToArray.ts +11 -7
- package/src/stream/stream.model.ts +46 -4
- package/src/stream/transform/transformLogProgress.ts +7 -253
- package/src/stream/transform/transformTee.ts +1 -1
- package/dist/stream/readable/readableMap.d.ts +0 -3
- package/dist/stream/readable/readableMap.js +0 -31
- package/dist/stream/readable/readableMapToArray.d.ts +0 -10
- package/dist/stream/readable/readableMapToArray.js +0 -16
- package/src/stream/readable/readableMap.ts +0 -34
- package/src/stream/readable/readableMapToArray.ts +0 -22
package/src/index.ts
CHANGED
|
@@ -41,10 +41,9 @@ export * from './stream/pipeline/pipeline'
|
|
|
41
41
|
export * from './stream/readable/readableCreate'
|
|
42
42
|
export * from './stream/readable/readableForEach'
|
|
43
43
|
export * from './stream/readable/readableFromArray'
|
|
44
|
-
export * from './stream/readable/readableMap'
|
|
45
|
-
export * from './stream/readable/readableMapToArray'
|
|
46
44
|
export * from './stream/readable/readableToArray'
|
|
47
45
|
export * from './stream/stream.model'
|
|
46
|
+
export * from './stream/progressLogger'
|
|
48
47
|
export * from './stream/transform/transformBuffer'
|
|
49
48
|
export * from './stream/transform/transformFilter'
|
|
50
49
|
export * from './stream/transform/transformLimit'
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
import { inspect, InspectOptions } from 'node:util'
|
|
2
|
+
import {
|
|
3
|
+
_mb,
|
|
4
|
+
_since,
|
|
5
|
+
AnyObject,
|
|
6
|
+
CommonLogger,
|
|
7
|
+
localTimeNow,
|
|
8
|
+
SimpleMovingAverage,
|
|
9
|
+
UnixTimestampMillisNumber,
|
|
10
|
+
} from '@naturalcycles/js-lib'
|
|
11
|
+
import { boldWhite, dimGrey, hasColors, white, yellow } from '../colors/colors'
|
|
12
|
+
import { SizeStack } from './sizeStack'
|
|
13
|
+
import { ReadableMapper } from './stream.model'
|
|
14
|
+
|
|
15
|
+
export interface ProgressLoggerCfg<T = any> {
|
|
16
|
+
/**
|
|
17
|
+
* Progress metric
|
|
18
|
+
*
|
|
19
|
+
* @default `progress`
|
|
20
|
+
*/
|
|
21
|
+
metric?: string
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Include `heapUsed` in log.
|
|
25
|
+
*
|
|
26
|
+
* @default false
|
|
27
|
+
*/
|
|
28
|
+
heapUsed?: boolean
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Include `heapTotal` in log.
|
|
32
|
+
*
|
|
33
|
+
* @default false
|
|
34
|
+
*/
|
|
35
|
+
heapTotal?: boolean
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Include `rss` in log.
|
|
39
|
+
*
|
|
40
|
+
* @default true
|
|
41
|
+
*/
|
|
42
|
+
rss?: boolean
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Include Peak RSS in log.
|
|
46
|
+
*
|
|
47
|
+
* @default true
|
|
48
|
+
*/
|
|
49
|
+
peakRSS?: boolean
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Include `external` in log.
|
|
53
|
+
*
|
|
54
|
+
* @default false
|
|
55
|
+
*/
|
|
56
|
+
external?: boolean
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Include `arrayBuffers` in log.
|
|
60
|
+
*
|
|
61
|
+
* @default false
|
|
62
|
+
*/
|
|
63
|
+
arrayBuffers?: boolean
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Log (rss - heapTotal)
|
|
67
|
+
* For convenience of debugging "out-of-heap" memory size.
|
|
68
|
+
*
|
|
69
|
+
* @default false
|
|
70
|
+
*/
|
|
71
|
+
rssMinusHeap?: boolean
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Log "rows per second"
|
|
75
|
+
*
|
|
76
|
+
* @default true
|
|
77
|
+
*/
|
|
78
|
+
logRPS?: boolean
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Set to false to disable logging progress
|
|
82
|
+
*
|
|
83
|
+
* @default true
|
|
84
|
+
*/
|
|
85
|
+
logProgress?: boolean
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* Log progress every Nth record that is _processed_ (went through mapper).
|
|
89
|
+
* Set to 0 to disable logging.
|
|
90
|
+
*
|
|
91
|
+
* @default 1000
|
|
92
|
+
*/
|
|
93
|
+
logEvery?: number
|
|
94
|
+
|
|
95
|
+
logger?: CommonLogger
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Function to return extra properties to the "progress object".
|
|
99
|
+
*
|
|
100
|
+
* chunk is undefined for "final" stats, otherwise is defined.
|
|
101
|
+
*/
|
|
102
|
+
extra?: (chunk: T | undefined, index: number) => AnyObject
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* If specified - will multiply the counter by this number.
|
|
106
|
+
* Useful e.g. when using `transformBuffer({ batchSize: 500 })`, so
|
|
107
|
+
* it'll accurately represent the number of processed entries (not batches).
|
|
108
|
+
*
|
|
109
|
+
* Defaults to 1.
|
|
110
|
+
*/
|
|
111
|
+
batchSize?: number
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Experimental logging of item (chunk) sizes, when json-stringified.
|
|
115
|
+
*
|
|
116
|
+
* Defaults to false.
|
|
117
|
+
*
|
|
118
|
+
* @experimental
|
|
119
|
+
*/
|
|
120
|
+
logSizes?: boolean
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
|
|
124
|
+
* Defaults to 100_000.
|
|
125
|
+
* Cannot be Infinity.
|
|
126
|
+
*/
|
|
127
|
+
logSizesBuffer?: number
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* Works in addition to `logSizes`. Adds "zipped sizes".
|
|
131
|
+
*
|
|
132
|
+
* @experimental
|
|
133
|
+
*/
|
|
134
|
+
logZippedSizes?: boolean
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
export interface ProgressLogItem extends AnyObject {
|
|
138
|
+
heapUsed?: number
|
|
139
|
+
heapTotal?: number
|
|
140
|
+
rss?: number
|
|
141
|
+
peakRSS?: number
|
|
142
|
+
rssMinusHeap?: number
|
|
143
|
+
external?: number
|
|
144
|
+
arrayBuffers?: number
|
|
145
|
+
rps10?: number
|
|
146
|
+
rpsTotal?: number
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
const inspectOpt: InspectOptions = {
|
|
150
|
+
colors: hasColors,
|
|
151
|
+
breakLength: 300,
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
export class ProgressLogger<T> implements Disposable {
|
|
155
|
+
constructor(cfg: ProgressLoggerCfg<T> = {}) {
|
|
156
|
+
this.cfg = {
|
|
157
|
+
metric: 'progress',
|
|
158
|
+
rss: true,
|
|
159
|
+
peakRSS: true,
|
|
160
|
+
logRPS: true,
|
|
161
|
+
logEvery: 1000,
|
|
162
|
+
logSizesBuffer: 100_000,
|
|
163
|
+
batchSize: 1,
|
|
164
|
+
logger: console,
|
|
165
|
+
logProgress: cfg.logProgress !== false && cfg.logEvery !== 0,
|
|
166
|
+
...cfg,
|
|
167
|
+
}
|
|
168
|
+
this.logEvery10 = this.cfg.logEvery * 10
|
|
169
|
+
|
|
170
|
+
this.start()
|
|
171
|
+
this.logStats() // initial
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
cfg!: ProgressLoggerCfg<T> & {
|
|
175
|
+
logEvery: number
|
|
176
|
+
logSizesBuffer: number
|
|
177
|
+
batchSize: number
|
|
178
|
+
metric: string
|
|
179
|
+
logger: CommonLogger
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
private started!: UnixTimestampMillisNumber
|
|
183
|
+
private lastSecondStarted!: UnixTimestampMillisNumber
|
|
184
|
+
private sma!: SimpleMovingAverage
|
|
185
|
+
private logEvery10!: number
|
|
186
|
+
private processedLastSecond!: number
|
|
187
|
+
private progress!: number
|
|
188
|
+
private peakRSS!: number
|
|
189
|
+
private sizes?: SizeStack
|
|
190
|
+
private sizesZipped?: SizeStack
|
|
191
|
+
|
|
192
|
+
private start(): void {
|
|
193
|
+
this.started = Date.now()
|
|
194
|
+
this.lastSecondStarted = Date.now()
|
|
195
|
+
this.sma = new SimpleMovingAverage(10)
|
|
196
|
+
this.processedLastSecond = 0
|
|
197
|
+
this.progress = 0
|
|
198
|
+
this.peakRSS = 0
|
|
199
|
+
this.sizes = this.cfg.logSizes ? new SizeStack('json', this.cfg.logSizesBuffer) : undefined
|
|
200
|
+
this.sizesZipped = this.cfg.logZippedSizes
|
|
201
|
+
? new SizeStack('json.gz', this.cfg.logSizesBuffer)
|
|
202
|
+
: undefined
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
log(chunk?: T): void {
|
|
206
|
+
this.progress++
|
|
207
|
+
this.processedLastSecond++
|
|
208
|
+
|
|
209
|
+
if (this.sizes) {
|
|
210
|
+
// Check it, cause gzipping might be delayed here..
|
|
211
|
+
void SizeStack.countItem(chunk, this.cfg.logger, this.sizes, this.sizesZipped)
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
if (this.cfg.logProgress && this.progress % this.cfg.logEvery === 0) {
|
|
215
|
+
this.logStats(chunk, false, this.progress % this.logEvery10 === 0)
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
done(): void {
|
|
220
|
+
this.logStats(undefined, true)
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
[Symbol.dispose](): void {
|
|
224
|
+
this.done()
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
private logStats(chunk?: T, final = false, tenx = false): void {
|
|
228
|
+
if (!this.cfg.logProgress) return
|
|
229
|
+
|
|
230
|
+
const {
|
|
231
|
+
metric,
|
|
232
|
+
extra,
|
|
233
|
+
batchSize,
|
|
234
|
+
heapUsed: logHeapUsed,
|
|
235
|
+
heapTotal: logHeapTotal,
|
|
236
|
+
rss: logRss,
|
|
237
|
+
peakRSS: logPeakRss,
|
|
238
|
+
rssMinusHeap,
|
|
239
|
+
external,
|
|
240
|
+
arrayBuffers,
|
|
241
|
+
logRPS,
|
|
242
|
+
logger,
|
|
243
|
+
} = this.cfg
|
|
244
|
+
|
|
245
|
+
const mem = process.memoryUsage()
|
|
246
|
+
|
|
247
|
+
const now = Date.now()
|
|
248
|
+
const batchedProgress = this.progress * batchSize
|
|
249
|
+
const lastRPS =
|
|
250
|
+
(this.processedLastSecond * batchSize) / ((now - this.lastSecondStarted) / 1000) || 0
|
|
251
|
+
const rpsTotal = Math.round(batchedProgress / ((now - this.started) / 1000)) || 0
|
|
252
|
+
this.lastSecondStarted = now
|
|
253
|
+
this.processedLastSecond = 0
|
|
254
|
+
|
|
255
|
+
const rps10 = Math.round(this.sma.pushGetAvg(lastRPS))
|
|
256
|
+
if (mem.rss > this.peakRSS) this.peakRSS = mem.rss
|
|
257
|
+
|
|
258
|
+
const o: ProgressLogItem = {
|
|
259
|
+
[final ? `${this.cfg.metric}_final` : this.cfg.metric]: batchedProgress,
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
if (extra) Object.assign(o, extra(chunk, this.progress))
|
|
263
|
+
if (logHeapUsed) o.heapUsed = _mb(mem.heapUsed)
|
|
264
|
+
if (logHeapTotal) o.heapTotal = _mb(mem.heapTotal)
|
|
265
|
+
if (logRss) o.rss = _mb(mem.rss)
|
|
266
|
+
if (logPeakRss) o.peakRSS = _mb(this.peakRSS)
|
|
267
|
+
if (rssMinusHeap) o.rssMinusHeap = _mb(mem.rss - mem.heapTotal)
|
|
268
|
+
if (external) o.external = _mb(mem.external)
|
|
269
|
+
if (arrayBuffers) o.arrayBuffers = _mb(mem.arrayBuffers || 0)
|
|
270
|
+
|
|
271
|
+
if (logRPS) Object.assign(o, { rps10, rpsTotal })
|
|
272
|
+
|
|
273
|
+
logger.log(inspect(o, inspectOpt))
|
|
274
|
+
|
|
275
|
+
if (this.sizes?.items.length) {
|
|
276
|
+
logger.log(this.sizes.getStats())
|
|
277
|
+
|
|
278
|
+
if (this.sizesZipped?.items.length) {
|
|
279
|
+
logger.log(this.sizesZipped.getStats())
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
if (tenx) {
|
|
284
|
+
let perHour: number | string =
|
|
285
|
+
Math.round((batchedProgress * 1000 * 60 * 60) / (now - this.started)) || 0
|
|
286
|
+
if (perHour > 900) {
|
|
287
|
+
perHour = Math.round(perHour / 1000) + 'K'
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
logger.log(
|
|
291
|
+
`${dimGrey(localTimeNow().toPretty())} ${white(metric)} took ${yellow(
|
|
292
|
+
_since(this.started),
|
|
293
|
+
)} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`,
|
|
294
|
+
)
|
|
295
|
+
} else if (final) {
|
|
296
|
+
logger.log(
|
|
297
|
+
`${boldWhite(metric)} took ${yellow(_since(this.started))} to process ${yellow(
|
|
298
|
+
batchedProgress,
|
|
299
|
+
)} rows with total RPS of ${yellow(rpsTotal)}`,
|
|
300
|
+
)
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* Create new ProgressLogger.
|
|
307
|
+
*/
|
|
308
|
+
export function progressLogger<T>(cfg: ProgressLoggerCfg<T> = {}): ProgressLogger<T> {
|
|
309
|
+
return new ProgressLogger(cfg)
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
/**
|
|
313
|
+
* Limitation: I don't know how to catch the `final` callback to log final stats.
|
|
314
|
+
*
|
|
315
|
+
* @experimental
|
|
316
|
+
*/
|
|
317
|
+
export function progressReadableMapper<T>(cfg: ProgressLoggerCfg<T> = {}): ReadableMapper<T, T> {
|
|
318
|
+
const progress = new ProgressLogger(cfg)
|
|
319
|
+
|
|
320
|
+
return chunk => {
|
|
321
|
+
progress.log(chunk)
|
|
322
|
+
return chunk
|
|
323
|
+
}
|
|
324
|
+
}
|
|
@@ -34,8 +34,8 @@ export function readableCreate<T>(
|
|
|
34
34
|
* Convenience type-safe wrapper around Readable.from() that infers the Type of input.
|
|
35
35
|
*/
|
|
36
36
|
export function readableFrom<T>(
|
|
37
|
-
|
|
37
|
+
iterable: Iterable<T> | AsyncIterable<T>,
|
|
38
38
|
opt?: ReadableOptions,
|
|
39
39
|
): ReadableTyped<T> {
|
|
40
|
-
return Readable.from(
|
|
40
|
+
return Readable.from(iterable, opt)
|
|
41
41
|
}
|
|
@@ -6,6 +6,8 @@ import { transformMap, TransformMapOptions } from '../transform/transformMap'
|
|
|
6
6
|
* Convenience function to do `.forEach` over a Readable.
|
|
7
7
|
* Typed! (unlike default Readable).
|
|
8
8
|
*
|
|
9
|
+
* Try native readable.forEach() instead!
|
|
10
|
+
*
|
|
9
11
|
* @experimental
|
|
10
12
|
*/
|
|
11
13
|
export async function readableForEach<T>(
|
|
@@ -3,13 +3,17 @@ import { ReadableTyped } from '../stream.model'
|
|
|
3
3
|
/**
|
|
4
4
|
* Convenience function to read the whole Readable stream into Array (in-memory)
|
|
5
5
|
* and return that array.
|
|
6
|
+
*
|
|
7
|
+
* Native `await readable.toArray()` can be used instead.
|
|
8
|
+
* This helper is kept for type-safety support.
|
|
6
9
|
*/
|
|
7
10
|
export async function readableToArray<T>(readable: ReadableTyped<T>): Promise<T[]> {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
return await readable.toArray()
|
|
12
|
+
// const a: T[] = []
|
|
13
|
+
//
|
|
14
|
+
// for await (const item of readable) {
|
|
15
|
+
// a.push(item)
|
|
16
|
+
// }
|
|
17
|
+
//
|
|
18
|
+
// return a
|
|
15
19
|
}
|
|
@@ -1,13 +1,55 @@
|
|
|
1
|
-
import { Readable, Transform, Writable } from 'node:stream'
|
|
1
|
+
import type { Readable, Transform, Writable } from 'node:stream'
|
|
2
|
+
import type { Promisable } from '@naturalcycles/js-lib'
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
|
|
4
|
+
export interface ReadableSignalOptions {
|
|
5
|
+
/** allows destroying the stream if the signal is aborted. */
|
|
6
|
+
signal?: AbortSignal
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export interface ReadableArrayOptions {
|
|
10
|
+
/** the maximum concurrent invocations of `fn` to call on the stream at once. **Default: 1**. */
|
|
11
|
+
concurrency?: number
|
|
12
|
+
/** allows destroying the stream if the signal is aborted. */
|
|
13
|
+
signal?: AbortSignal
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export type ReadableMapper<IN, OUT> = (data: IN, opt?: ReadableSignalOptions) => Promisable<OUT>
|
|
17
|
+
|
|
18
|
+
export type ReadableFlatMapper<IN, OUT> = (
|
|
19
|
+
data: IN,
|
|
20
|
+
opt?: ReadableSignalOptions,
|
|
21
|
+
) => Promisable<OUT[]>
|
|
22
|
+
|
|
23
|
+
export type ReadableVoidMapper<IN> = (data: IN, opt?: ReadableSignalOptions) => void | Promise<void>
|
|
24
|
+
|
|
25
|
+
export type ReadablePredicate<IN> = (
|
|
26
|
+
data: IN,
|
|
27
|
+
opt?: ReadableSignalOptions,
|
|
28
|
+
) => boolean | Promise<boolean>
|
|
29
|
+
|
|
30
|
+
export interface ReadableTyped<T> extends Readable {
|
|
31
|
+
toArray: (opt?: ReadableSignalOptions) => Promise<T[]>
|
|
32
|
+
|
|
33
|
+
map: <OUT>(mapper: ReadableMapper<T, OUT>, opt?: ReadableArrayOptions) => ReadableTyped<OUT>
|
|
34
|
+
|
|
35
|
+
flatMap: <OUT>(
|
|
36
|
+
mapper: ReadableFlatMapper<T, OUT>,
|
|
37
|
+
opt?: ReadableArrayOptions,
|
|
38
|
+
) => ReadableTyped<OUT>
|
|
39
|
+
|
|
40
|
+
filter: (predicate: ReadablePredicate<T>, opt?: ReadableArrayOptions) => ReadableTyped<T>
|
|
41
|
+
|
|
42
|
+
forEach: (mapper: ReadableVoidMapper<T>, opt?: ReadableArrayOptions) => Promise<void>
|
|
43
|
+
|
|
44
|
+
take: (limit: number, opt?: ReadableSignalOptions) => ReadableTyped<T>
|
|
45
|
+
drop: (limit: number, opt?: ReadableSignalOptions) => ReadableTyped<T>
|
|
46
|
+
}
|
|
5
47
|
|
|
6
48
|
// eslint-disable-next-line unused-imports/no-unused-vars
|
|
7
49
|
export interface WritableTyped<T> extends Writable {}
|
|
8
50
|
|
|
9
51
|
// eslint-disable-next-line unused-imports/no-unused-vars
|
|
10
|
-
export interface TransformTyped<IN, OUT
|
|
52
|
+
export interface TransformTyped<IN, OUT> extends Transform {}
|
|
11
53
|
|
|
12
54
|
export interface TransformOptions {
|
|
13
55
|
/**
|