@naturalcycles/nodejs-lib 13.9.1 → 13.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/dist/csv/csvReader.js +1 -1
  2. package/dist/index.d.ts +1 -2
  3. package/dist/index.js +1 -2
  4. package/dist/stream/progressLogger.d.ts +152 -0
  5. package/dist/stream/progressLogger.js +133 -0
  6. package/dist/stream/readable/readableCreate.d.ts +1 -1
  7. package/dist/stream/readable/readableCreate.js +2 -2
  8. package/dist/stream/readable/readableForEach.d.ts +2 -0
  9. package/dist/stream/readable/readableForEach.js +2 -0
  10. package/dist/stream/readable/readableToArray.d.ts +3 -0
  11. package/dist/stream/readable/readableToArray.js +11 -5
  12. package/dist/stream/stream.model.d.ts +24 -2
  13. package/dist/stream/transform/transformLogProgress.d.ts +2 -105
  14. package/dist/stream/transform/transformLogProgress.js +4 -82
  15. package/dist/stream/transform/transformTee.d.ts +1 -1
  16. package/package.json +1 -1
  17. package/src/csv/csvReader.ts +1 -1
  18. package/src/index.ts +1 -2
  19. package/src/stream/progressLogger.ts +324 -0
  20. package/src/stream/readable/readableCreate.ts +2 -2
  21. package/src/stream/readable/readableForEach.ts +2 -0
  22. package/src/stream/readable/readableToArray.ts +11 -7
  23. package/src/stream/stream.model.ts +46 -4
  24. package/src/stream/transform/transformLogProgress.ts +7 -253
  25. package/src/stream/transform/transformTee.ts +1 -1
  26. package/dist/stream/readable/readableMap.d.ts +0 -3
  27. package/dist/stream/readable/readableMap.js +0 -31
  28. package/dist/stream/readable/readableMapToArray.d.ts +0 -10
  29. package/dist/stream/readable/readableMapToArray.js +0 -16
  30. package/src/stream/readable/readableMap.ts +0 -34
  31. package/src/stream/readable/readableMapToArray.ts +0 -22
package/src/index.ts CHANGED
@@ -41,10 +41,9 @@ export * from './stream/pipeline/pipeline'
41
41
  export * from './stream/readable/readableCreate'
42
42
  export * from './stream/readable/readableForEach'
43
43
  export * from './stream/readable/readableFromArray'
44
- export * from './stream/readable/readableMap'
45
- export * from './stream/readable/readableMapToArray'
46
44
  export * from './stream/readable/readableToArray'
47
45
  export * from './stream/stream.model'
46
+ export * from './stream/progressLogger'
48
47
  export * from './stream/transform/transformBuffer'
49
48
  export * from './stream/transform/transformFilter'
50
49
  export * from './stream/transform/transformLimit'
@@ -0,0 +1,324 @@
1
+ import { inspect, InspectOptions } from 'node:util'
2
+ import {
3
+ _mb,
4
+ _since,
5
+ AnyObject,
6
+ CommonLogger,
7
+ localTimeNow,
8
+ SimpleMovingAverage,
9
+ UnixTimestampMillisNumber,
10
+ } from '@naturalcycles/js-lib'
11
+ import { boldWhite, dimGrey, hasColors, white, yellow } from '../colors/colors'
12
+ import { SizeStack } from './sizeStack'
13
+ import { ReadableMapper } from './stream.model'
14
+
15
+ export interface ProgressLoggerCfg<T = any> {
16
+ /**
17
+ * Progress metric
18
+ *
19
+ * @default `progress`
20
+ */
21
+ metric?: string
22
+
23
+ /**
24
+ * Include `heapUsed` in log.
25
+ *
26
+ * @default false
27
+ */
28
+ heapUsed?: boolean
29
+
30
+ /**
31
+ * Include `heapTotal` in log.
32
+ *
33
+ * @default false
34
+ */
35
+ heapTotal?: boolean
36
+
37
+ /**
38
+ * Include `rss` in log.
39
+ *
40
+ * @default true
41
+ */
42
+ rss?: boolean
43
+
44
+ /**
45
+ * Include Peak RSS in log.
46
+ *
47
+ * @default true
48
+ */
49
+ peakRSS?: boolean
50
+
51
+ /**
52
+ * Include `external` in log.
53
+ *
54
+ * @default false
55
+ */
56
+ external?: boolean
57
+
58
+ /**
59
+ * Include `arrayBuffers` in log.
60
+ *
61
+ * @default false
62
+ */
63
+ arrayBuffers?: boolean
64
+
65
+ /**
66
+ * Log (rss - heapTotal)
67
+ * For convenience of debugging "out-of-heap" memory size.
68
+ *
69
+ * @default false
70
+ */
71
+ rssMinusHeap?: boolean
72
+
73
+ /**
74
+ * Log "rows per second"
75
+ *
76
+ * @default true
77
+ */
78
+ logRPS?: boolean
79
+
80
+ /**
81
+ * Set to false to disable logging progress
82
+ *
83
+ * @default true
84
+ */
85
+ logProgress?: boolean
86
+
87
+ /**
88
+ * Log progress every Nth record that is _processed_ (went through mapper).
89
+ * Set to 0 to disable logging.
90
+ *
91
+ * @default 1000
92
+ */
93
+ logEvery?: number
94
+
95
+ logger?: CommonLogger
96
+
97
+ /**
98
+ * Function to return extra properties to the "progress object".
99
+ *
100
+ * chunk is undefined for "final" stats, otherwise is defined.
101
+ */
102
+ extra?: (chunk: T | undefined, index: number) => AnyObject
103
+
104
+ /**
105
+ * If specified - will multiply the counter by this number.
106
+ * Useful e.g. when using `transformBuffer({ batchSize: 500 })`, so
107
+ * it'll accurately represent the number of processed entries (not batches).
108
+ *
109
+ * Defaults to 1.
110
+ */
111
+ batchSize?: number
112
+
113
+ /**
114
+ * Experimental logging of item (chunk) sizes, when json-stringified.
115
+ *
116
+ * Defaults to false.
117
+ *
118
+ * @experimental
119
+ */
120
+ logSizes?: boolean
121
+
122
+ /**
123
+ * How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
124
+ * Defaults to 100_000.
125
+ * Cannot be Infinity.
126
+ */
127
+ logSizesBuffer?: number
128
+
129
+ /**
130
+ * Works in addition to `logSizes`. Adds "zipped sizes".
131
+ *
132
+ * @experimental
133
+ */
134
+ logZippedSizes?: boolean
135
+ }
136
+
137
+ export interface ProgressLogItem extends AnyObject {
138
+ heapUsed?: number
139
+ heapTotal?: number
140
+ rss?: number
141
+ peakRSS?: number
142
+ rssMinusHeap?: number
143
+ external?: number
144
+ arrayBuffers?: number
145
+ rps10?: number
146
+ rpsTotal?: number
147
+ }
148
+
149
+ const inspectOpt: InspectOptions = {
150
+ colors: hasColors,
151
+ breakLength: 300,
152
+ }
153
+
154
+ export class ProgressLogger<T> implements Disposable {
155
+ constructor(cfg: ProgressLoggerCfg<T> = {}) {
156
+ this.cfg = {
157
+ metric: 'progress',
158
+ rss: true,
159
+ peakRSS: true,
160
+ logRPS: true,
161
+ logEvery: 1000,
162
+ logSizesBuffer: 100_000,
163
+ batchSize: 1,
164
+ logger: console,
165
+ logProgress: cfg.logProgress !== false && cfg.logEvery !== 0,
166
+ ...cfg,
167
+ }
168
+ this.logEvery10 = this.cfg.logEvery * 10
169
+
170
+ this.start()
171
+ this.logStats() // initial
172
+ }
173
+
174
+ cfg!: ProgressLoggerCfg<T> & {
175
+ logEvery: number
176
+ logSizesBuffer: number
177
+ batchSize: number
178
+ metric: string
179
+ logger: CommonLogger
180
+ }
181
+
182
+ private started!: UnixTimestampMillisNumber
183
+ private lastSecondStarted!: UnixTimestampMillisNumber
184
+ private sma!: SimpleMovingAverage
185
+ private logEvery10!: number
186
+ private processedLastSecond!: number
187
+ private progress!: number
188
+ private peakRSS!: number
189
+ private sizes?: SizeStack
190
+ private sizesZipped?: SizeStack
191
+
192
+ private start(): void {
193
+ this.started = Date.now()
194
+ this.lastSecondStarted = Date.now()
195
+ this.sma = new SimpleMovingAverage(10)
196
+ this.processedLastSecond = 0
197
+ this.progress = 0
198
+ this.peakRSS = 0
199
+ this.sizes = this.cfg.logSizes ? new SizeStack('json', this.cfg.logSizesBuffer) : undefined
200
+ this.sizesZipped = this.cfg.logZippedSizes
201
+ ? new SizeStack('json.gz', this.cfg.logSizesBuffer)
202
+ : undefined
203
+ }
204
+
205
+ log(chunk?: T): void {
206
+ this.progress++
207
+ this.processedLastSecond++
208
+
209
+ if (this.sizes) {
210
+ // Check it, cause gzipping might be delayed here..
211
+ void SizeStack.countItem(chunk, this.cfg.logger, this.sizes, this.sizesZipped)
212
+ }
213
+
214
+ if (this.cfg.logProgress && this.progress % this.cfg.logEvery === 0) {
215
+ this.logStats(chunk, false, this.progress % this.logEvery10 === 0)
216
+ }
217
+ }
218
+
219
+ done(): void {
220
+ this.logStats(undefined, true)
221
+ }
222
+
223
+ [Symbol.dispose](): void {
224
+ this.done()
225
+ }
226
+
227
+ private logStats(chunk?: T, final = false, tenx = false): void {
228
+ if (!this.cfg.logProgress) return
229
+
230
+ const {
231
+ metric,
232
+ extra,
233
+ batchSize,
234
+ heapUsed: logHeapUsed,
235
+ heapTotal: logHeapTotal,
236
+ rss: logRss,
237
+ peakRSS: logPeakRss,
238
+ rssMinusHeap,
239
+ external,
240
+ arrayBuffers,
241
+ logRPS,
242
+ logger,
243
+ } = this.cfg
244
+
245
+ const mem = process.memoryUsage()
246
+
247
+ const now = Date.now()
248
+ const batchedProgress = this.progress * batchSize
249
+ const lastRPS =
250
+ (this.processedLastSecond * batchSize) / ((now - this.lastSecondStarted) / 1000) || 0
251
+ const rpsTotal = Math.round(batchedProgress / ((now - this.started) / 1000)) || 0
252
+ this.lastSecondStarted = now
253
+ this.processedLastSecond = 0
254
+
255
+ const rps10 = Math.round(this.sma.pushGetAvg(lastRPS))
256
+ if (mem.rss > this.peakRSS) this.peakRSS = mem.rss
257
+
258
+ const o: ProgressLogItem = {
259
+ [final ? `${this.cfg.metric}_final` : this.cfg.metric]: batchedProgress,
260
+ }
261
+
262
+ if (extra) Object.assign(o, extra(chunk, this.progress))
263
+ if (logHeapUsed) o.heapUsed = _mb(mem.heapUsed)
264
+ if (logHeapTotal) o.heapTotal = _mb(mem.heapTotal)
265
+ if (logRss) o.rss = _mb(mem.rss)
266
+ if (logPeakRss) o.peakRSS = _mb(this.peakRSS)
267
+ if (rssMinusHeap) o.rssMinusHeap = _mb(mem.rss - mem.heapTotal)
268
+ if (external) o.external = _mb(mem.external)
269
+ if (arrayBuffers) o.arrayBuffers = _mb(mem.arrayBuffers || 0)
270
+
271
+ if (logRPS) Object.assign(o, { rps10, rpsTotal })
272
+
273
+ logger.log(inspect(o, inspectOpt))
274
+
275
+ if (this.sizes?.items.length) {
276
+ logger.log(this.sizes.getStats())
277
+
278
+ if (this.sizesZipped?.items.length) {
279
+ logger.log(this.sizesZipped.getStats())
280
+ }
281
+ }
282
+
283
+ if (tenx) {
284
+ let perHour: number | string =
285
+ Math.round((batchedProgress * 1000 * 60 * 60) / (now - this.started)) || 0
286
+ if (perHour > 900) {
287
+ perHour = Math.round(perHour / 1000) + 'K'
288
+ }
289
+
290
+ logger.log(
291
+ `${dimGrey(localTimeNow().toPretty())} ${white(metric)} took ${yellow(
292
+ _since(this.started),
293
+ )} so far to process ${yellow(batchedProgress)} rows, ~${yellow(perHour)}/hour`,
294
+ )
295
+ } else if (final) {
296
+ logger.log(
297
+ `${boldWhite(metric)} took ${yellow(_since(this.started))} to process ${yellow(
298
+ batchedProgress,
299
+ )} rows with total RPS of ${yellow(rpsTotal)}`,
300
+ )
301
+ }
302
+ }
303
+ }
304
+
305
+ /**
306
+ * Create new ProgressLogger.
307
+ */
308
+ export function progressLogger<T>(cfg: ProgressLoggerCfg<T> = {}): ProgressLogger<T> {
309
+ return new ProgressLogger(cfg)
310
+ }
311
+
312
+ /**
313
+ * Limitation: I don't know how to catch the `final` callback to log final stats.
314
+ *
315
+ * @experimental
316
+ */
317
+ export function progressReadableMapper<T>(cfg: ProgressLoggerCfg<T> = {}): ReadableMapper<T, T> {
318
+ const progress = new ProgressLogger(cfg)
319
+
320
+ return chunk => {
321
+ progress.log(chunk)
322
+ return chunk
323
+ }
324
+ }
@@ -34,8 +34,8 @@ export function readableCreate<T>(
34
34
  * Convenience type-safe wrapper around Readable.from() that infers the Type of input.
35
35
  */
36
36
  export function readableFrom<T>(
37
- items: Iterable<T> | AsyncIterable<T>,
37
+ iterable: Iterable<T> | AsyncIterable<T>,
38
38
  opt?: ReadableOptions,
39
39
  ): ReadableTyped<T> {
40
- return Readable.from(items, opt)
40
+ return Readable.from(iterable, opt)
41
41
  }
@@ -6,6 +6,8 @@ import { transformMap, TransformMapOptions } from '../transform/transformMap'
6
6
  * Convenience function to do `.forEach` over a Readable.
7
7
  * Typed! (unlike default Readable).
8
8
  *
9
+ * Try native readable.forEach() instead!
10
+ *
9
11
  * @experimental
10
12
  */
11
13
  export async function readableForEach<T>(
@@ -3,13 +3,17 @@ import { ReadableTyped } from '../stream.model'
3
3
  /**
4
4
  * Convenience function to read the whole Readable stream into Array (in-memory)
5
5
  * and return that array.
6
+ *
7
+ * Native `await readable.toArray()` can be used instead.
8
+ * This helper is kept for type-safety support.
6
9
  */
7
10
  export async function readableToArray<T>(readable: ReadableTyped<T>): Promise<T[]> {
8
- const a: T[] = []
9
-
10
- for await (const item of readable) {
11
- a.push(item)
12
- }
13
-
14
- return a
11
+ return await readable.toArray()
12
+ // const a: T[] = []
13
+ //
14
+ // for await (const item of readable) {
15
+ // a.push(item)
16
+ // }
17
+ //
18
+ // return a
15
19
  }
@@ -1,13 +1,55 @@
1
- import { Readable, Transform, Writable } from 'node:stream'
1
+ import type { Readable, Transform, Writable } from 'node:stream'
2
+ import type { Promisable } from '@naturalcycles/js-lib'
2
3
 
3
- // eslint-disable-next-line unused-imports/no-unused-vars
4
- export interface ReadableTyped<T> extends Readable {}
4
+ export interface ReadableSignalOptions {
5
+ /** allows destroying the stream if the signal is aborted. */
6
+ signal?: AbortSignal
7
+ }
8
+
9
+ export interface ReadableArrayOptions {
10
+ /** the maximum concurrent invocations of `fn` to call on the stream at once. **Default: 1**. */
11
+ concurrency?: number
12
+ /** allows destroying the stream if the signal is aborted. */
13
+ signal?: AbortSignal
14
+ }
15
+
16
+ export type ReadableMapper<IN, OUT> = (data: IN, opt?: ReadableSignalOptions) => Promisable<OUT>
17
+
18
+ export type ReadableFlatMapper<IN, OUT> = (
19
+ data: IN,
20
+ opt?: ReadableSignalOptions,
21
+ ) => Promisable<OUT[]>
22
+
23
+ export type ReadableVoidMapper<IN> = (data: IN, opt?: ReadableSignalOptions) => void | Promise<void>
24
+
25
+ export type ReadablePredicate<IN> = (
26
+ data: IN,
27
+ opt?: ReadableSignalOptions,
28
+ ) => boolean | Promise<boolean>
29
+
30
+ export interface ReadableTyped<T> extends Readable {
31
+ toArray: (opt?: ReadableSignalOptions) => Promise<T[]>
32
+
33
+ map: <OUT>(mapper: ReadableMapper<T, OUT>, opt?: ReadableArrayOptions) => ReadableTyped<OUT>
34
+
35
+ flatMap: <OUT>(
36
+ mapper: ReadableFlatMapper<T, OUT>,
37
+ opt?: ReadableArrayOptions,
38
+ ) => ReadableTyped<OUT>
39
+
40
+ filter: (predicate: ReadablePredicate<T>, opt?: ReadableArrayOptions) => ReadableTyped<T>
41
+
42
+ forEach: (mapper: ReadableVoidMapper<T>, opt?: ReadableArrayOptions) => Promise<void>
43
+
44
+ take: (limit: number, opt?: ReadableSignalOptions) => ReadableTyped<T>
45
+ drop: (limit: number, opt?: ReadableSignalOptions) => ReadableTyped<T>
46
+ }
5
47
 
6
48
  // eslint-disable-next-line unused-imports/no-unused-vars
7
49
  export interface WritableTyped<T> extends Writable {}
8
50
 
9
51
  // eslint-disable-next-line unused-imports/no-unused-vars
10
- export interface TransformTyped<IN, OUT = IN> extends Transform {}
52
+ export interface TransformTyped<IN, OUT> extends Transform {}
11
53
 
12
54
  export interface TransformOptions {
13
55
  /**