@naturalcycles/nodejs-lib 12.59.0 → 12.62.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/dist/got/getGot.js +98 -28
  2. package/dist/got/got.model.d.ts +6 -0
  3. package/dist/index.d.ts +19 -19
  4. package/dist/index.js +19 -39
  5. package/dist/stream/ndjson/transformJsonParse.js +3 -3
  6. package/dist/stream/ndjson/transformToNDJson.js +2 -2
  7. package/dist/stream/sizeStack.d.ts +9 -0
  8. package/dist/stream/sizeStack.js +48 -0
  9. package/dist/stream/transform/transformBuffer.js +1 -1
  10. package/dist/stream/transform/transformFilter.d.ts +3 -4
  11. package/dist/stream/transform/transformFilter.js +5 -20
  12. package/dist/stream/transform/transformLogProgress.d.ts +20 -0
  13. package/dist/stream/transform/transformLogProgress.js +36 -18
  14. package/dist/stream/transform/transformMap.d.ts +2 -4
  15. package/dist/stream/transform/transformMap.js +6 -11
  16. package/dist/stream/transform/transformMapSimple.js +1 -1
  17. package/dist/stream/transform/transformMapSync.d.ts +5 -3
  18. package/dist/stream/transform/transformMapSync.js +28 -22
  19. package/dist/stream/transform/transformNoOp.js +1 -1
  20. package/dist/stream/transform/transformTap.js +3 -3
  21. package/dist/stream/transform/transformToArray.js +1 -1
  22. package/dist/stream/transform/transformToString.js +2 -2
  23. package/dist/stream/transform/worker/transformMultiThreaded.js +1 -1
  24. package/dist/stream/writable/writableFork.js +1 -1
  25. package/dist/stream/writable/writablePushToArray.js +1 -1
  26. package/dist/stream/writable/writableVoid.js +1 -1
  27. package/dist/util/zip.util.d.ts +15 -7
  28. package/dist/util/zip.util.js +27 -22
  29. package/package.json +2 -2
  30. package/src/got/getGot.ts +120 -31
  31. package/src/got/got.model.ts +8 -0
  32. package/src/index.ts +19 -38
  33. package/src/stream/ndjson/transformJsonParse.ts +3 -3
  34. package/src/stream/ndjson/transformToNDJson.ts +2 -2
  35. package/src/stream/sizeStack.ts +56 -0
  36. package/src/stream/transform/transformBuffer.ts +1 -1
  37. package/src/stream/transform/transformFilter.ts +6 -20
  38. package/src/stream/transform/transformLogProgress.ts +72 -23
  39. package/src/stream/transform/transformMap.ts +7 -14
  40. package/src/stream/transform/transformMapSimple.ts +1 -1
  41. package/src/stream/transform/transformMapSync.ts +40 -26
  42. package/src/stream/transform/transformNoOp.ts +1 -1
  43. package/src/stream/transform/transformTap.ts +3 -3
  44. package/src/stream/transform/transformToArray.ts +1 -1
  45. package/src/stream/transform/transformToString.ts +2 -2
  46. package/src/stream/transform/worker/transformMultiThreaded.ts +1 -1
  47. package/src/stream/writable/writableFork.ts +1 -1
  48. package/src/stream/writable/writablePushToArray.ts +1 -1
  49. package/src/stream/writable/writableVoid.ts +1 -1
  50. package/src/util/zip.util.ts +26 -20
package/src/got/getGot.ts CHANGED
@@ -1,8 +1,13 @@
1
1
  import { URL } from 'url'
2
2
  import { _since } from '@naturalcycles/js-lib'
3
- import got, { AfterResponseHook, BeforeErrorHook, BeforeRequestHook, Got, HTTPError } from 'got'
3
+ import got, {
4
+ AfterResponseHook,
5
+ BeforeErrorHook,
6
+ BeforeRequestHook,
7
+ BeforeRetryHook,
8
+ Got,
9
+ } from 'got'
4
10
  import { inspectAny } from '..'
5
- import { dimGrey, grey, red, yellow } from '../colors'
6
11
  import { GetGotOptions, GotRequestContext } from './got.model'
7
12
 
8
13
  /**
@@ -15,12 +20,33 @@ import { GetGotOptions, GotRequestContext } from './got.model'
15
20
  export function getGot(opt: GetGotOptions = {}): Got {
16
21
  opt.logger ||= console
17
22
 
23
+ if (opt.debug) {
24
+ opt.logStart = opt.logFinished = opt.logResponse = true
25
+ }
26
+
18
27
  return got.extend({
19
28
  // Most-important is to set to anything non-empty (so, requests don't "hang" by default).
20
29
  // Should be long enough to handle for slow responses from scaled cloud APIs in times of spikes
21
30
  // Ideally should be LESS than default Request timeout in backend-lib (so, it has a chance to error
22
31
  // before server times out with 503).
23
- timeout: 90_000,
32
+ //
33
+ // UPD 2021-11-27
34
+ // There are 2 types/strategies for requests:
35
+ // 1. Optimized to get result no matter what. E.g in Cron jobs, where otherwise there'll be a job failure
36
+ // 2. Part of the Backend request, where we better retry quickly and fail on timeout before Backend aborts it with "503 Request timeout"
37
+ //
38
+ // Here it's hard to set the default timeout right for both use-cases.
39
+ // So, if it's important, you should override it according to your use-cases:
40
+ // - set it longer for Type 1 (e.g 120 seconds)
41
+ // - set it shorter for Type 2 (e.g 10/20 seconds)
42
+ // Please beware of default Retry strategy of Got:
43
+ // by default it will retry 2 times (after first try)
44
+ // First delay between tries will be ~1 second, then ~2 seconds
45
+ // Each retry it'll wait up to `timeout` (so, up to 60 seconds by default).
46
+ // So, for 3 tries it multiplies your timeout by 3 (+3 seconds between the tries).
47
+ // So, e.g 60 seconds timeout with 2 retries becomes up to 183 seconds.
48
+ // Which definitely doesn't fit into default "RequestTimeout"
49
+ timeout: 60_000,
24
50
  ...opt,
25
51
  hooks: {
26
52
  ...opt.hooks,
@@ -34,6 +60,11 @@ export function getGot(opt: GetGotOptions = {}): Got {
34
60
  // User hooks go AFTER
35
61
  ...(opt.hooks?.beforeRequest || []),
36
62
  ],
63
+ beforeRetry: [
64
+ gotBeforeRetryHook(opt),
65
+ // User hooks go AFTER
66
+ ...(opt.hooks?.beforeRetry || []),
67
+ ],
37
68
  afterResponse: [
38
69
  ...(opt.hooks?.afterResponse || []),
39
70
  // User hooks go BEFORE
@@ -71,22 +102,38 @@ function gotErrorHook(opt: GetGotOptions = {}): BeforeErrorHook {
71
102
  const { maxResponseLength = 10_000 } = opt
72
103
 
73
104
  return err => {
74
- if (err instanceof HTTPError) {
75
- const { statusCode } = err.response
76
- const { method, url, prefixUrl } = err.options
77
- const shortUrl = getShortUrl(opt, url, prefixUrl)
78
- // const { started } = context as GotRequestContext
79
-
80
- const body = inspectAny(err.response.body, {
81
- maxLen: maxResponseLength,
82
- colors: false,
83
- })
84
-
85
- // timings are not part of err.message to allow automatic error grouping in Sentry
86
- err.message = [[statusCode, method, shortUrl].filter(Boolean).join(' '), body]
105
+ const statusCode = err.response?.statusCode || 0
106
+ const { method, url, prefixUrl } = err.options
107
+ const shortUrl = getShortUrl(opt, url, prefixUrl)
108
+ const { started, retryCount } = (err.request?.options.context || {}) as GotRequestContext
109
+
110
+ const body = err.response?.body
111
+ ? inspectAny(err.response.body, {
112
+ maxLen: maxResponseLength,
113
+ colors: false,
114
+ })
115
+ : err.message
116
+
117
+ // We don't include Response/Body/Message in the log, because it's included in the Error thrown from here
118
+ opt.logger!.log(
119
+ [
120
+ ' <<',
121
+ statusCode,
122
+ method,
123
+ shortUrl,
124
+ retryCount && `(retry ${retryCount})`,
125
+ 'error',
126
+ started && 'in ' + _since(started),
127
+ ]
87
128
  .filter(Boolean)
88
- .join('\n')
89
- }
129
+ .join(' '),
130
+ )
131
+
132
+ // timings are not part of err.message to allow automatic error grouping in Sentry
133
+ // Colors are not used, because there's high chance that this Error will be propagated all the way to the Frontend
134
+ err.message = [[statusCode, method, shortUrl].filter(Boolean).join(' '), body]
135
+ .filter(Boolean)
136
+ .join('\n')
90
137
 
91
138
  return err
92
139
  }
@@ -100,28 +147,76 @@ function gotBeforeRequestHook(opt: GetGotOptions): BeforeRequestHook {
100
147
  } as GotRequestContext
101
148
 
102
149
  if (opt.logStart) {
150
+ const { retryCount } = options.context as GotRequestContext
103
151
  const shortUrl = getShortUrl(opt, options.url, options.prefixUrl)
104
- opt.logger!.log([dimGrey(' >>'), dimGrey(options.method), grey(shortUrl)].join(' '))
152
+ opt.logger!.log(
153
+ [' >>', options.method, shortUrl, retryCount && `(retry ${retryCount})`].join(' '),
154
+ )
105
155
  }
106
156
  }
107
157
  }
108
158
 
159
+ // Here we log always, because it's similar to ErrorHook - we always log errors
160
+ // Because Retries are always result of some Error
161
+ function gotBeforeRetryHook(opt: GetGotOptions): BeforeRetryHook {
162
+ const { maxResponseLength = 10_000 } = opt
163
+
164
+ return (options, err, retryCount) => {
165
+ // opt.logger!.log('beforeRetry', retryCount)
166
+ const statusCode = err?.response?.statusCode || 0
167
+ const { method, url, prefixUrl } = options
168
+ const shortUrl = getShortUrl(opt, url, prefixUrl)
169
+ const { started } = options.context as GotRequestContext
170
+ Object.assign(options.context, { retryCount })
171
+
172
+ const body = err?.response?.body
173
+ ? inspectAny(err.response.body, {
174
+ maxLen: maxResponseLength,
175
+ colors: false,
176
+ })
177
+ : err?.message
178
+
179
+ // Here the Response body (or, when there is none, the Error message) IS appended to the log on a new line
180
+ opt.logger!.warn(
181
+ [
182
+ [
183
+ ' <<',
184
+ statusCode,
185
+ method,
186
+ shortUrl,
187
+ retryCount && retryCount > 1 ? `(retry ${retryCount - 1})` : '(first try)',
188
+ 'error',
189
+ started && 'in ' + _since(started),
190
+ ]
191
+ .filter(Boolean)
192
+ .join(' '),
193
+ body,
194
+ ]
195
+ .filter(Boolean)
196
+ .join('\n'),
197
+ )
198
+ }
199
+ }
200
+
201
+ // AfterResponseHook is never called on Error
202
+ // So, coloredHttpCode(resp.statusCode) is probably useless
109
203
  function gotAfterResponseHook(opt: GetGotOptions = {}): AfterResponseHook {
110
204
  return resp => {
111
205
  const success = resp.statusCode >= 200 && resp.statusCode < 400
112
206
 
113
207
  if (opt.logFinished) {
114
- const { started } = resp.request.options.context as GotRequestContext
208
+ const { started, retryCount } = resp.request.options.context as GotRequestContext
115
209
  const { url, prefixUrl, method } = resp.request.options
116
210
  const shortUrl = getShortUrl(opt, url, prefixUrl)
117
211
 
118
212
  opt.logger!.log(
119
213
  [
120
- dimGrey(' <<'),
121
- coloredHttpCode(resp.statusCode),
122
- dimGrey(method),
123
- grey(shortUrl),
124
- started && dimGrey('in ' + _since(started)),
214
+ ' <<',
215
+ resp.statusCode,
216
+ method,
217
+ shortUrl,
218
+ retryCount && `(retry ${retryCount - 1})`,
219
+ started && 'in ' + _since(started),
125
220
  ]
126
221
  .filter(Boolean)
127
222
  .join(' '),
@@ -138,12 +233,6 @@ function gotAfterResponseHook(opt: GetGotOptions = {}): AfterResponseHook {
138
233
  }
139
234
  }
140
235
 
141
- function coloredHttpCode(statusCode: number): string {
142
- if (statusCode < 400) return dimGrey(statusCode) // default
143
- if (statusCode < 500) return yellow(statusCode)
144
- return red(statusCode)
145
- }
146
-
147
236
  function getShortUrl(opt: GetGotOptions, url: URL, prefixUrl?: string): string {
148
237
  let shortUrl = url.toString()
149
238
 
@@ -2,6 +2,12 @@ import { AnyObject, CommonLogger } from '@naturalcycles/js-lib'
2
2
  import type { Options } from 'got'
3
3
 
4
4
  export interface GetGotOptions extends Options {
5
+ /**
6
+ * Set to `true` to enable all possible debug logging.
7
+ * Not safe in prod (as it logs Responses), but great to use during development.
8
+ */
9
+ debug?: boolean
10
+
5
11
  /**
6
12
  * @default false
7
13
  */
@@ -51,4 +57,6 @@ export interface GotRequestContext extends AnyObject {
51
57
  * Millisecond-timestamp of when the request was started. To be able to count "time spent".
52
58
  */
53
59
  started: number
60
+
61
+ retryCount?: number
54
62
  }
package/src/index.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import Ajv from 'ajv'
2
- import { HTTPError, TimeoutError } from 'got'
2
+ import { RequestError, TimeoutError } from 'got'
3
3
  import type { AfterResponseHook, BeforeErrorHook, BeforeRequestHook, Got } from 'got'
4
4
  import { AnySchema, ValidationErrorItem } from 'joi'
5
5
  import { _chunkBuffer } from './buffer/buffer.util'
@@ -11,7 +11,7 @@ import { Debug, IDebug, IDebugger } from './log/debug'
11
11
  export * from './security/hash.util'
12
12
  export * from './security/id.util'
13
13
  export * from './security/secret.util'
14
- import { hasColors } from './colors/colors'
14
+ export * from './colors/colors'
15
15
  export * from './log/log.util'
16
16
  import { slackDefaultMessagePrefixHook, SlackService } from './slack/slack.service'
17
17
  import {
@@ -22,9 +22,9 @@ import {
22
22
  SlackServiceCfg,
23
23
  } from './slack/slack.service.model'
24
24
  import { NDJsonStats } from './stream/ndjson/ndjson.model'
25
- import { ndJsonFileRead } from './stream/ndjson/ndJsonFileRead'
26
- import { ndJsonFileWrite } from './stream/ndjson/ndJsonFileWrite'
27
- import { ndjsonMap } from './stream/ndjson/ndjsonMap'
25
+ export * from './stream/ndjson/ndJsonFileRead'
26
+ export * from './stream/ndjson/ndJsonFileWrite'
27
+ export * from './stream/ndjson/ndjsonMap'
28
28
  import {
29
29
  ndjsonStreamForEach,
30
30
  NDJSONStreamForEachOptions,
@@ -45,12 +45,12 @@ import {
45
45
  } from './stream/ndjson/transformJsonParse'
46
46
  import { transformToNDJson, TransformToNDJsonOptions } from './stream/ndjson/transformToNDJson'
47
47
  export * from './stream/pipeline/pipeline'
48
- import { readableCreate, readableFrom } from './stream/readable/readableCreate'
49
- import { readableForEach, readableForEachSync } from './stream/readable/readableForEach'
50
- import { readableFromArray } from './stream/readable/readableFromArray'
51
- import { readableMap } from './stream/readable/readableMap'
52
- import { readableMapToArray } from './stream/readable/readableMapToArray'
53
- import { readableToArray } from './stream/readable/readableToArray'
48
+ export * from './stream/readable/readableCreate'
49
+ export * from './stream/readable/readableForEach'
50
+ export * from './stream/readable/readableFromArray'
51
+ export * from './stream/readable/readableMap'
52
+ export * from './stream/readable/readableMapToArray'
53
+ export * from './stream/readable/readableToArray'
54
54
  import {
55
55
  ReadableTyped,
56
56
  TransformOptions,
@@ -62,13 +62,13 @@ export * from './stream/transform/transformFilter'
62
62
  export * from './stream/transform/transformLimit'
63
63
  export * from './stream/transform/transformLogProgress'
64
64
  import { transformMap, TransformMapOptions } from './stream/transform/transformMap'
65
- import { transformMapSimple } from './stream/transform/transformMapSimple'
66
- import { transformNoOp } from './stream/transform/transformNoOp'
65
+ export * from './stream/transform/transformMapSimple'
66
+ export * from './stream/transform/transformNoOp'
67
67
  import { transformMapSync, TransformMapSyncOptions } from './stream/transform/transformMapSync'
68
- import { transformSplit } from './stream/transform/transformSplit'
69
- import { transformTap } from './stream/transform/transformTap'
70
- import { transformToArray } from './stream/transform/transformToArray'
71
- import { transformToString } from './stream/transform/transformToString'
68
+ export * from './stream/transform/transformSplit'
69
+ export * from './stream/transform/transformTap'
70
+ export * from './stream/transform/transformToArray'
71
+ export * from './stream/transform/transformToString'
72
72
  import { BaseWorkerClass, WorkerClassInterface } from './stream/transform/worker/baseWorkerClass'
73
73
  import {
74
74
  transformMultiThreaded,
@@ -86,7 +86,7 @@ export * from './util/zip.util'
86
86
  import { readAjvSchemas, readJsonSchemas } from './validation/ajv/ajv.util'
87
87
  import { AjvSchema, AjvSchemaCfg, AjvValidationOptions } from './validation/ajv/ajvSchema'
88
88
  import { AjvValidationError, AjvValidationErrorData } from './validation/ajv/ajvValidationError'
89
- import { getAjv } from './validation/ajv/getAjv'
89
+ export * from './validation/ajv/getAjv'
90
90
  import { ExtendedJoi, Joi } from './validation/joi/joi.extensions'
91
91
  import {
92
92
  AnySchemaTyped,
@@ -171,17 +171,6 @@ export {
171
171
  Debug,
172
172
  SlackService,
173
173
  slackDefaultMessagePrefixHook,
174
- readableCreate,
175
- readableFrom,
176
- readableFromArray,
177
- readableToArray,
178
- readableForEach,
179
- readableForEachSync,
180
- readableMap,
181
- readableMapToArray,
182
- ndjsonMap,
183
- ndJsonFileRead,
184
- ndJsonFileWrite,
185
174
  ndjsonStreamForEach,
186
175
  pipelineFromNDJsonFile,
187
176
  pipelineToNDJsonFile,
@@ -192,27 +181,19 @@ export {
192
181
  transformToNDJson,
193
182
  transformMap,
194
183
  transformMapSync,
195
- transformMapSimple,
196
- transformNoOp,
197
- transformSplit,
198
- transformToString,
199
- transformToArray,
200
- transformTap,
201
184
  transformMultiThreaded,
202
185
  BaseWorkerClass,
203
186
  tableDiff,
204
187
  inspectAny,
205
188
  inspectAnyStringifyFn,
206
- HTTPError,
189
+ RequestError,
207
190
  TimeoutError,
208
191
  _chunkBuffer,
209
192
  Ajv,
210
- getAjv,
211
193
  AjvSchema,
212
194
  AjvValidationError,
213
195
  readJsonSchemas,
214
196
  readAjvSchemas,
215
- hasColors,
216
197
  sanitizeHTML,
217
198
  runScript,
218
199
  }
@@ -33,17 +33,17 @@ export function transformJsonParse<OUT = any>(
33
33
  const { strict = true, reviver } = opt
34
34
 
35
35
  return new Transform({
36
- objectMode: false,
36
+ writableObjectMode: false,
37
37
  readableObjectMode: true,
38
- transform(chunk: string, _encoding, cb) {
38
+ transform(chunk: string, _, cb) {
39
39
  try {
40
40
  const data = JSON.parse(chunk, reviver)
41
41
  cb(null, data)
42
42
  } catch (err) {
43
- // console.error(err)
44
43
  if (strict) {
45
44
  cb(err as Error) // emit error
46
45
  } else {
46
+ console.error(err)
47
47
  cb() // emit no error, but no result either
48
48
  }
49
49
  }
@@ -37,9 +37,9 @@ export function transformToNDJson<IN = any>(
37
37
  const { strict = true, separator = '\n', sortObjects = false, useFlatstr = false } = opt
38
38
 
39
39
  return new Transform({
40
- objectMode: true,
40
+ writableObjectMode: true,
41
41
  readableObjectMode: false,
42
- transform(chunk: IN, _encoding, cb) {
42
+ transform(chunk: IN, _, cb) {
43
43
  try {
44
44
  if (sortObjects) {
45
45
  chunk = _sortObjectDeep(chunk as any)
@@ -0,0 +1,56 @@
1
+ import { _hb, CommonLogger, NumberStack } from '@naturalcycles/js-lib'
2
+ import { yellow } from '../colors'
3
+ import { gzipBuffer } from '../util/zip.util'
4
+
5
+ export class SizeStack extends NumberStack {
6
+ constructor(public name: string, size: number) {
7
+ super(size)
8
+ }
9
+
10
+ total = 0
11
+
12
+ override push(item: any): this {
13
+ this.total += item
14
+ return super.push(item)
15
+ }
16
+
17
+ getStats(): string {
18
+ // const pcs = this.percentiles([50, 90])
19
+
20
+ return [
21
+ ' ' + this.name,
22
+ 'avg',
23
+ yellow(_hb(this.avg())),
24
+ // 'p50',
25
+ // yellow(_hb(pcs[50])),
26
+ // 'p90',
27
+ // yellow(_hb(pcs[90])),
28
+ 'total',
29
+ yellow(_hb(this.total)),
30
+ ].join(' ')
31
+ }
32
+
33
+ static async countItem(
34
+ item: any,
35
+ logger: CommonLogger,
36
+ sizes?: SizeStack,
37
+ sizesZipped?: SizeStack,
38
+ ): Promise<void> {
39
+ if (!sizes) return
40
+
41
+ // try-catch, because we don't want to fail the pipeline on logProgress
42
+ try {
43
+ const buf = Buffer.from(JSON.stringify(item))
44
+ sizes.push(buf.byteLength)
45
+
46
+ if (sizesZipped) {
47
+ const { byteLength } = await gzipBuffer(buf)
48
+ sizesZipped.push(byteLength)
49
+ }
50
+ } catch (err) {
51
+ logger.warn(
52
+ `transformLogProgress failed to JSON.stringify the chunk: ${(err as Error).message}`,
53
+ )
54
+ }
55
+ }
56
+ }
@@ -18,7 +18,7 @@ export function transformBuffer<IN = any>(opt: TransformBufferOptions): Transfor
18
18
  return new Transform({
19
19
  objectMode: true,
20
20
  ...opt,
21
- transform(chunk, _encoding, cb) {
21
+ transform(chunk, _, cb) {
22
22
  buf.push(chunk)
23
23
 
24
24
  if (buf.length >= batchSize) {
@@ -1,32 +1,18 @@
1
1
  import { Transform } from 'stream'
2
2
  import { AsyncPredicate, Predicate } from '@naturalcycles/js-lib'
3
3
  import { TransformOptions, TransformTyped } from '../stream.model'
4
+ import { transformMap, TransformMapOptions } from './transformMap'
4
5
 
5
6
  /**
6
- * Note, that currently it's NOT concurrent! (concurrency = 1)
7
- * So, it's recommended to use transformMap instead, that is both concurrent and has
8
- * filtering feature by default.
7
+ * Just a convenience wrapper around `transformMap` that has built-in predicate filtering support.
9
8
  */
10
9
  export function transformFilter<IN = any>(
11
10
  predicate: AsyncPredicate<IN>,
12
- opt: TransformOptions = {},
11
+ opt: TransformMapOptions = {},
13
12
  ): TransformTyped<IN, IN> {
14
- let index = 0
15
-
16
- return new Transform({
17
- objectMode: true,
13
+ return transformMap(v => v, {
14
+ predicate,
18
15
  ...opt,
19
- async transform(chunk: IN, _encoding, cb) {
20
- try {
21
- if (await predicate(chunk, index++)) {
22
- cb(null, chunk) // pass through
23
- } else {
24
- cb() // signal that we've finished processing, but emit no output here
25
- }
26
- } catch (err) {
27
- cb(err as Error)
28
- }
29
- },
30
16
  })
31
17
  }
32
18
 
@@ -42,7 +28,7 @@ export function transformFilterSync<IN = any>(
42
28
  return new Transform({
43
29
  objectMode: true,
44
30
  ...opt,
45
- async transform(chunk: IN, _encoding, cb) {
31
+ transform(chunk: IN, _, cb) {
46
32
  try {
47
33
  if (predicate(chunk, index++)) {
48
34
  cb(null, chunk) // pass through
@@ -4,6 +4,7 @@ import { SimpleMovingAverage, _mb, _since, AnyObject, CommonLogger } from '@natu
4
4
  import { dayjs } from '@naturalcycles/time-lib'
5
5
  import { boldWhite, dimGrey, white, yellow } from '../../colors'
6
6
  import { hasColors } from '../../colors/colors'
7
+ import { SizeStack } from '../sizeStack'
7
8
  import { TransformOptions, TransformTyped } from '../stream.model'
8
9
 
9
10
  export interface TransformLogProgressOptions<IN = any> extends TransformOptions {
@@ -103,6 +104,41 @@ export interface TransformLogProgressOptions<IN = any> extends TransformOptions
103
104
  * Defaults to 1.
104
105
  */
105
106
  batchSize?: number
107
+
108
+ /**
109
+ * Experimental logging of item (chunk) sizes, when json-stringified.
110
+ *
111
+ * Defaults to false.
112
+ *
113
+ * @experimental
114
+ */
115
+ logSizes?: boolean
116
+
117
+ /**
118
+ * How many last item sizes to keep in a buffer, to calculate stats (p50, p90, avg, etc).
119
+ * Defaults to 100_000.
120
+ * Cannot be Infinity.
121
+ */
122
+ logSizesBuffer?: number
123
+
124
+ /**
125
+ * Works in addition to `logSizes`. Adds "zipped sizes".
126
+ *
127
+ * @experimental
128
+ */
129
+ logZippedSizes?: boolean
130
+ }
131
+
132
+ interface LogItem extends AnyObject {
133
+ heapUsed?: number
134
+ heapTotal?: number
135
+ rss?: number
136
+ peakRSS?: number
137
+ rssMinusHeap?: number
138
+ external?: number
139
+ arrayBuffers?: number
140
+ rps10?: number
141
+ rpsTotal?: number
106
142
  }
107
143
 
108
144
  const inspectOpt: InspectOptions = {
@@ -124,6 +160,9 @@ export function transformLogProgress<IN = any>(
124
160
  peakRSS: logPeakRSS = true,
125
161
  logRPS = true,
126
162
  logEvery = 1000,
163
+ logSizes = false,
164
+ logSizesBuffer = 100_000,
165
+ logZippedSizes = false,
127
166
  batchSize = 1,
128
167
  extra,
129
168
  logger = console,
@@ -138,15 +177,23 @@ export function transformLogProgress<IN = any>(
138
177
  let progress = 0
139
178
  let peakRSS = 0
140
179
 
180
+ const sizes = logSizes ? new SizeStack('json', logSizesBuffer) : undefined
181
+ const sizesZipped = logZippedSizes ? new SizeStack('json.gz', logSizesBuffer) : undefined
182
+
141
183
  logStats() // initial
142
184
 
143
185
  return new Transform({
144
186
  objectMode: true,
145
187
  ...opt,
146
- transform(chunk: IN, _encoding, cb) {
188
+ transform(chunk: IN, _, cb) {
147
189
  progress++
148
190
  processedLastSecond++
149
191
 
192
+ if (sizes) {
193
+ // Check it, cause gzipping might be delayed here..
194
+ void SizeStack.countItem(chunk, logger, sizes, sizesZipped)
195
+ }
196
+
150
197
  if (logProgress && progress % logEvery === 0) {
151
198
  logStats(chunk, false, progress % logEvery10 === 0)
152
199
  }
@@ -175,28 +222,30 @@ export function transformLogProgress<IN = any>(
175
222
  const rps10 = Math.round(sma.push(lastRPS))
176
223
  if (mem.rss > peakRSS) peakRSS = mem.rss
177
224
 
178
- logger.log(
179
- inspect(
180
- {
181
- [final ? `${metric}_final` : metric]: batchedProgress,
182
- ...(extra ? extra(chunk, progress) : {}),
183
- ...(logHeapUsed ? { heapUsed: _mb(mem.heapUsed) } : {}),
184
- ...(logHeapTotal ? { heapTotal: _mb(mem.heapTotal) } : {}),
185
- ...(logRss ? { rss: _mb(mem.rss) } : {}),
186
- ...(logPeakRSS ? { peakRSS: _mb(peakRSS) } : {}),
187
- ...(opt.rssMinusHeap ? { rssMinusHeap: _mb(mem.rss - mem.heapTotal) } : {}),
188
- ...(opt.external ? { external: _mb(mem.external) } : {}),
189
- ...(opt.arrayBuffers ? { arrayBuffers: _mb(mem.arrayBuffers || 0) } : {}),
190
- ...(logRPS
191
- ? {
192
- rps10,
193
- rpsTotal,
194
- }
195
- : {}),
196
- },
197
- inspectOpt,
198
- ),
199
- )
225
+ const o: LogItem = {
226
+ [final ? `${metric}_final` : metric]: batchedProgress,
227
+ }
228
+
229
+ if (extra) Object.assign(o, extra(chunk, progress))
230
+ if (logHeapUsed) o.heapUsed = _mb(mem.heapUsed)
231
+ if (logHeapTotal) o.heapTotal = _mb(mem.heapTotal)
232
+ if (logRss) o.rss = _mb(mem.rss)
233
+ if (logPeakRSS) o.peakRSS = _mb(peakRSS)
234
+ if (opt.rssMinusHeap) o.rssMinusHeap = _mb(mem.rss - mem.heapTotal)
235
+ if (opt.external) o.external = _mb(mem.external)
236
+ if (opt.arrayBuffers) o.arrayBuffers = _mb(mem.arrayBuffers || 0)
237
+
238
+ if (logRPS) Object.assign(o, { rps10, rpsTotal })
239
+
240
+ logger.log(inspect(o, inspectOpt))
241
+
242
+ if (sizes?.items.length) {
243
+ logger.log(sizes.getStats())
244
+
245
+ if (sizesZipped?.items.length) {
246
+ logger.log(sizesZipped.getStats())
247
+ }
248
+ }
200
249
 
201
250
  if (tenx) {
202
251
  let perHour: number | string =