@naturalcycles/datastore-lib 4.13.0 → 4.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,6 @@ import { _round } from '@naturalcycles/js-lib';
4
4
  import { _chunk } from '@naturalcycles/js-lib/array/array.util.js';
5
5
  import { _assert } from '@naturalcycles/js-lib/error/assert.js';
6
6
  import { _errorDataAppend, TimeoutError } from '@naturalcycles/js-lib/error/error.util.js';
7
- import { commonLoggerMinLevel } from '@naturalcycles/js-lib/log';
8
7
  import { _omit } from '@naturalcycles/js-lib/object/object.util.js';
9
8
  import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
10
9
  import { pRetry, pRetryFn } from '@naturalcycles/js-lib/promise/pRetry.js';
@@ -12,7 +11,7 @@ import { pTimeout } from '@naturalcycles/js-lib/promise/pTimeout.js';
12
11
  import { _stringMapEntries, _stringMapValues, } from '@naturalcycles/js-lib/types';
13
12
  import { boldWhite } from '@naturalcycles/nodejs-lib/colors';
14
13
  import { DatastoreType } from './datastore.model.js';
15
- import { DatastoreStreamReadable } from './DatastoreStreamReadable.js';
14
+ import { DatastoreStreamReadable } from './datastoreStreamReadable.js';
16
15
  import { dbQueryToDatastoreQuery } from './query.util.js';
17
16
  // Datastore (also Firestore and other Google APIs) supports max 500 of items when saving/deleting, etc.
18
17
  const MAX_ITEMS = 500;
@@ -219,11 +218,12 @@ export class DatastoreDB extends BaseCommonDB {
219
218
  void this.ds().then(async (ds) => {
220
219
  const q = dbQueryToDatastoreQuery(dbQuery, ds.createQuery(dbQuery.table), await this.getPropertyFilter());
221
220
  const opt = {
221
+ logger: this.cfg.logger,
222
222
  ...this.cfg.streamOptions,
223
223
  ..._opt,
224
224
  };
225
225
  (opt.experimentalCursorStream
226
- ? new DatastoreStreamReadable(q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'))
226
+ ? new DatastoreStreamReadable(q, opt)
227
227
  : ds.runQueryStream(q, this.getRunQueryOptions(opt)))
228
228
  .on('error', err => transform.emit('error', err))
229
229
  .pipe(transform);
@@ -1,7 +1,7 @@
1
1
  import type { DatastoreOptions, Key } from '@google-cloud/datastore';
2
2
  import type { CommonDBOptions, CommonDBReadOptions, CommonDBSaveOptions } from '@naturalcycles/db-lib';
3
- import type { CommonLogger } from '@naturalcycles/js-lib/log';
4
- import type { NumberOfSeconds, ObjectWithId } from '@naturalcycles/js-lib/types';
3
+ import type { CommonLogger, CommonLogLevel } from '@naturalcycles/js-lib/log';
4
+ import type { NumberOfSeconds, ObjectWithId, PositiveInteger } from '@naturalcycles/js-lib/types';
5
5
  export interface DatastorePayload<T = any> {
6
6
  key: Key;
7
7
  data: T;
@@ -30,6 +30,10 @@ export interface DatastoreDBCfg extends DatastoreOptions {
30
30
  * Default to `console`
31
31
  */
32
32
  logger?: CommonLogger;
33
+ /**
34
+ * Defaults to `log`.
35
+ */
36
+ logLevel?: CommonLogLevel;
33
37
  /**
34
38
  * Experimental option, currently only applies to `getByIds`.
35
39
  * Applies pTimeout to Datastore operation, re-creates Datastore on any error.
@@ -59,38 +63,23 @@ export interface DatastoreDBStreamOptions extends DatastoreDBReadOptions {
59
63
  * Applicable to `experimentalCursorStream`.
60
64
  * Defines the size (limit) of each individual query.
61
65
  *
62
- * @default 1000
63
- */
64
- batchSize?: number;
65
- /**
66
- * Applicable to `experimentalCursorStream`
67
- *
68
- * Set to a value (number of Megabytes) to control the peak RSS size.
69
- * If limit is reached - streaming will pause until the stream keeps up, and then
70
- * resumes.
71
- *
72
- * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
73
- * when _read is called.
74
- *
75
- * @default 1000
66
+ * @default 1_000
76
67
  */
77
- rssLimitMB?: number;
68
+ batchSize?: PositiveInteger;
78
69
  /**
79
- * Applicable to `experimentalCursorStream`
80
- * Default false.
81
- * If true, stream will pause until consumer requests more data (via _read).
82
- * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
83
- * There will be gaps in time between "last query loaded" and "next query requested".
84
- * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
85
- * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
70
+ * Defaults to 3x batchSize.
71
+ * Default batchSize is 1_000, so default highWaterMark is 3_000.
72
+ * Controls how many rows to have "buffered".
73
+ * Should be at least 1x batchSize, otherwise the stream will be "starving"
74
+ * between the queries.
86
75
  */
87
- singleBatchBuffer?: boolean;
76
+ highWaterMark?: PositiveInteger;
77
+ logger?: CommonLogger;
88
78
  /**
89
- * Set to `true` to log additional debug info, when using experimentalCursorStream.
90
- *
91
- * @default false
79
+ * Defaults to `log`.
80
+ * Set to `debug` to allow for extra debugging, e.g in experimentalCursorStream.
92
81
  */
93
- debug?: boolean;
82
+ logLevel?: CommonLogLevel;
94
83
  /**
95
84
  * Default is undefined.
96
85
  * If set - sets a "safety timer", which will force call _read after the specified number of seconds.
@@ -1,32 +1,33 @@
1
1
  import { Readable } from 'node:stream';
2
2
  import type { Query } from '@google-cloud/datastore';
3
- import type { CommonLogger } from '@naturalcycles/js-lib/log';
4
3
  import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
5
4
  import type { DatastoreDBStreamOptions } from './datastore.model.js';
6
5
  export declare class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
7
6
  private q;
8
- private logger;
7
+ private readonly table;
9
8
  private readonly originalLimit;
10
9
  private rowsRetrieved;
10
+ /**
11
+ * Counts how many times _read was called.
12
+ * For debugging.
13
+ */
14
+ countReads: number;
11
15
  private endCursor?;
12
- private running;
16
+ private queryIsRunning;
17
+ private paused;
13
18
  private done;
14
19
  private lastQueryDone?;
15
20
  private totalWait;
16
- private readonly table;
17
21
  /**
18
22
  * Used to support maxWait
19
23
  */
20
24
  private lastReadTimestamp;
21
25
  private readonly maxWaitInterval;
22
26
  private readonly opt;
27
+ private readonly logger;
23
28
  private readonly dsOpt;
24
- constructor(q: Query, opt: DatastoreDBStreamOptions, logger: CommonLogger);
25
- private runNextQuery;
26
- /**
27
- * Counts how many times _read was called.
28
- * For debugging.
29
- */
30
- count: number;
29
+ constructor(q: Query, opt: DatastoreDBStreamOptions);
31
30
  _read(): void;
31
+ private runNextQuery;
32
+ private runQuery;
32
33
  }
@@ -0,0 +1,182 @@
1
+ import { Readable } from 'node:stream';
2
+ import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
3
+ import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
4
+ import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log';
5
+ import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
6
+ export class DatastoreStreamReadable extends Readable {
7
+ q;
8
+ table;
9
+ originalLimit;
10
+ rowsRetrieved = 0;
11
+ /**
12
+ * Counts how many times _read was called.
13
+ * For debugging.
14
+ */
15
+ countReads = 0;
16
+ endCursor;
17
+ queryIsRunning = false;
18
+ paused = false;
19
+ done = false;
20
+ lastQueryDone;
21
+ totalWait = 0;
22
+ /**
23
+ * Used to support maxWait
24
+ */
25
+ lastReadTimestamp = 0;
26
+ maxWaitInterval;
27
+ opt;
28
+ logger;
29
+ dsOpt;
30
+ constructor(q, opt) {
31
+ // 1_000 was optimal in benchmarks
32
+ const { batchSize = 1000 } = opt;
33
+ const { highWaterMark = batchSize * 3 } = opt;
34
+ // Defaulting highWaterMark to 3x batchSize
35
+ super({ objectMode: true, highWaterMark });
36
+ this.q = q;
37
+ this.opt = {
38
+ ...opt,
39
+ batchSize,
40
+ highWaterMark,
41
+ };
42
+ this.dsOpt = {};
43
+ if (opt.readAt) {
44
+ // Datastore expects UnixTimestamp in milliseconds
45
+ this.dsOpt.readTime = opt.readAt * 1000;
46
+ }
47
+ const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel);
48
+ this.logger = logger;
49
+ this.originalLimit = q.limitVal;
50
+ this.table = q.kinds[0];
51
+ logger.log(`!! using experimentalCursorStream`, {
52
+ table: this.table,
53
+ batchSize,
54
+ highWaterMark,
55
+ });
56
+ const { maxWait } = this.opt;
57
+ if (maxWait) {
58
+ logger.log(`!! ${this.table} maxWait ${maxWait}`);
59
+ this.maxWaitInterval = setInterval(() => {
60
+ const millisSinceLastRead = Date.now() - this.lastReadTimestamp;
61
+ if (millisSinceLastRead < maxWait * 1000) {
62
+ logger.log(`!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`);
63
+ return;
64
+ }
65
+ const { queryIsRunning, rowsRetrieved } = this;
66
+ logger.log(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
67
+ running: queryIsRunning,
68
+ rowsRetrieved,
69
+ });
70
+ // force-trigger _read
71
+ // regardless of `running` status
72
+ this._read();
73
+ }, (maxWait * 1000) / 2);
74
+ }
75
+ }
76
+ _read() {
77
+ this.lastReadTimestamp = localTime.nowUnixMillis();
78
+ // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
79
+ this.countReads++;
80
+ if (this.done) {
81
+ this.logger.warn(`!!! _read was called, but done==true`);
82
+ return;
83
+ }
84
+ if (this.paused) {
85
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`);
86
+ this.paused = false;
87
+ }
88
+ if (this.queryIsRunning) {
89
+ this.logger.debug(`_read #${this.countReads}, queryIsRunning: true, doing nothing`);
90
+ return;
91
+ }
92
+ void this.runNextQuery().catch(err => {
93
+ this.logger.error('error in runNextQuery', err);
94
+ this.emit('error', err);
95
+ });
96
+ }
97
+ async runNextQuery() {
98
+ if (this.done)
99
+ return;
100
+ const { logger, table } = this;
101
+ if (this.lastQueryDone) {
102
+ const now = Date.now();
103
+ this.totalWait += now - this.lastQueryDone;
104
+ }
105
+ this.queryIsRunning = true;
106
+ let limit = this.opt.batchSize;
107
+ if (this.originalLimit) {
108
+ limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved);
109
+ }
110
+ let q = this.q.limit(limit);
111
+ if (this.endCursor) {
112
+ q = q.start(this.endCursor);
113
+ }
114
+ const started = localTime.nowUnixMillis();
115
+ const res = await this.runQuery(q);
116
+ const queryTook = Date.now() - started;
117
+ if (!res) {
118
+ // error already emitted in runQuery
119
+ return;
120
+ }
121
+ const rows = res[0];
122
+ const info = res[1];
123
+ this.rowsRetrieved += rows.length;
124
+ logger.log(`${table} got ${rows.length} rows in ${_ms(queryTook)}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
125
+ this.endCursor = info.endCursor;
126
+ this.queryIsRunning = false; // ready to take more _reads
127
+ this.lastQueryDone = Date.now();
128
+ let shouldContinue = false;
129
+ for (const row of rows) {
130
+ shouldContinue = this.push(row);
131
+ }
132
+ if (!info.endCursor ||
133
+ info.moreResults === 'NO_MORE_RESULTS' ||
134
+ (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
135
+ logger.log(`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
136
+ this.push(null);
137
+ this.done = true;
138
+ this.paused = false;
139
+ clearInterval(this.maxWaitInterval);
140
+ return;
141
+ }
142
+ if (shouldContinue) {
143
+ // Keep the stream flowing
144
+ logger.debug(`${table} continuing the stream`);
145
+ void this.runNextQuery();
146
+ }
147
+ else {
148
+ // Not starting the next query
149
+ if (this.paused) {
150
+ logger.debug(`${table} stream is already paused`);
151
+ }
152
+ else {
153
+ logger.log(`${table} pausing the stream`);
154
+ this.paused = true;
155
+ }
156
+ }
157
+ }
158
+ async runQuery(q) {
159
+ const { table, logger } = this;
160
+ try {
161
+ return await pRetry(async () => {
162
+ return await q.run(this.dsOpt);
163
+ }, {
164
+ name: `DatastoreStreamReadable.query(${table})`,
165
+ maxAttempts: 5,
166
+ delay: 5000,
167
+ delayMultiplier: 2,
168
+ logger,
169
+ timeout: 120_000, // 2 minutes
170
+ });
171
+ }
172
+ catch (err) {
173
+ logger.error(`DatastoreStreamReadable error!\n`, {
174
+ table,
175
+ rowsRetrieved: this.rowsRetrieved,
176
+ }, err);
177
+ this.emit('error', err);
178
+ clearInterval(this.maxWaitInterval);
179
+ return;
180
+ }
181
+ }
182
+ }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@naturalcycles/datastore-lib",
3
3
  "type": "module",
4
- "version": "4.13.0",
4
+ "version": "4.15.0",
5
5
  "description": "Opinionated library to work with Google Datastore, implements CommonDB",
6
6
  "dependencies": {
7
7
  "@google-cloud/datastore": "^10",
@@ -29,7 +29,6 @@ import type {
29
29
  JsonSchemaString,
30
30
  } from '@naturalcycles/js-lib/json-schema'
31
31
  import type { CommonLogger } from '@naturalcycles/js-lib/log'
32
- import { commonLoggerMinLevel } from '@naturalcycles/js-lib/log'
33
32
  import { _omit } from '@naturalcycles/js-lib/object/object.util.js'
34
33
  import type { PRetryOptions } from '@naturalcycles/js-lib/promise'
35
34
  import { pMap } from '@naturalcycles/js-lib/promise/pMap.js'
@@ -54,7 +53,7 @@ import type {
54
53
  DatastoreStats,
55
54
  } from './datastore.model.js'
56
55
  import { DatastoreType } from './datastore.model.js'
57
- import { DatastoreStreamReadable } from './DatastoreStreamReadable.js'
56
+ import { DatastoreStreamReadable } from './datastoreStreamReadable.js'
58
57
  import { dbQueryToDatastoreQuery } from './query.util.js'
59
58
 
60
59
  // Datastore (also Firestore and other Google APIs) supports max 500 of items when saving/deleting, etc.
@@ -344,16 +343,13 @@ export class DatastoreDB extends BaseCommonDB implements CommonDB {
344
343
  )
345
344
 
346
345
  const opt = {
346
+ logger: this.cfg.logger,
347
347
  ...this.cfg.streamOptions,
348
348
  ..._opt,
349
349
  }
350
350
 
351
351
  ;(opt.experimentalCursorStream
352
- ? new DatastoreStreamReadable<ROW>(
353
- q,
354
- opt,
355
- commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'),
356
- )
352
+ ? new DatastoreStreamReadable<ROW>(q, opt)
357
353
  : (ds.runQueryStream(q, this.getRunQueryOptions(opt)) as ReadableTyped<ROW>)
358
354
  )
359
355
  .on('error', err => transform.emit('error', err))
@@ -4,8 +4,8 @@ import type {
4
4
  CommonDBReadOptions,
5
5
  CommonDBSaveOptions,
6
6
  } from '@naturalcycles/db-lib'
7
- import type { CommonLogger } from '@naturalcycles/js-lib/log'
8
- import type { NumberOfSeconds, ObjectWithId } from '@naturalcycles/js-lib/types'
7
+ import type { CommonLogger, CommonLogLevel } from '@naturalcycles/js-lib/log'
8
+ import type { NumberOfSeconds, ObjectWithId, PositiveInteger } from '@naturalcycles/js-lib/types'
9
9
 
10
10
  export interface DatastorePayload<T = any> {
11
11
  key: Key
@@ -41,6 +41,11 @@ export interface DatastoreDBCfg extends DatastoreOptions {
41
41
  */
42
42
  logger?: CommonLogger
43
43
 
44
+ /**
45
+ * Defaults to `log`.
46
+ */
47
+ logLevel?: CommonLogLevel
48
+
44
49
  /**
45
50
  * Experimental option, currently only applies to `getByIds`.
46
51
  * Applies pTimeout to Datastore operation, re-creates Datastore on any error.
@@ -73,41 +78,26 @@ export interface DatastoreDBStreamOptions extends DatastoreDBReadOptions {
73
78
  * Applicable to `experimentalCursorStream`.
74
79
  * Defines the size (limit) of each individual query.
75
80
  *
76
- * @default 1000
81
+ * @default 1_000
77
82
  */
78
- batchSize?: number
83
+ batchSize?: PositiveInteger
79
84
 
80
85
  /**
81
- * Applicable to `experimentalCursorStream`
82
- *
83
- * Set to a value (number of Megabytes) to control the peak RSS size.
84
- * If limit is reached - streaming will pause until the stream keeps up, and then
85
- * resumes.
86
- *
87
- * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
88
- * when _read is called.
89
- *
90
- * @default 1000
86
+ * Defaults to 3x batchSize.
87
+ * Default batchSize is 1_000, so default highWaterMark is 3_000.
88
+ * Controls how many rows to have "buffered".
89
+ * Should be at least 1x batchSize, otherwise the stream will be "starving"
90
+ * between the queries.
91
91
  */
92
- rssLimitMB?: number
92
+ highWaterMark?: PositiveInteger
93
93
 
94
- /**
95
- * Applicable to `experimentalCursorStream`
96
- * Default false.
97
- * If true, stream will pause until consumer requests more data (via _read).
98
- * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
99
- * There will be gaps in time between "last query loaded" and "next query requested".
100
- * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
101
- * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
102
- */
103
- singleBatchBuffer?: boolean
94
+ logger?: CommonLogger
104
95
 
105
96
  /**
106
- * Set to `true` to log additional debug info, when using experimentalCursorStream.
107
- *
108
- * @default false
97
+ * Defaults to `log`.
98
+ * Set to `debug` to allow for extra debugging, e.g in experimentalCursorStream.
109
99
  */
110
- debug?: boolean
100
+ logLevel?: CommonLogLevel
111
101
 
112
102
  /**
113
103
  * Default is undefined.
@@ -1,42 +1,57 @@
1
1
  import { Readable } from 'node:stream'
2
2
  import type { Query } from '@google-cloud/datastore'
3
- import type { RunQueryInfo, RunQueryOptions } from '@google-cloud/datastore/build/src/query.js'
3
+ import type {
4
+ RunQueryInfo,
5
+ RunQueryOptions,
6
+ RunQueryResponse,
7
+ } from '@google-cloud/datastore/build/src/query.js'
8
+ import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js'
4
9
  import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js'
5
- import type { CommonLogger } from '@naturalcycles/js-lib/log'
10
+ import { type CommonLogger, createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log'
6
11
  import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
7
12
  import type { UnixTimestampMillis } from '@naturalcycles/js-lib/types'
8
13
  import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
9
14
  import type { DatastoreDBStreamOptions } from './datastore.model.js'
10
15
 
11
16
  export class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
17
+ private readonly table: string
12
18
  private readonly originalLimit: number
13
19
  private rowsRetrieved = 0
20
+ /**
21
+ * Counts how many times _read was called.
22
+ * For debugging.
23
+ */
24
+ countReads = 0
14
25
  private endCursor?: string
15
- private running = false
26
+ private queryIsRunning = false
27
+ private paused = false
16
28
  private done = false
17
29
  private lastQueryDone?: number
18
30
  private totalWait = 0
19
- private readonly table: string
20
31
  /**
21
32
  * Used to support maxWait
22
33
  */
23
34
  private lastReadTimestamp = 0 as UnixTimestampMillis
24
35
  private readonly maxWaitInterval: NodeJS.Timeout | undefined
25
36
 
26
- private readonly opt: DatastoreDBStreamOptions & { batchSize: number }
37
+ private readonly opt: DatastoreDBStreamOptions & { batchSize: number; highWaterMark: number }
38
+ private readonly logger: CommonLogger
27
39
  private readonly dsOpt: RunQueryOptions
28
40
 
29
41
  constructor(
30
42
  private q: Query,
31
43
  opt: DatastoreDBStreamOptions,
32
- private logger: CommonLogger,
33
44
  ) {
34
- super({ objectMode: true })
45
+ // 1_000 was optimal in benchmarks
46
+ const { batchSize = 1000 } = opt
47
+ const { highWaterMark = batchSize * 3 } = opt
48
+ // Defaulting highWaterMark to 3x batchSize
49
+ super({ objectMode: true, highWaterMark })
35
50
 
36
51
  this.opt = {
37
- rssLimitMB: 1000,
38
- batchSize: 1000,
39
52
  ...opt,
53
+ batchSize,
54
+ highWaterMark,
40
55
  }
41
56
  this.dsOpt = {}
42
57
  if (opt.readAt) {
@@ -44,29 +59,35 @@ export class DatastoreStreamReadable<T = any> extends Readable implements Readab
44
59
  this.dsOpt.readTime = opt.readAt * 1000
45
60
  }
46
61
 
62
+ const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel)
63
+ this.logger = logger
47
64
  this.originalLimit = q.limitVal
48
65
  this.table = q.kinds[0]!
49
66
 
50
- logger.log(`!! using experimentalCursorStream !! ${this.table}, batchSize: ${opt.batchSize}`)
67
+ logger.log(`!! using experimentalCursorStream`, {
68
+ table: this.table,
69
+ batchSize,
70
+ highWaterMark,
71
+ })
51
72
 
52
73
  const { maxWait } = this.opt
53
74
  if (maxWait) {
54
- this.logger.warn(`!! ${this.table} maxWait ${maxWait}`)
75
+ logger.log(`!! ${this.table} maxWait ${maxWait}`)
55
76
 
56
77
  this.maxWaitInterval = setInterval(
57
78
  () => {
58
79
  const millisSinceLastRead = Date.now() - this.lastReadTimestamp
59
80
 
60
81
  if (millisSinceLastRead < maxWait * 1000) {
61
- this.logger.warn(
82
+ logger.log(
62
83
  `!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`,
63
84
  )
64
85
  return
65
86
  }
66
87
 
67
- const { running, rowsRetrieved } = this
68
- this.logger.warn(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
69
- running,
88
+ const { queryIsRunning, rowsRetrieved } = this
89
+ logger.log(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
90
+ running: queryIsRunning,
70
91
  rowsRetrieved,
71
92
  })
72
93
 
@@ -79,16 +100,45 @@ export class DatastoreStreamReadable<T = any> extends Readable implements Readab
79
100
  }
80
101
  }
81
102
 
103
+ override _read(): void {
104
+ this.lastReadTimestamp = localTime.nowUnixMillis()
105
+
106
+ // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
107
+ this.countReads++
108
+
109
+ if (this.done) {
110
+ this.logger.warn(`!!! _read was called, but done==true`)
111
+ return
112
+ }
113
+
114
+ if (this.paused) {
115
+ this.logger.log(
116
+ `_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`,
117
+ )
118
+ this.paused = false
119
+ }
120
+
121
+ if (this.queryIsRunning) {
122
+ this.logger.debug(`_read #${this.countReads}, queryIsRunning: true, doing nothing`)
123
+ return
124
+ }
125
+
126
+ void this.runNextQuery().catch(err => {
127
+ this.logger.error('error in runNextQuery', err)
128
+ this.emit('error', err)
129
+ })
130
+ }
131
+
82
132
  private async runNextQuery(): Promise<void> {
83
133
  if (this.done) return
134
+ const { logger, table } = this
84
135
 
85
136
  if (this.lastQueryDone) {
86
137
  const now = Date.now()
87
138
  this.totalWait += now - this.lastQueryDone
88
139
  }
89
140
 
90
- this.running = true
91
- // console.log('running query...')
141
+ this.queryIsRunning = true
92
142
 
93
143
  let limit = this.opt.batchSize
94
144
 
@@ -96,66 +146,35 @@ export class DatastoreStreamReadable<T = any> extends Readable implements Readab
96
146
  limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved)
97
147
  }
98
148
 
99
- // console.log(`limit: ${limit}`)
100
149
  let q = this.q.limit(limit)
101
150
  if (this.endCursor) {
102
151
  q = q.start(this.endCursor)
103
152
  }
104
153
 
105
- let rows: T[] = []
106
- let info: RunQueryInfo = {}
107
-
108
- try {
109
- await pRetry(
110
- async () => {
111
- const res = await q.run(this.dsOpt)
112
- rows = res[0]
113
- info = res[1]
114
- },
115
- {
116
- name: `DatastoreStreamReadable.query(${this.table})`,
117
- maxAttempts: 5,
118
- delay: 5000,
119
- delayMultiplier: 2,
120
- logger: this.logger,
121
- timeout: 120_000, // 2 minutes
122
- },
123
- )
124
- } catch (err) {
125
- console.log(
126
- `DatastoreStreamReadable error!\n`,
127
- {
128
- table: this.table,
129
- rowsRetrieved: this.rowsRetrieved,
130
- },
131
- err,
132
- )
133
- this.emit('error', err)
134
- clearInterval(this.maxWaitInterval)
154
+ const started = localTime.nowUnixMillis()
155
+ const res = await this.runQuery(q)
156
+ const queryTook = Date.now() - started
157
+ if (!res) {
158
+ // error already emitted in runQuery
135
159
  return
136
160
  }
161
+ const rows: T[] = res[0]
162
+ const info: RunQueryInfo = res[1]
137
163
 
138
164
  this.rowsRetrieved += rows.length
139
- this.logger.log(
140
- `${this.table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
165
+ logger.log(
166
+ `${table} got ${rows.length} rows in ${_ms(queryTook)}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
141
167
  this.totalWait,
142
168
  )}`,
143
- info.moreResults,
144
169
  )
145
170
 
146
- if (!rows.length) {
147
- this.logger.warn(
148
- `${this.table} got 0 rows, totalWait: ${_ms(this.totalWait)}`,
149
- info.moreResults,
150
- )
151
- }
152
-
153
171
  this.endCursor = info.endCursor
154
- this.running = false // ready to take more _reads
172
+ this.queryIsRunning = false // ready to take more _reads
155
173
  this.lastQueryDone = Date.now()
174
+ let shouldContinue = false
156
175
 
157
176
  for (const row of rows) {
158
- this.push(row)
177
+ shouldContinue = this.push(row)
159
178
  }
160
179
 
161
180
  if (
@@ -163,52 +182,60 @@ export class DatastoreStreamReadable<T = any> extends Readable implements Readab
163
182
  info.moreResults === 'NO_MORE_RESULTS' ||
164
183
  (this.originalLimit && this.rowsRetrieved >= this.originalLimit)
165
184
  ) {
166
- this.logger.log(
185
+ logger.log(
167
186
  `!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
168
187
  )
169
188
  this.push(null)
170
189
  this.done = true
190
+ this.paused = false
171
191
  clearInterval(this.maxWaitInterval)
172
- } else if (this.opt.singleBatchBuffer) {
173
- // here we don't start next query until we're asked (via next _read call)
174
- // do, let's do nothing
175
- } else if (this.opt.rssLimitMB) {
176
- const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024)
177
-
178
- if (rssMB <= this.opt.rssLimitMB) {
179
- void this.runNextQuery()
192
+ return
193
+ }
194
+
195
+ if (shouldContinue) {
196
+ // Keep the stream flowing
197
+ logger.debug(`${table} continuing the stream`)
198
+ void this.runNextQuery()
199
+ } else {
200
+ // Not starting the next query
201
+ if (this.paused) {
202
+ logger.debug(`${table} stream is already paused`)
180
203
  } else {
181
- this.logger.warn(
182
- `${this.table} rssLimitMB reached ${rssMB} > ${this.opt.rssLimitMB}, pausing stream`,
183
- )
204
+ logger.log(`${table} pausing the stream`)
205
+ this.paused = true
184
206
  }
185
207
  }
186
208
  }
187
209
 
188
- /**
189
- * Counts how many times _read was called.
190
- * For debugging.
191
- */
192
- count = 0
193
-
194
- override _read(): void {
195
- this.lastReadTimestamp = Date.now() as UnixTimestampMillis
210
+ private async runQuery(q: Query): Promise<RunQueryResponse | undefined> {
211
+ const { table, logger } = this
196
212
 
197
- // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
198
- this.count++
199
-
200
- if (this.done) {
201
- this.logger.warn(`!!! _read was called, but done==true`)
213
+ try {
214
+ return await pRetry(
215
+ async () => {
216
+ return await q.run(this.dsOpt)
217
+ },
218
+ {
219
+ name: `DatastoreStreamReadable.query(${table})`,
220
+ maxAttempts: 5,
221
+ delay: 5000,
222
+ delayMultiplier: 2,
223
+ logger,
224
+ timeout: 120_000, // 2 minutes
225
+ },
226
+ )
227
+ } catch (err) {
228
+ logger.error(
229
+ `DatastoreStreamReadable error!\n`,
230
+ {
231
+ table,
232
+ rowsRetrieved: this.rowsRetrieved,
233
+ },
234
+ err,
235
+ )
236
+ this.emit('error', err)
237
+ clearInterval(this.maxWaitInterval)
202
238
  return
203
239
  }
204
-
205
- if (!this.running) {
206
- void this.runNextQuery().catch(err => {
207
- console.log('error in runNextQuery', err)
208
- this.emit('error', err)
209
- })
210
- } else {
211
- this.logger.log(`_read ${this.count}, wasRunning: true`)
212
- }
213
240
  }
214
241
  }
@@ -1,158 +0,0 @@
1
- import { Readable } from 'node:stream';
2
- import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
3
- import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
4
- export class DatastoreStreamReadable extends Readable {
5
- q;
6
- logger;
7
- originalLimit;
8
- rowsRetrieved = 0;
9
- endCursor;
10
- running = false;
11
- done = false;
12
- lastQueryDone;
13
- totalWait = 0;
14
- table;
15
- /**
16
- * Used to support maxWait
17
- */
18
- lastReadTimestamp = 0;
19
- maxWaitInterval;
20
- opt;
21
- dsOpt;
22
- constructor(q, opt, logger) {
23
- super({ objectMode: true });
24
- this.q = q;
25
- this.logger = logger;
26
- this.opt = {
27
- rssLimitMB: 1000,
28
- batchSize: 1000,
29
- ...opt,
30
- };
31
- this.dsOpt = {};
32
- if (opt.readAt) {
33
- // Datastore expects UnixTimestamp in milliseconds
34
- this.dsOpt.readTime = opt.readAt * 1000;
35
- }
36
- this.originalLimit = q.limitVal;
37
- this.table = q.kinds[0];
38
- logger.log(`!! using experimentalCursorStream !! ${this.table}, batchSize: ${opt.batchSize}`);
39
- const { maxWait } = this.opt;
40
- if (maxWait) {
41
- this.logger.warn(`!! ${this.table} maxWait ${maxWait}`);
42
- this.maxWaitInterval = setInterval(() => {
43
- const millisSinceLastRead = Date.now() - this.lastReadTimestamp;
44
- if (millisSinceLastRead < maxWait * 1000) {
45
- this.logger.warn(`!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`);
46
- return;
47
- }
48
- const { running, rowsRetrieved } = this;
49
- this.logger.warn(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
50
- running,
51
- rowsRetrieved,
52
- });
53
- // force-trigger _read
54
- // regardless of `running` status
55
- this._read();
56
- }, (maxWait * 1000) / 2);
57
- }
58
- }
59
- async runNextQuery() {
60
- if (this.done)
61
- return;
62
- if (this.lastQueryDone) {
63
- const now = Date.now();
64
- this.totalWait += now - this.lastQueryDone;
65
- }
66
- this.running = true;
67
- // console.log('running query...')
68
- let limit = this.opt.batchSize;
69
- if (this.originalLimit) {
70
- limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved);
71
- }
72
- // console.log(`limit: ${limit}`)
73
- let q = this.q.limit(limit);
74
- if (this.endCursor) {
75
- q = q.start(this.endCursor);
76
- }
77
- let rows = [];
78
- let info = {};
79
- try {
80
- await pRetry(async () => {
81
- const res = await q.run(this.dsOpt);
82
- rows = res[0];
83
- info = res[1];
84
- }, {
85
- name: `DatastoreStreamReadable.query(${this.table})`,
86
- maxAttempts: 5,
87
- delay: 5000,
88
- delayMultiplier: 2,
89
- logger: this.logger,
90
- timeout: 120_000, // 2 minutes
91
- });
92
- }
93
- catch (err) {
94
- console.log(`DatastoreStreamReadable error!\n`, {
95
- table: this.table,
96
- rowsRetrieved: this.rowsRetrieved,
97
- }, err);
98
- this.emit('error', err);
99
- clearInterval(this.maxWaitInterval);
100
- return;
101
- }
102
- this.rowsRetrieved += rows.length;
103
- this.logger.log(`${this.table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`, info.moreResults);
104
- if (!rows.length) {
105
- this.logger.warn(`${this.table} got 0 rows, totalWait: ${_ms(this.totalWait)}`, info.moreResults);
106
- }
107
- this.endCursor = info.endCursor;
108
- this.running = false; // ready to take more _reads
109
- this.lastQueryDone = Date.now();
110
- for (const row of rows) {
111
- this.push(row);
112
- }
113
- if (!info.endCursor ||
114
- info.moreResults === 'NO_MORE_RESULTS' ||
115
- (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
116
- this.logger.log(`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
117
- this.push(null);
118
- this.done = true;
119
- clearInterval(this.maxWaitInterval);
120
- }
121
- else if (this.opt.singleBatchBuffer) {
122
- // here we don't start next query until we're asked (via next _read call)
123
- // do, let's do nothing
124
- }
125
- else if (this.opt.rssLimitMB) {
126
- const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024);
127
- if (rssMB <= this.opt.rssLimitMB) {
128
- void this.runNextQuery();
129
- }
130
- else {
131
- this.logger.warn(`${this.table} rssLimitMB reached ${rssMB} > ${this.opt.rssLimitMB}, pausing stream`);
132
- }
133
- }
134
- }
135
- /**
136
- * Counts how many times _read was called.
137
- * For debugging.
138
- */
139
- count = 0;
140
- _read() {
141
- this.lastReadTimestamp = Date.now();
142
- // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
143
- this.count++;
144
- if (this.done) {
145
- this.logger.warn(`!!! _read was called, but done==true`);
146
- return;
147
- }
148
- if (!this.running) {
149
- void this.runNextQuery().catch(err => {
150
- console.log('error in runNextQuery', err);
151
- this.emit('error', err);
152
- });
153
- }
154
- else {
155
- this.logger.log(`_read ${this.count}, wasRunning: true`);
156
- }
157
- }
158
- }