@naturalcycles/datastore-lib 4.12.0 → 4.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,7 +27,7 @@ export declare class DatastoreDB extends BaseCommonDB implements CommonDB {
  private getDatastoreLib;
  ping(): Promise<void>;
  getByIds<ROW extends ObjectWithId>(table: string, ids: string[], opt?: DatastoreDBReadOptions): Promise<ROW[]>;
- multiGetByIds<ROW extends ObjectWithId>(map: StringMap<string[]>, opt?: DatastoreDBReadOptions): Promise<StringMap<ROW[]>>;
+ multiGet<ROW extends ObjectWithId>(map: StringMap<string[]>, opt?: DatastoreDBReadOptions): Promise<StringMap<ROW[]>>;
  runQuery<ROW extends ObjectWithId>(dbQuery: DBQuery<ROW>, opt?: DatastoreDBReadOptions): Promise<RunQueryResult<ROW>>;
  runQueryCount<ROW extends ObjectWithId>(dbQuery: DBQuery<ROW>, opt?: DatastoreDBReadOptions): Promise<number>;
  private runDatastoreQuery;
@@ -42,7 +42,7 @@ export declare class DatastoreDB extends BaseCommonDB implements CommonDB {
  * regardless if they were actually deleted or not.
  */
  deleteByIds(table: string, ids: string[], opt?: DatastoreDBOptions): Promise<number>;
- multiDeleteByIds(map: StringMap<string[]>, opt?: DatastoreDBOptions): Promise<number>;
+ multiDelete(map: StringMap<string[]>, opt?: DatastoreDBOptions): Promise<number>;
  createTransaction(opt?: CommonDBTransactionOptions): Promise<DatastoreDBTransaction>;
  runInTransaction(fn: DBTransactionFn, opt?: CommonDBTransactionOptions): Promise<void>;
  getAllStats(): Promise<DatastoreStats[]>;
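
This release renames the map-keyed batch methods: multiGetByIds becomes multiGet and multiDeleteByIds becomes multiDelete; the signatures are otherwise unchanged. A minimal usage sketch in TypeScript (the root export of DatastoreDB and the table names/ids below are assumptions, shown only to illustrate the renamed methods):

import { DatastoreDB } from '@naturalcycles/datastore-lib' // root export is assumed

interface User {
  id: string
}

async function example(db: DatastoreDB): Promise<void> {
  // Batch-get across tables: input and output are StringMaps keyed by table name.
  const rowsByTable = await db.multiGet<User>({
    User: ['id1', 'id2'],
    Account: ['id3'],
  })
  console.log(rowsByTable['User']?.length)

  // Batch-delete across tables; resolves to a number, per the Promise<number> signature above.
  const deleted = await db.multiDelete({ User: ['id1', 'id2'] })
  console.log(deleted)
}
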
@@ -12,7 +12,7 @@ import { pTimeout } from '@naturalcycles/js-lib/promise/pTimeout.js';
  import { _stringMapEntries, _stringMapValues, } from '@naturalcycles/js-lib/types';
  import { boldWhite } from '@naturalcycles/nodejs-lib/colors';
  import { DatastoreType } from './datastore.model.js';
- import { DatastoreStreamReadable } from './DatastoreStreamReadable.js';
+ import { DatastoreStreamReadable } from './datastoreStreamReadable.js';
  import { dbQueryToDatastoreQuery } from './query.util.js';
  // Datastore (also Firestore and other Google APIs) supports max 500 of items when saving/deleting, etc.
  const MAX_ITEMS = 500;
@@ -148,7 +148,7 @@ export class DatastoreDB extends BaseCommonDB {
  // same ids are not expected here
  .sort(idComparator));
  }
- async multiGetByIds(map, opt = {}) {
+ async multiGet(map, opt = {}) {
  const result = {};
  const ds = await this.ds();
  const dsOpt = this.getRunQueryOptions(opt);
@@ -296,7 +296,7 @@ export class DatastoreDB extends BaseCommonDB {
  });
  return ids.length;
  }
- async multiDeleteByIds(map, opt = {}) {
+ async multiDelete(map, opt = {}) {
  const ds = await this.ds();
  const keys = [];
  for (const [table, ids] of _stringMapEntries(map)) {
@@ -1,7 +1,7 @@
  import type { DatastoreOptions, Key } from '@google-cloud/datastore';
  import type { CommonDBOptions, CommonDBReadOptions, CommonDBSaveOptions } from '@naturalcycles/db-lib';
  import type { CommonLogger } from '@naturalcycles/js-lib/log';
- import type { NumberOfSeconds, ObjectWithId } from '@naturalcycles/js-lib/types';
+ import type { NumberOfSeconds, ObjectWithId, PositiveInteger } from '@naturalcycles/js-lib/types';
  export interface DatastorePayload<T = any> {
  key: Key;
  data: T;
@@ -59,32 +59,17 @@ export interface DatastoreDBStreamOptions extends DatastoreDBReadOptions {
  * Applicable to `experimentalCursorStream`.
  * Defines the size (limit) of each individual query.
  *
- * @default 1000
+ * @default 1_000
  */
- batchSize?: number;
+ batchSize?: PositiveInteger;
  /**
- * Applicable to `experimentalCursorStream`
- *
- * Set to a value (number of Megabytes) to control the peak RSS size.
- * If limit is reached - streaming will pause until the stream keeps up, and then
- * resumes.
- *
- * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
- * when _read is called.
- *
- * @default 1000
- */
- rssLimitMB?: number;
- /**
- * Applicable to `experimentalCursorStream`
- * Default false.
- * If true, stream will pause until consumer requests more data (via _read).
- * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
- * There will be gaps in time between "last query loaded" and "next query requested".
- * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
- * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
+ * Defaults to 3x batchSize.
+ * Default batchSize is 1_000, so default highWaterMark is 3_000.
+ * Controls how many rows to have "buffered".
+ * Should be at least 1x batchSize, otherwise the stream will be "starving"
+ * between the queries.
  */
- singleBatchBuffer?: boolean;
+ highWaterMark?: PositiveInteger;
  /**
  * Set to `true` to log additional debug info, when using experimentalCursorStream.
  *
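
The rssLimitMB and singleBatchBuffer options are removed; buffering is now controlled by the stream's highWaterMark (measured in rows, defaulting to 3x batchSize). A hedged configuration sketch follows; the interface is declared in datastore.model.d.ts, and whether it is re-exported from the package root is an assumption:

import type { DatastoreDBStreamOptions } from '@naturalcycles/datastore-lib' // export path is an assumption

const streamOpt: DatastoreDBStreamOptions = {
  batchSize: 1_000, // rows per underlying Datastore query (default 1_000)
  highWaterMark: 3_000, // rows kept buffered before the stream pauses (default 3x batchSize)
}

Code that previously tuned rssLimitMB or set singleBatchBuffer should switch to tuning highWaterMark instead.
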
@@ -6,14 +6,20 @@ import type { DatastoreDBStreamOptions } from './datastore.model.js';
  export declare class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
  private q;
  private logger;
+ private readonly table;
  private readonly originalLimit;
  private rowsRetrieved;
+ /**
+ * Counts how many times _read was called.
+ * For debugging.
+ */
+ countReads: number;
  private endCursor?;
- private running;
+ private queryIsRunning;
+ private paused;
  private done;
  private lastQueryDone?;
  private totalWait;
- private readonly table;
  /**
  * Used to support maxWait
  */
@@ -22,11 +28,7 @@ export declare class DatastoreStreamReadable<T = any> extends Readable implement
  private readonly opt;
  private readonly dsOpt;
  constructor(q: Query, opt: DatastoreDBStreamOptions, logger: CommonLogger);
- private runNextQuery;
- /**
- * Counts how many times _read was called.
- * For debugging.
- */
- count: number;
  _read(): void;
+ private runNextQuery;
+ private runQuery;
  }
@@ -1,17 +1,24 @@
  import { Readable } from 'node:stream';
+ import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
  import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
  import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
  export class DatastoreStreamReadable extends Readable {
  q;
  logger;
+ table;
  originalLimit;
  rowsRetrieved = 0;
+ /**
+ * Counts how many times _read was called.
+ * For debugging.
+ */
+ countReads = 0;
  endCursor;
- running = false;
+ queryIsRunning = false;
+ paused = false;
  done = false;
  lastQueryDone;
  totalWait = 0;
- table;
  /**
  * Used to support maxWait
  */
@@ -20,13 +27,17 @@ export class DatastoreStreamReadable extends Readable {
  opt;
  dsOpt;
  constructor(q, opt, logger) {
- super({ objectMode: true });
+ // 1_000 was optimal in benchmarks
+ const { batchSize = 1000 } = opt;
+ const { highWaterMark = batchSize * 3 } = opt;
+ // Defaulting highWaterMark to 3x batchSize
+ super({ objectMode: true, highWaterMark });
  this.q = q;
  this.logger = logger;
  this.opt = {
- rssLimitMB: 1000,
- batchSize: 1000,
  ...opt,
+ batchSize,
+ highWaterMark,
  };
  this.dsOpt = {};
  if (opt.readAt) {
@@ -35,19 +46,23 @@ export class DatastoreStreamReadable extends Readable {
  }
  this.originalLimit = q.limitVal;
  this.table = q.kinds[0];
- logger.log(`!! using experimentalCursorStream !! ${this.table}, batchSize: ${opt.batchSize}`);
+ logger.warn(`!! using experimentalCursorStream`, {
+ table: this.table,
+ batchSize,
+ highWaterMark,
+ });
  const { maxWait } = this.opt;
  if (maxWait) {
- this.logger.warn(`!! ${this.table} maxWait ${maxWait}`);
+ logger.warn(`!! ${this.table} maxWait ${maxWait}`);
  this.maxWaitInterval = setInterval(() => {
  const millisSinceLastRead = Date.now() - this.lastReadTimestamp;
  if (millisSinceLastRead < maxWait * 1000) {
- this.logger.warn(`!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`);
+ logger.warn(`!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`);
  return;
  }
- const { running, rowsRetrieved } = this;
- this.logger.warn(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
- running,
+ const { queryIsRunning, rowsRetrieved } = this;
+ logger.warn(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
+ running: queryIsRunning,
  rowsRetrieved,
  });
  // force-trigger _read
@@ -56,103 +71,110 @@ export class DatastoreStreamReadable extends Readable {
  }, (maxWait * 1000) / 2);
  }
  }
+ _read() {
+ this.lastReadTimestamp = Date.now();
+ // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
+ this.countReads++;
+ if (this.done) {
+ this.logger.warn(`!!! _read was called, but done==true`);
+ return;
+ }
+ if (this.paused) {
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`);
+ this.paused = false;
+ }
+ if (this.queryIsRunning) {
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`);
+ return;
+ }
+ void this.runNextQuery().catch(err => {
+ console.log('error in runNextQuery', err);
+ this.emit('error', err);
+ });
+ }
  async runNextQuery() {
  if (this.done)
  return;
+ const { logger, table } = this;
  if (this.lastQueryDone) {
  const now = Date.now();
  this.totalWait += now - this.lastQueryDone;
  }
- this.running = true;
- // console.log('running query...')
+ this.queryIsRunning = true;
  let limit = this.opt.batchSize;
  if (this.originalLimit) {
  limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved);
  }
- // console.log(`limit: ${limit}`)
  let q = this.q.limit(limit);
  if (this.endCursor) {
  q = q.start(this.endCursor);
  }
- let rows = [];
- let info = {};
- try {
- await pRetry(async () => {
- const res = await q.run(this.dsOpt);
- rows = res[0];
- info = res[1];
- }, {
- name: `DatastoreStreamReadable.query(${this.table})`,
- maxAttempts: 5,
- delay: 5000,
- delayMultiplier: 2,
- logger: this.logger,
- timeout: 120_000, // 2 minutes
- });
- }
- catch (err) {
- console.log(`DatastoreStreamReadable error!\n`, {
- table: this.table,
- rowsRetrieved: this.rowsRetrieved,
- }, err);
- this.emit('error', err);
- clearInterval(this.maxWaitInterval);
+ const started = localTime.nowUnixMillis();
+ const res = await this.runQuery(q);
+ const queryTook = Date.now() - started;
+ if (!res) {
+ // error already emitted in runQuery
  return;
  }
+ const rows = res[0];
+ const info = res[1];
  this.rowsRetrieved += rows.length;
- this.logger.log(`${this.table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`, info.moreResults);
- if (!rows.length) {
- this.logger.warn(`${this.table} got 0 rows, totalWait: ${_ms(this.totalWait)}`, info.moreResults);
- }
+ logger.log(`${table} got ${rows.length} rows in ${_ms(queryTook)}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
  this.endCursor = info.endCursor;
- this.running = false; // ready to take more _reads
+ this.queryIsRunning = false; // ready to take more _reads
  this.lastQueryDone = Date.now();
+ let shouldContinue = false;
  for (const row of rows) {
- this.push(row);
+ shouldContinue = this.push(row);
  }
  if (!info.endCursor ||
  info.moreResults === 'NO_MORE_RESULTS' ||
  (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
- this.logger.log(`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
+ logger.log(`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
  this.push(null);
  this.done = true;
+ this.paused = false;
  clearInterval(this.maxWaitInterval);
+ return;
  }
- else if (this.opt.singleBatchBuffer) {
- // here we don't start next query until we're asked (via next _read call)
- // do, let's do nothing
+ if (shouldContinue) {
+ // Keep the stream flowing
+ logger.log(`${table} continuing the stream`);
+ void this.runNextQuery();
  }
- else if (this.opt.rssLimitMB) {
- const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024);
- if (rssMB <= this.opt.rssLimitMB) {
- void this.runNextQuery();
+ else {
+ // Not starting the next query
+ if (this.paused) {
+ logger.log(`${table} stream is already paused`);
  }
  else {
- this.logger.warn(`${this.table} rssLimitMB reached ${rssMB} > ${this.opt.rssLimitMB}, pausing stream`);
+ logger.log(`${table} pausing the stream`);
+ this.paused = true;
  }
  }
  }
- /**
- * Counts how many times _read was called.
- * For debugging.
- */
- count = 0;
- _read() {
- this.lastReadTimestamp = Date.now();
- // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
- this.count++;
- if (this.done) {
- this.logger.warn(`!!! _read was called, but done==true`);
- return;
- }
- if (!this.running) {
- void this.runNextQuery().catch(err => {
- console.log('error in runNextQuery', err);
- this.emit('error', err);
+ async runQuery(q) {
+ const { table, logger } = this;
+ try {
+ return await pRetry(async () => {
+ return await q.run(this.dsOpt);
+ }, {
+ name: `DatastoreStreamReadable.query(${table})`,
+ maxAttempts: 5,
+ delay: 5000,
+ delayMultiplier: 2,
+ logger,
+ timeout: 120_000, // 2 minutes
  });
  }
- else {
- this.logger.log(`_read ${this.count}, wasRunning: true`);
+ catch (err) {
+ console.log(`DatastoreStreamReadable error!\n`, {
+ table,
+ rowsRetrieved: this.rowsRetrieved,
+ }, err);
+ this.emit('error', err);
+ clearInterval(this.maxWaitInterval);
+ return;
  }
  }
  }
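
With rssLimitMB gone, backpressure now comes from Node's own Readable machinery: push() returns false once the buffered rows reach highWaterMark, the stream marks itself paused, and the next _read() call from the consumer unpauses it and triggers the next query. A generic sketch of that contract (not the library's code; fetchBatch is a hypothetical stand-in for the cursor-based Datastore query):

import { Readable } from 'node:stream'

// Stand-in for a cursor-based query: returns one batch of rows, empty when exhausted.
async function fetchBatch(cursor: number, batchSize: number): Promise<string[]> {
  if (cursor >= 10_000) return []
  return Array.from({ length: batchSize }, (_, i) => `row_${cursor + i}`)
}

// Minimal illustration of the push()/_read() backpressure contract used above.
function createBatchedStream(batchSize = 1_000): Readable {
  let cursor = 0
  let running = false
  return new Readable({
    objectMode: true,
    highWaterMark: batchSize * 3, // rows buffered before push() starts returning false
    read() {
      if (running) return // a query is already in flight
      running = true
      void fetchBatch(cursor, batchSize)
        .then(rows => {
          running = false
          if (!rows.length) {
            this.push(null) // end of stream
            return
          }
          cursor += rows.length
          let hasCapacity = true
          for (const row of rows) {
            hasCapacity = this.push(row) // false once buffered rows reach highWaterMark
          }
          if (!hasCapacity) {
            // Buffer is full: stay idle; Node calls read() again once the consumer drains it.
            return
          }
          // Otherwise the library would start the next query eagerly here;
          // this sketch simply waits for the next read() call either way.
        })
        .catch(err => this.destroy(err))
    },
  })
}

// Usage: for await (const row of createBatchedStream()) { /* process row */ }
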
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@naturalcycles/datastore-lib",
  "type": "module",
- "version": "4.12.0",
+ "version": "4.14.0",
  "description": "Opinionated library to work with Google Datastore, implements CommonDB",
  "dependencies": {
  "@google-cloud/datastore": "^10",
@@ -54,7 +54,7 @@ import type {
  DatastoreStats,
  } from './datastore.model.js'
  import { DatastoreType } from './datastore.model.js'
- import { DatastoreStreamReadable } from './DatastoreStreamReadable.js'
+ import { DatastoreStreamReadable } from './datastoreStreamReadable.js'
  import { dbQueryToDatastoreQuery } from './query.util.js'

  // Datastore (also Firestore and other Google APIs) supports max 500 of items when saving/deleting, etc.
@@ -229,7 +229,7 @@ export class DatastoreDB extends BaseCommonDB implements CommonDB {
  )
  }

- override async multiGetByIds<ROW extends ObjectWithId>(
+ override async multiGet<ROW extends ObjectWithId>(
  map: StringMap<string[]>,
  opt: DatastoreDBReadOptions = {},
  ): Promise<StringMap<ROW[]>> {
@@ -470,7 +470,7 @@ export class DatastoreDB extends BaseCommonDB implements CommonDB {
  return ids.length
  }

- override async multiDeleteByIds(
+ override async multiDelete(
  map: StringMap<string[]>,
  opt: DatastoreDBOptions = {},
  ): Promise<number> {
@@ -5,7 +5,7 @@ import type {
  CommonDBSaveOptions,
  } from '@naturalcycles/db-lib'
  import type { CommonLogger } from '@naturalcycles/js-lib/log'
- import type { NumberOfSeconds, ObjectWithId } from '@naturalcycles/js-lib/types'
+ import type { NumberOfSeconds, ObjectWithId, PositiveInteger } from '@naturalcycles/js-lib/types'

  export interface DatastorePayload<T = any> {
  key: Key
@@ -73,34 +73,18 @@ export interface DatastoreDBStreamOptions extends DatastoreDBReadOptions {
  * Applicable to `experimentalCursorStream`.
  * Defines the size (limit) of each individual query.
  *
- * @default 1000
+ * @default 1_000
  */
- batchSize?: number
+ batchSize?: PositiveInteger

  /**
- * Applicable to `experimentalCursorStream`
- *
- * Set to a value (number of Megabytes) to control the peak RSS size.
- * If limit is reached - streaming will pause until the stream keeps up, and then
- * resumes.
- *
- * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
- * when _read is called.
- *
- * @default 1000
- */
- rssLimitMB?: number
-
- /**
- * Applicable to `experimentalCursorStream`
- * Default false.
- * If true, stream will pause until consumer requests more data (via _read).
- * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
- * There will be gaps in time between "last query loaded" and "next query requested".
- * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
- * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
+ * Defaults to 3x batchSize.
+ * Default batchSize is 1_000, so default highWaterMark is 3_000.
+ * Controls how many rows to have "buffered".
+ * Should be at least 1x batchSize, otherwise the stream will be "starving"
+ * between the queries.
  */
- singleBatchBuffer?: boolean
+ highWaterMark?: PositiveInteger

  /**
  * Set to `true` to log additional debug info, when using experimentalCursorStream.
@@ -1,6 +1,11 @@
  import { Readable } from 'node:stream'
  import type { Query } from '@google-cloud/datastore'
- import type { RunQueryInfo, RunQueryOptions } from '@google-cloud/datastore/build/src/query.js'
+ import type {
+ RunQueryInfo,
+ RunQueryOptions,
+ RunQueryResponse,
+ } from '@google-cloud/datastore/build/src/query.js'
+ import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js'
  import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js'
  import type { CommonLogger } from '@naturalcycles/js-lib/log'
  import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
@@ -9,21 +14,27 @@ import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
  import type { DatastoreDBStreamOptions } from './datastore.model.js'

  export class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
+ private readonly table: string
  private readonly originalLimit: number
  private rowsRetrieved = 0
+ /**
+ * Counts how many times _read was called.
+ * For debugging.
+ */
+ countReads = 0
  private endCursor?: string
- private running = false
+ private queryIsRunning = false
+ private paused = false
  private done = false
  private lastQueryDone?: number
  private totalWait = 0
- private readonly table: string
  /**
  * Used to support maxWait
  */
  private lastReadTimestamp = 0 as UnixTimestampMillis
  private readonly maxWaitInterval: NodeJS.Timeout | undefined

- private readonly opt: DatastoreDBStreamOptions & { batchSize: number }
+ private readonly opt: DatastoreDBStreamOptions & { batchSize: number; highWaterMark: number }
  private readonly dsOpt: RunQueryOptions

  constructor(
@@ -31,12 +42,16 @@ export class DatastoreStreamReadable<T = any> extends Readable implements Readab
  opt: DatastoreDBStreamOptions,
  private logger: CommonLogger,
  ) {
- super({ objectMode: true })
+ // 1_000 was optimal in benchmarks
+ const { batchSize = 1000 } = opt
+ const { highWaterMark = batchSize * 3 } = opt
+ // Defaulting highWaterMark to 3x batchSize
+ super({ objectMode: true, highWaterMark })

  this.opt = {
- rssLimitMB: 1000,
- batchSize: 1000,
  ...opt,
+ batchSize,
+ highWaterMark,
  }
  this.dsOpt = {}
  if (opt.readAt) {
@@ -47,26 +62,30 @@ export class DatastoreStreamReadable<T = any> extends Readable implements Readab
  this.originalLimit = q.limitVal
  this.table = q.kinds[0]!

- logger.log(`!! using experimentalCursorStream !! ${this.table}, batchSize: ${opt.batchSize}`)
+ logger.warn(`!! using experimentalCursorStream`, {
+ table: this.table,
+ batchSize,
+ highWaterMark,
+ })

  const { maxWait } = this.opt
  if (maxWait) {
- this.logger.warn(`!! ${this.table} maxWait ${maxWait}`)
+ logger.warn(`!! ${this.table} maxWait ${maxWait}`)

  this.maxWaitInterval = setInterval(
  () => {
  const millisSinceLastRead = Date.now() - this.lastReadTimestamp

  if (millisSinceLastRead < maxWait * 1000) {
- this.logger.warn(
+ logger.warn(
  `!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`,
  )
  return
  }

- const { running, rowsRetrieved } = this
- this.logger.warn(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
- running,
+ const { queryIsRunning, rowsRetrieved } = this
+ logger.warn(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
+ running: queryIsRunning,
  rowsRetrieved,
  })

@@ -79,16 +98,45 @@ export class DatastoreStreamReadable<T = any> extends Readable implements Readab
  }
  }

+ override _read(): void {
+ this.lastReadTimestamp = Date.now() as UnixTimestampMillis
+
+ // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
+ this.countReads++
+
+ if (this.done) {
+ this.logger.warn(`!!! _read was called, but done==true`)
+ return
+ }
+
+ if (this.paused) {
+ this.logger.log(
+ `_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`,
+ )
+ this.paused = false
+ }
+
+ if (this.queryIsRunning) {
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`)
+ return
+ }
+
+ void this.runNextQuery().catch(err => {
+ console.log('error in runNextQuery', err)
+ this.emit('error', err)
+ })
+ }
+
  private async runNextQuery(): Promise<void> {
  if (this.done) return
+ const { logger, table } = this

  if (this.lastQueryDone) {
  const now = Date.now()
  this.totalWait += now - this.lastQueryDone
  }

- this.running = true
- // console.log('running query...')
+ this.queryIsRunning = true

  let limit = this.opt.batchSize

@@ -96,66 +144,35 @@ export class DatastoreStreamReadable<T = any> extends Readable implements Readab
  limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved)
  }

- // console.log(`limit: ${limit}`)
  let q = this.q.limit(limit)
  if (this.endCursor) {
  q = q.start(this.endCursor)
  }

- let rows: T[] = []
- let info: RunQueryInfo = {}
-
- try {
- await pRetry(
- async () => {
- const res = await q.run(this.dsOpt)
- rows = res[0]
- info = res[1]
- },
- {
- name: `DatastoreStreamReadable.query(${this.table})`,
- maxAttempts: 5,
- delay: 5000,
- delayMultiplier: 2,
- logger: this.logger,
- timeout: 120_000, // 2 minutes
- },
- )
- } catch (err) {
- console.log(
- `DatastoreStreamReadable error!\n`,
- {
- table: this.table,
- rowsRetrieved: this.rowsRetrieved,
- },
- err,
- )
- this.emit('error', err)
- clearInterval(this.maxWaitInterval)
+ const started = localTime.nowUnixMillis()
+ const res = await this.runQuery(q)
+ const queryTook = Date.now() - started
+ if (!res) {
+ // error already emitted in runQuery
  return
  }
+ const rows: T[] = res[0]
+ const info: RunQueryInfo = res[1]

  this.rowsRetrieved += rows.length
- this.logger.log(
- `${this.table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
+ logger.log(
+ `${table} got ${rows.length} rows in ${_ms(queryTook)}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
  this.totalWait,
  )}`,
- info.moreResults,
  )

- if (!rows.length) {
- this.logger.warn(
- `${this.table} got 0 rows, totalWait: ${_ms(this.totalWait)}`,
- info.moreResults,
- )
- }
-
  this.endCursor = info.endCursor
- this.running = false // ready to take more _reads
+ this.queryIsRunning = false // ready to take more _reads
  this.lastQueryDone = Date.now()
+ let shouldContinue = false

  for (const row of rows) {
- this.push(row)
+ shouldContinue = this.push(row)
  }

  if (
@@ -163,52 +180,60 @@ export class DatastoreStreamReadable<T = any> extends Readable implements Readab
  info.moreResults === 'NO_MORE_RESULTS' ||
  (this.originalLimit && this.rowsRetrieved >= this.originalLimit)
  ) {
- this.logger.log(
+ logger.log(
  `!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
  )
  this.push(null)
  this.done = true
+ this.paused = false
  clearInterval(this.maxWaitInterval)
- } else if (this.opt.singleBatchBuffer) {
- // here we don't start next query until we're asked (via next _read call)
- // do, let's do nothing
- } else if (this.opt.rssLimitMB) {
- const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024)
-
- if (rssMB <= this.opt.rssLimitMB) {
- void this.runNextQuery()
+ return
+ }
+
+ if (shouldContinue) {
+ // Keep the stream flowing
+ logger.log(`${table} continuing the stream`)
+ void this.runNextQuery()
+ } else {
+ // Not starting the next query
+ if (this.paused) {
+ logger.log(`${table} stream is already paused`)
  } else {
- this.logger.warn(
- `${this.table} rssLimitMB reached ${rssMB} > ${this.opt.rssLimitMB}, pausing stream`,
- )
+ logger.log(`${table} pausing the stream`)
+ this.paused = true
  }
  }
  }

- /**
- * Counts how many times _read was called.
- * For debugging.
- */
- count = 0
-
- override _read(): void {
- this.lastReadTimestamp = Date.now() as UnixTimestampMillis
+ private async runQuery(q: Query): Promise<RunQueryResponse | undefined> {
+ const { table, logger } = this

- // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
- this.count++
-
- if (this.done) {
- this.logger.warn(`!!! _read was called, but done==true`)
+ try {
+ return await pRetry(
+ async () => {
+ return await q.run(this.dsOpt)
+ },
+ {
+ name: `DatastoreStreamReadable.query(${table})`,
+ maxAttempts: 5,
+ delay: 5000,
+ delayMultiplier: 2,
+ logger,
+ timeout: 120_000, // 2 minutes
+ },
+ )
+ } catch (err) {
+ console.log(
+ `DatastoreStreamReadable error!\n`,
+ {
+ table,
+ rowsRetrieved: this.rowsRetrieved,
+ },
+ err,
+ )
+ this.emit('error', err)
+ clearInterval(this.maxWaitInterval)
  return
  }
-
- if (!this.running) {
- void this.runNextQuery().catch(err => {
- console.log('error in runNextQuery', err)
- this.emit('error', err)
- })
- } else {
- this.logger.log(`_read ${this.count}, wasRunning: true`)
- }
  }
  }