@naturalcycles/datastore-lib 4.12.0 → 4.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/datastore.db.d.ts +2 -2
- package/dist/datastore.db.js +3 -3
- package/dist/datastore.model.d.ts +9 -24
- package/dist/{DatastoreStreamReadable.d.ts → datastoreStreamReadable.d.ts} +10 -8
- package/dist/{DatastoreStreamReadable.js → datastoreStreamReadable.js} +93 -71
- package/package.json +1 -1
- package/src/datastore.db.ts +3 -3
- package/src/datastore.model.ts +9 -25
- package/src/{DatastoreStreamReadable.ts → datastoreStreamReadable.ts} +117 -92
package/dist/datastore.db.d.ts
CHANGED
@@ -27,7 +27,7 @@ export declare class DatastoreDB extends BaseCommonDB implements CommonDB {
     private getDatastoreLib;
     ping(): Promise<void>;
     getByIds<ROW extends ObjectWithId>(table: string, ids: string[], opt?: DatastoreDBReadOptions): Promise<ROW[]>;
-
+    multiGet<ROW extends ObjectWithId>(map: StringMap<string[]>, opt?: DatastoreDBReadOptions): Promise<StringMap<ROW[]>>;
     runQuery<ROW extends ObjectWithId>(dbQuery: DBQuery<ROW>, opt?: DatastoreDBReadOptions): Promise<RunQueryResult<ROW>>;
     runQueryCount<ROW extends ObjectWithId>(dbQuery: DBQuery<ROW>, opt?: DatastoreDBReadOptions): Promise<number>;
     private runDatastoreQuery;
@@ -42,7 +42,7 @@ export declare class DatastoreDB extends BaseCommonDB implements CommonDB {
     * regardless if they were actually deleted or not.
     */
     deleteByIds(table: string, ids: string[], opt?: DatastoreDBOptions): Promise<number>;
-
+    multiDelete(map: StringMap<string[]>, opt?: DatastoreDBOptions): Promise<number>;
     createTransaction(opt?: CommonDBTransactionOptions): Promise<DatastoreDBTransaction>;
     runInTransaction(fn: DBTransactionFn, opt?: CommonDBTransactionOptions): Promise<void>;
     getAllStats(): Promise<DatastoreStats[]>;
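The new multiGet/multiDelete methods accept a map of table name to ids instead of a single table. A minimal usage sketch — only the signatures above come from this release; the db instance, table names and ids are hypothetical:

```ts
import type { ObjectWithId } from '@naturalcycles/js-lib/types'
import type { DatastoreDB } from '@naturalcycles/datastore-lib'

declare const db: DatastoreDB // an already-configured instance

async function multiExample(): Promise<void> {
  // Load rows from two tables in one call; the result is keyed by table name
  const byTable = await db.multiGet<ObjectWithId>({
    Account: ['id1', 'id2'],
    Session: ['id3'],
  })
  console.log(byTable['Account']?.length, byTable['Session']?.length)

  // Delete across tables in one call; resolves with a count
  const deleted = await db.multiDelete({
    Account: ['id1'],
    Session: ['id3'],
  })
  console.log(deleted)
}
```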
package/dist/datastore.db.js
CHANGED
@@ -12,7 +12,7 @@ import { pTimeout } from '@naturalcycles/js-lib/promise/pTimeout.js';
 import { _stringMapEntries, _stringMapValues, } from '@naturalcycles/js-lib/types';
 import { boldWhite } from '@naturalcycles/nodejs-lib/colors';
 import { DatastoreType } from './datastore.model.js';
-import { DatastoreStreamReadable } from './DatastoreStreamReadable.js';
+import { DatastoreStreamReadable } from './datastoreStreamReadable.js';
 import { dbQueryToDatastoreQuery } from './query.util.js';
 // Datastore (also Firestore and other Google APIs) supports max 500 of items when saving/deleting, etc.
 const MAX_ITEMS = 500;
@@ -148,7 +148,7 @@ export class DatastoreDB extends BaseCommonDB {
         // same ids are not expected here
             .sort(idComparator));
     }
-    async 
+    async multiGet(map, opt = {}) {
         const result = {};
         const ds = await this.ds();
         const dsOpt = this.getRunQueryOptions(opt);
@@ -296,7 +296,7 @@ export class DatastoreDB extends BaseCommonDB {
         });
         return ids.length;
     }
-    async 
+    async multiDelete(map, opt = {}) {
         const ds = await this.ds();
         const keys = [];
         for (const [table, ids] of _stringMapEntries(map)) {
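The MAX_ITEMS = 500 constant in the surrounding context reflects Datastore's limit of 500 entities per batch save/delete. An illustrative sketch of the chunking this forces — the helper below is not the library's own code:

```ts
const MAX_ITEMS = 500 // Datastore caps batch mutations at 500 entities per call

// Illustrative helper; datastore-lib uses its own utilities internally
function chunk<T>(items: T[], size: number): T[][] {
  const out: T[][] = []
  for (let i = 0; i < items.length; i += size) {
    out.push(items.slice(i, i + size))
  }
  return out
}

async function deleteAll(
  keys: string[],
  deleteBatch: (batch: string[]) => Promise<void>,
): Promise<void> {
  for (const batch of chunk(keys, MAX_ITEMS)) {
    await deleteBatch(batch) // each request stays within the 500-entity limit
  }
}
```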
package/dist/datastore.model.d.ts
CHANGED
@@ -1,7 +1,7 @@
 import type { DatastoreOptions, Key } from '@google-cloud/datastore';
 import type { CommonDBOptions, CommonDBReadOptions, CommonDBSaveOptions } from '@naturalcycles/db-lib';
 import type { CommonLogger } from '@naturalcycles/js-lib/log';
-import type { NumberOfSeconds, ObjectWithId } from '@naturalcycles/js-lib/types';
+import type { NumberOfSeconds, ObjectWithId, PositiveInteger } from '@naturalcycles/js-lib/types';
 export interface DatastorePayload<T = any> {
     key: Key;
     data: T;
@@ -59,32 +59,17 @@ export interface DatastoreDBStreamOptions extends DatastoreDBReadOptions {
      * Applicable to `experimentalCursorStream`.
      * Defines the size (limit) of each individual query.
      *
-     * @default
+     * @default 1_000
      */
-    batchSize?:
+    batchSize?: PositiveInteger;
     /**
-     *
-     *
-     *
-     *
-     *
-     *
-     * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
-     * when _read is called.
-     *
-     * @default 1000
-     */
-    rssLimitMB?: number;
-    /**
-     * Applicable to `experimentalCursorStream`
-     * Default false.
-     * If true, stream will pause until consumer requests more data (via _read).
-     * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
-     * There will be gaps in time between "last query loaded" and "next query requested".
-     * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
-     * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
+     * Defaults to 3x batchSize.
+     * Default batchSize is 1_000, so default highWaterMark is 3_000.
+     * Controls how many rows to have "buffered".
+     * Should be at least 1x batchSize, otherwise the stream will be "starving"
+     * between the queries.
      */
-
+    highWaterMark?: PositiveInteger;
     /**
      * Set to `true` to log additional debug info, when using experimentalCursorStream.
      *
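In 4.14.0 the rssLimitMB knob is gone; cursor-stream tuning is done with batchSize and highWaterMark instead. A sketch of the resulting options object, assuming DatastoreDBStreamOptions and the experimentalCursorStream flag are exposed from the package root — only batchSize and highWaterMark are confirmed by the diff above:

```ts
import type { DatastoreDBStreamOptions } from '@naturalcycles/datastore-lib'

// Hypothetical tuning for a large table scan: each cursor query fetches 2_000 rows,
// and up to 6_000 rows may sit buffered before the stream pauses itself.
const streamOpt: DatastoreDBStreamOptions = {
  experimentalCursorStream: true, // assumed flag, referenced by the doc comments above
  batchSize: 2_000,
  highWaterMark: 6_000, // when omitted, defaults to 3 * batchSize
}
```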
package/dist/{DatastoreStreamReadable.d.ts → datastoreStreamReadable.d.ts}
CHANGED
@@ -6,14 +6,20 @@ import type { DatastoreDBStreamOptions } from './datastore.model.js';
 export declare class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
     private q;
     private logger;
+    private readonly table;
     private readonly originalLimit;
     private rowsRetrieved;
+    /**
+     * Counts how many times _read was called.
+     * For debugging.
+     */
+    countReads: number;
     private endCursor?;
-    private 
+    private queryIsRunning;
+    private paused;
     private done;
     private lastQueryDone?;
     private totalWait;
-    private readonly table;
     /**
      * Used to support maxWait
      */
@@ -22,11 +28,7 @@ export declare class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
     private readonly opt;
     private readonly dsOpt;
     constructor(q: Query, opt: DatastoreDBStreamOptions, logger: CommonLogger);
-    private runNextQuery;
-    /**
-     * Counts how many times _read was called.
-     * For debugging.
-     */
-    count: number;
     _read(): void;
+    private runNextQuery;
+    private runQuery;
 }
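The new private runQuery method isolates the retry policy: each cursor query is wrapped in pRetry, and a failure emits 'error' on the stream rather than throwing into the stream machinery. A sketch of the same pRetry call applied to an arbitrary async operation — the fetchPage function is hypothetical; the option values mirror the implementation diff further below:

```ts
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'

declare function fetchPage(cursor?: string): Promise<{ rows: unknown[]; endCursor?: string }>

// Retry policy mirroring DatastoreStreamReadable.runQuery in this release
async function fetchPageWithRetry(cursor?: string): Promise<{ rows: unknown[]; endCursor?: string }> {
  return await pRetry(async () => await fetchPage(cursor), {
    name: 'fetchPage', // label for this retried operation
    maxAttempts: 5,
    delay: 5000,
    delayMultiplier: 2,
    timeout: 120_000, // 2 minutes
  })
}
```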
package/dist/{DatastoreStreamReadable.js → datastoreStreamReadable.js}
CHANGED
@@ -1,17 +1,24 @@
 import { Readable } from 'node:stream';
+import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
 import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
 import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
 export class DatastoreStreamReadable extends Readable {
     q;
     logger;
+    table;
     originalLimit;
     rowsRetrieved = 0;
+    /**
+     * Counts how many times _read was called.
+     * For debugging.
+     */
+    countReads = 0;
     endCursor;
-
+    queryIsRunning = false;
+    paused = false;
     done = false;
     lastQueryDone;
     totalWait = 0;
-    table;
     /**
      * Used to support maxWait
      */
@@ -20,13 +27,17 @@ export class DatastoreStreamReadable extends Readable {
     opt;
     dsOpt;
     constructor(q, opt, logger) {
-
+        // 1_000 was optimal in benchmarks
+        const { batchSize = 1000 } = opt;
+        const { highWaterMark = batchSize * 3 } = opt;
+        // Defaulting highWaterMark to 3x batchSize
+        super({ objectMode: true, highWaterMark });
         this.q = q;
         this.logger = logger;
         this.opt = {
-            rssLimitMB: 1000,
-            batchSize: 1000,
             ...opt,
+            batchSize,
+            highWaterMark,
         };
         this.dsOpt = {};
         if (opt.readAt) {
@@ -35,19 +46,23 @@ export class DatastoreStreamReadable extends Readable {
         }
         this.originalLimit = q.limitVal;
         this.table = q.kinds[0];
-        logger.
+        logger.warn(`!! using experimentalCursorStream`, {
+            table: this.table,
+            batchSize,
+            highWaterMark,
+        });
         const { maxWait } = this.opt;
         if (maxWait) {
-
+            logger.warn(`!! ${this.table} maxWait ${maxWait}`);
             this.maxWaitInterval = setInterval(() => {
                 const millisSinceLastRead = Date.now() - this.lastReadTimestamp;
                 if (millisSinceLastRead < maxWait * 1000) {
-
+                    logger.warn(`!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`);
                     return;
                 }
-                const {
-
-                running,
+                const { queryIsRunning, rowsRetrieved } = this;
+                logger.warn(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
+                    running: queryIsRunning,
                     rowsRetrieved,
                 });
                 // force-trigger _read
@@ -56,103 +71,110 @@ export class DatastoreStreamReadable extends Readable {
             }, (maxWait * 1000) / 2);
         }
     }
+    _read() {
+        this.lastReadTimestamp = Date.now();
+        // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
+        this.countReads++;
+        if (this.done) {
+            this.logger.warn(`!!! _read was called, but done==true`);
+            return;
+        }
+        if (this.paused) {
+            this.logger.log(`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`);
+            this.paused = false;
+        }
+        if (this.queryIsRunning) {
+            this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`);
+            return;
+        }
+        void this.runNextQuery().catch(err => {
+            console.log('error in runNextQuery', err);
+            this.emit('error', err);
+        });
+    }
     async runNextQuery() {
         if (this.done)
             return;
+        const { logger, table } = this;
         if (this.lastQueryDone) {
            const now = Date.now();
            this.totalWait += now - this.lastQueryDone;
        }
-        this.
-        // console.log('running query...')
+        this.queryIsRunning = true;
        let limit = this.opt.batchSize;
        if (this.originalLimit) {
            limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved);
        }
-        // console.log(`limit: ${limit}`)
        let q = this.q.limit(limit);
        if (this.endCursor) {
            q = q.start(this.endCursor);
        }
-
-
-
-
-
-            rows = res[0];
-            info = res[1];
-        }, {
-            name: `DatastoreStreamReadable.query(${this.table})`,
-            maxAttempts: 5,
-            delay: 5000,
-            delayMultiplier: 2,
-            logger: this.logger,
-            timeout: 120_000, // 2 minutes
-        });
-        }
-        catch (err) {
-            console.log(`DatastoreStreamReadable error!\n`, {
-                table: this.table,
-                rowsRetrieved: this.rowsRetrieved,
-            }, err);
-            this.emit('error', err);
-            clearInterval(this.maxWaitInterval);
+        const started = localTime.nowUnixMillis();
+        const res = await this.runQuery(q);
+        const queryTook = Date.now() - started;
+        if (!res) {
+            // error already emitted in runQuery
            return;
        }
+        const rows = res[0];
+        const info = res[1];
        this.rowsRetrieved += rows.length;
-
-        if (!rows.length) {
-            this.logger.warn(`${this.table} got 0 rows, totalWait: ${_ms(this.totalWait)}`, info.moreResults);
-        }
+        logger.log(`${table} got ${rows.length} rows in ${_ms(queryTook)}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
        this.endCursor = info.endCursor;
-        this.
+        this.queryIsRunning = false; // ready to take more _reads
        this.lastQueryDone = Date.now();
+        let shouldContinue = false;
        for (const row of rows) {
-            this.push(row);
+            shouldContinue = this.push(row);
        }
        if (!info.endCursor ||
            info.moreResults === 'NO_MORE_RESULTS' ||
            (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
-
+            logger.log(`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
            this.push(null);
            this.done = true;
+            this.paused = false;
            clearInterval(this.maxWaitInterval);
+            return;
        }
-
-        //
-
+        if (shouldContinue) {
+            // Keep the stream flowing
+            logger.log(`${table} continuing the stream`);
+            void this.runNextQuery();
        }
-        else
-
-        if (
-
+        else {
+            // Not starting the next query
+            if (this.paused) {
+                logger.log(`${table} stream is already paused`);
            }
            else {
-
+                logger.log(`${table} pausing the stream`);
+                this.paused = true;
            }
        }
    }
-
-
-
-
-
-
-
-
-
-
-
-
-
-        }
-        if (!this.running) {
-            void this.runNextQuery().catch(err => {
-                console.log('error in runNextQuery', err);
-                this.emit('error', err);
+    async runQuery(q) {
+        const { table, logger } = this;
+        try {
+            return await pRetry(async () => {
+                return await q.run(this.dsOpt);
+            }, {
+                name: `DatastoreStreamReadable.query(${table})`,
+                maxAttempts: 5,
+                delay: 5000,
+                delayMultiplier: 2,
+                logger,
+                timeout: 120_000, // 2 minutes
            });
        }
-
-
+        catch (err) {
+            console.log(`DatastoreStreamReadable error!\n`, {
+                table,
+                rowsRetrieved: this.rowsRetrieved,
+            }, err);
+            this.emit('error', err);
+            clearInterval(this.maxWaitInterval);
+            return;
        }
    }
 }
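The rewritten stream now respects backpressure: it tracks the return value of push(), pauses itself when the internal buffer is full, and only resumes the cursor loop on the next _read(). A self-contained sketch of that pattern with a fake paginated source — everything here is illustrative; only the overall shape mirrors the code above:

```ts
import { Readable } from 'node:stream'

// Fake paginated source standing in for a Datastore cursor query
async function fetchBatch(
  cursor: number,
  batchSize: number,
): Promise<{ rows: number[]; nextCursor?: number }> {
  const rows = Array.from({ length: batchSize }, (_, i) => cursor + i)
  const nextCursor = cursor + batchSize
  return { rows, nextCursor: nextCursor >= 30 ? undefined : nextCursor }
}

class CursorStream extends Readable {
  private cursor = 0
  private queryIsRunning = false
  private paused = false
  private done = false

  constructor(private batchSize: number) {
    super({ objectMode: true, highWaterMark: batchSize * 3 })
  }

  override _read(): void {
    if (this.done) return
    this.paused = false // consumer asked for data: unpause
    if (this.queryIsRunning) return // a batch is already being fetched
    void this.runNextQuery().catch(err => this.emit('error', err))
  }

  private async runNextQuery(): Promise<void> {
    if (this.done) return
    this.queryIsRunning = true
    const { rows, nextCursor } = await fetchBatch(this.cursor, this.batchSize)
    this.queryIsRunning = false

    let shouldContinue = false
    for (const row of rows) {
      shouldContinue = this.push(row) // false means the buffer is full
    }

    if (nextCursor === undefined) {
      this.push(null)
      this.done = true
      return
    }
    this.cursor = nextCursor

    if (shouldContinue) {
      void this.runNextQuery() // keep the stream flowing
    } else {
      this.paused = true // wait for the next _read before querying again
    }
  }
}

// Usage: for await honours the backpressure automatically
// for await (const row of new CursorStream(10)) console.log(row)
```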
package/package.json
CHANGED
package/src/datastore.db.ts
CHANGED
@@ -54,7 +54,7 @@ import type {
   DatastoreStats,
 } from './datastore.model.js'
 import { DatastoreType } from './datastore.model.js'
-import { DatastoreStreamReadable } from './DatastoreStreamReadable.js'
+import { DatastoreStreamReadable } from './datastoreStreamReadable.js'
 import { dbQueryToDatastoreQuery } from './query.util.js'
 
 // Datastore (also Firestore and other Google APIs) supports max 500 of items when saving/deleting, etc.
@@ -229,7 +229,7 @@ export class DatastoreDB extends BaseCommonDB implements CommonDB {
     )
   }
 
-  override async 
+  override async multiGet<ROW extends ObjectWithId>(
     map: StringMap<string[]>,
     opt: DatastoreDBReadOptions = {},
   ): Promise<StringMap<ROW[]>> {
@@ -470,7 +470,7 @@ export class DatastoreDB extends BaseCommonDB implements CommonDB {
     return ids.length
   }
 
-  override async 
+  override async multiDelete(
     map: StringMap<string[]>,
     opt: DatastoreDBOptions = {},
   ): Promise<number> {
package/src/datastore.model.ts
CHANGED
@@ -5,7 +5,7 @@ import type {
   CommonDBSaveOptions,
 } from '@naturalcycles/db-lib'
 import type { CommonLogger } from '@naturalcycles/js-lib/log'
-import type { NumberOfSeconds, ObjectWithId } from '@naturalcycles/js-lib/types'
+import type { NumberOfSeconds, ObjectWithId, PositiveInteger } from '@naturalcycles/js-lib/types'
 
 export interface DatastorePayload<T = any> {
   key: Key
@@ -73,34 +73,18 @@ export interface DatastoreDBStreamOptions extends DatastoreDBReadOptions {
    * Applicable to `experimentalCursorStream`.
    * Defines the size (limit) of each individual query.
    *
-   * @default
+   * @default 1_000
    */
-  batchSize?:
+  batchSize?: PositiveInteger
 
   /**
-   *
-   *
-   *
-   *
-   *
-   *
-   * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
-   * when _read is called.
-   *
-   * @default 1000
-   */
-  rssLimitMB?: number
-
-  /**
-   * Applicable to `experimentalCursorStream`
-   * Default false.
-   * If true, stream will pause until consumer requests more data (via _read).
-   * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
-   * There will be gaps in time between "last query loaded" and "next query requested".
-   * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
-   * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
+   * Defaults to 3x batchSize.
+   * Default batchSize is 1_000, so default highWaterMark is 3_000.
+   * Controls how many rows to have "buffered".
+   * Should be at least 1x batchSize, otherwise the stream will be "starving"
+   * between the queries.
    */
-
+  highWaterMark?: PositiveInteger
 
   /**
    * Set to `true` to log additional debug info, when using experimentalCursorStream.
   *
package/src/{DatastoreStreamReadable.ts → datastoreStreamReadable.ts}
CHANGED
@@ -1,6 +1,11 @@
 import { Readable } from 'node:stream'
 import type { Query } from '@google-cloud/datastore'
-import type {
+import type {
+  RunQueryInfo,
+  RunQueryOptions,
+  RunQueryResponse,
+} from '@google-cloud/datastore/build/src/query.js'
+import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js'
 import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js'
 import type { CommonLogger } from '@naturalcycles/js-lib/log'
 import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
@@ -9,21 +14,27 @@ import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
 import type { DatastoreDBStreamOptions } from './datastore.model.js'
 
 export class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
+  private readonly table: string
   private readonly originalLimit: number
   private rowsRetrieved = 0
+  /**
+   * Counts how many times _read was called.
+   * For debugging.
+   */
+  countReads = 0
   private endCursor?: string
-  private 
+  private queryIsRunning = false
+  private paused = false
   private done = false
   private lastQueryDone?: number
   private totalWait = 0
-  private readonly table: string
   /**
    * Used to support maxWait
    */
   private lastReadTimestamp = 0 as UnixTimestampMillis
   private readonly maxWaitInterval: NodeJS.Timeout | undefined
 
-  private readonly opt: DatastoreDBStreamOptions & { batchSize: number }
+  private readonly opt: DatastoreDBStreamOptions & { batchSize: number; highWaterMark: number }
   private readonly dsOpt: RunQueryOptions
 
   constructor(
@@ -31,12 +42,16 @@ export class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
     opt: DatastoreDBStreamOptions,
     private logger: CommonLogger,
   ) {
-
+    // 1_000 was optimal in benchmarks
+    const { batchSize = 1000 } = opt
+    const { highWaterMark = batchSize * 3 } = opt
+    // Defaulting highWaterMark to 3x batchSize
+    super({ objectMode: true, highWaterMark })
 
     this.opt = {
-      rssLimitMB: 1000,
-      batchSize: 1000,
       ...opt,
+      batchSize,
+      highWaterMark,
     }
     this.dsOpt = {}
     if (opt.readAt) {
@@ -47,26 +62,30 @@ export class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
     this.originalLimit = q.limitVal
     this.table = q.kinds[0]!
 
-    logger.
+    logger.warn(`!! using experimentalCursorStream`, {
+      table: this.table,
+      batchSize,
+      highWaterMark,
+    })
 
     const { maxWait } = this.opt
     if (maxWait) {
-
+      logger.warn(`!! ${this.table} maxWait ${maxWait}`)
 
       this.maxWaitInterval = setInterval(
         () => {
           const millisSinceLastRead = Date.now() - this.lastReadTimestamp
 
           if (millisSinceLastRead < maxWait * 1000) {
-
+            logger.warn(
              `!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`,
            )
            return
          }
 
-          const {
-
-          running,
+          const { queryIsRunning, rowsRetrieved } = this
+          logger.warn(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
+            running: queryIsRunning,
            rowsRetrieved,
          })
 
@@ -79,16 +98,45 @@ export class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
     }
   }
 
+  override _read(): void {
+    this.lastReadTimestamp = Date.now() as UnixTimestampMillis
+
+    // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
+    this.countReads++
+
+    if (this.done) {
+      this.logger.warn(`!!! _read was called, but done==true`)
+      return
+    }
+
+    if (this.paused) {
+      this.logger.log(
+        `_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`,
+      )
+      this.paused = false
+    }
+
+    if (this.queryIsRunning) {
+      this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`)
+      return
+    }
+
+    void this.runNextQuery().catch(err => {
+      console.log('error in runNextQuery', err)
+      this.emit('error', err)
+    })
+  }
+
   private async runNextQuery(): Promise<void> {
     if (this.done) return
+    const { logger, table } = this
 
     if (this.lastQueryDone) {
       const now = Date.now()
       this.totalWait += now - this.lastQueryDone
     }
 
-    this.
-    // console.log('running query...')
+    this.queryIsRunning = true
 
     let limit = this.opt.batchSize
 
@@ -96,66 +144,35 @@ export class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
       limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved)
     }
 
-    // console.log(`limit: ${limit}`)
     let q = this.q.limit(limit)
     if (this.endCursor) {
       q = q.start(this.endCursor)
     }
 
-
-
-
-
-
-      async () => {
-        const res = await q.run(this.dsOpt)
-        rows = res[0]
-        info = res[1]
-      },
-      {
-        name: `DatastoreStreamReadable.query(${this.table})`,
-        maxAttempts: 5,
-        delay: 5000,
-        delayMultiplier: 2,
-        logger: this.logger,
-        timeout: 120_000, // 2 minutes
-      },
-    )
-    } catch (err) {
-      console.log(
-        `DatastoreStreamReadable error!\n`,
-        {
-          table: this.table,
-          rowsRetrieved: this.rowsRetrieved,
-        },
-        err,
-      )
-      this.emit('error', err)
-      clearInterval(this.maxWaitInterval)
+    const started = localTime.nowUnixMillis()
+    const res = await this.runQuery(q)
+    const queryTook = Date.now() - started
+    if (!res) {
+      // error already emitted in runQuery
      return
    }
+    const rows: T[] = res[0]
+    const info: RunQueryInfo = res[1]
 
    this.rowsRetrieved += rows.length
-
-      `${
+    logger.log(
+      `${table} got ${rows.length} rows in ${_ms(queryTook)}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
        this.totalWait,
      )}`,
-      info.moreResults,
    )
 
-    if (!rows.length) {
-      this.logger.warn(
-        `${this.table} got 0 rows, totalWait: ${_ms(this.totalWait)}`,
-        info.moreResults,
-      )
-    }
-
    this.endCursor = info.endCursor
-    this.
+    this.queryIsRunning = false // ready to take more _reads
    this.lastQueryDone = Date.now()
+    let shouldContinue = false
 
    for (const row of rows) {
-      this.push(row)
+      shouldContinue = this.push(row)
    }
 
    if (
@@ -163,52 +180,60 @@ export class DatastoreStreamReadable<T = any> extends Readable implements ReadableTyped<T> {
      info.moreResults === 'NO_MORE_RESULTS' ||
      (this.originalLimit && this.rowsRetrieved >= this.originalLimit)
    ) {
-
+      logger.log(
        `!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      this.push(null)
      this.done = true
+      this.paused = false
      clearInterval(this.maxWaitInterval)
-
-
-
-
-
-
-
-
+      return
+    }
+
+    if (shouldContinue) {
+      // Keep the stream flowing
+      logger.log(`${table} continuing the stream`)
+      void this.runNextQuery()
+    } else {
+      // Not starting the next query
+      if (this.paused) {
+        logger.log(`${table} stream is already paused`)
      } else {
-
-
-      )
+        logger.log(`${table} pausing the stream`)
+        this.paused = true
      }
    }
  }
 
-
-
-   * For debugging.
-   */
-  count = 0
-
-  override _read(): void {
-    this.lastReadTimestamp = Date.now() as UnixTimestampMillis
+  private async runQuery(q: Query): Promise<RunQueryResponse | undefined> {
+    const { table, logger } = this
 
-
-
-
-
-
+    try {
+      return await pRetry(
+        async () => {
+          return await q.run(this.dsOpt)
+        },
+        {
+          name: `DatastoreStreamReadable.query(${table})`,
+          maxAttempts: 5,
+          delay: 5000,
+          delayMultiplier: 2,
+          logger,
+          timeout: 120_000, // 2 minutes
+        },
+      )
+    } catch (err) {
+      console.log(
+        `DatastoreStreamReadable error!\n`,
+        {
+          table,
+          rowsRetrieved: this.rowsRetrieved,
+        },
+        err,
+      )
+      this.emit('error', err)
+      clearInterval(this.maxWaitInterval)
      return
    }
-
-    if (!this.running) {
-      void this.runNextQuery().catch(err => {
-        console.log('error in runNextQuery', err)
-        this.emit('error', err)
-      })
-    } else {
-      this.logger.log(`_read ${this.count}, wasRunning: true`)
-    }
  }
 }
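For completeness, the maxWait handling above (lastReadTimestamp plus a maxWaitInterval timer) boils down to a watchdog that force-triggers _read when the consumer has been silent for too long. A stripped-down, self-contained sketch of that pattern — the numbers and the item source are invented:

```ts
import { Readable } from 'node:stream'

// Watchdog sketch: if the consumer has not called _read for maxWaitSeconds,
// force-trigger it so the producer cannot stall forever waiting for demand.
class WatchdogReadable extends Readable {
  private lastReadTimestamp = Date.now()
  private readonly maxWaitInterval: NodeJS.Timeout
  private counter = 0
  private done = false

  constructor(private maxWaitSeconds: number) {
    super({ objectMode: true })
    this.maxWaitInterval = setInterval(() => {
      const sinceLastRead = Date.now() - this.lastReadTimestamp
      if (sinceLastRead < this.maxWaitSeconds * 1000) return
      this._read() // force-trigger, the same idea as in DatastoreStreamReadable
    }, (maxWaitSeconds * 1000) / 2)
  }

  override _read(): void {
    this.lastReadTimestamp = Date.now()
    if (this.done) return
    if (this.counter >= 5) {
      this.push(null) // end of stream
      this.done = true
      clearInterval(this.maxWaitInterval)
      return
    }
    this.push({ n: this.counter++ })
  }
}

// Usage: for await (const item of new WatchdogReadable(10)) console.log(item)
```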