@naturalcycles/firestore-lib 2.9.0 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/firestore.db.d.ts +10 -32
- package/dist/firestore.db.js +5 -0
- package/dist/firestoreShardedReadable.d.ts +41 -0
- package/dist/firestoreShardedReadable.js +173 -0
- package/dist/firestoreStreamReadable.d.ts +4 -5
- package/dist/firestoreStreamReadable.js +78 -69
- package/dist/query.util.d.ts +1 -1
- package/dist/query.util.js +16 -7
- package/package.json +1 -1
- package/src/firestore.db.ts +22 -34
- package/src/firestoreShardedReadable.ts +233 -0
- package/src/firestoreStreamReadable.ts +92 -89
- package/src/query.util.ts +16 -8
package/dist/firestore.db.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type { Firestore, Query, QuerySnapshot, Transaction } from '@google-cloud
|
|
|
2
2
|
import type { CommonDB, CommonDBOptions, CommonDBReadOptions, CommonDBSaveOptions, CommonDBSupport, CommonDBTransactionOptions, DBQuery, DBTransaction, DBTransactionFn, RunQueryResult } from '@naturalcycles/db-lib';
|
|
3
3
|
import { BaseCommonDB } from '@naturalcycles/db-lib';
|
|
4
4
|
import { type CommonLogger } from '@naturalcycles/js-lib/log';
|
|
5
|
-
import type {
|
|
5
|
+
import type { ObjectWithId, PositiveInteger, StringMap } from '@naturalcycles/js-lib/types';
|
|
6
6
|
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
|
|
7
7
|
export declare class FirestoreDB extends BaseCommonDB implements CommonDB {
|
|
8
8
|
constructor(cfg: FirestoreDBCfg);
|
|
@@ -66,50 +66,28 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
|
|
|
66
66
|
* Defaults to false
|
|
67
67
|
*/
|
|
68
68
|
experimentalCursorStream?: boolean;
|
|
69
|
+
experimentalShardedStream?: boolean;
|
|
69
70
|
/**
|
|
70
71
|
* Applicable to `experimentalCursorStream`.
|
|
71
72
|
* Defines the size (limit) of each individual query.
|
|
72
73
|
*
|
|
73
|
-
* Default:
|
|
74
|
+
* Default: 10_000
|
|
74
75
|
*/
|
|
75
|
-
batchSize?:
|
|
76
|
+
batchSize?: PositiveInteger;
|
|
76
77
|
/**
|
|
77
|
-
*
|
|
78
|
-
*
|
|
79
|
-
*
|
|
80
|
-
*
|
|
81
|
-
*
|
|
82
|
-
*
|
|
83
|
-
* Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
|
|
84
|
-
* when _read is called.
|
|
85
|
-
*
|
|
86
|
-
* @default 1000
|
|
87
|
-
*/
|
|
88
|
-
rssLimitMB?: number;
|
|
89
|
-
/**
|
|
90
|
-
* Applicable to `experimentalCursorStream`
|
|
91
|
-
* Default false.
|
|
92
|
-
* If true, stream will pause until consumer requests more data (via _read).
|
|
93
|
-
* It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
|
|
94
|
-
* There will be gaps in time between "last query loaded" and "next query requested".
|
|
95
|
-
* This mode is useful e.g for DB migrations, where you want to avoid "stale data".
|
|
96
|
-
* So, it minimizes the time between "item loaded" and "item saved" during DB migration.
|
|
78
|
+
* Defaults to 3x batchSize.
|
|
79
|
+
* Default batchSize is 10_000, so default highWaterMark is 30_000.
|
|
80
|
+
* Controls how many rows to have "buffered".
|
|
81
|
+
* Should be at least 1x batchSize, otherwise the stream will be "starving"
|
|
82
|
+
* between the queries.
|
|
97
83
|
*/
|
|
98
|
-
|
|
84
|
+
highWaterMark?: PositiveInteger;
|
|
99
85
|
/**
|
|
100
86
|
* Set to `true` to log additional debug info, when using experimentalCursorStream.
|
|
101
87
|
*
|
|
102
88
|
* @default false
|
|
103
89
|
*/
|
|
104
90
|
debug?: boolean;
|
|
105
|
-
/**
|
|
106
|
-
* Default is undefined.
|
|
107
|
-
* If set - sets a "safety timer", which will force call _read after the specified number of seconds.
|
|
108
|
-
* This is to prevent possible "dead-lock"/race-condition that would make the stream "hang".
|
|
109
|
-
*
|
|
110
|
-
* @experimental
|
|
111
|
-
*/
|
|
112
|
-
maxWait?: NumberOfSeconds;
|
|
113
91
|
}
|
|
114
92
|
export interface FirestoreDBOptions extends CommonDBOptions {
|
|
115
93
|
}
|
package/dist/firestore.db.js
CHANGED
|
@@ -8,6 +8,7 @@ import { _filterUndefinedValues, _omit } from '@naturalcycles/js-lib/object/obje
|
|
|
8
8
|
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
|
|
9
9
|
import { _stringMapEntries } from '@naturalcycles/js-lib/types';
|
|
10
10
|
import { escapeDocId, unescapeDocId } from './firestore.util.js';
|
|
11
|
+
import { FirestoreShardedReadable } from './firestoreShardedReadable.js';
|
|
11
12
|
import { FirestoreStreamReadable } from './firestoreStreamReadable.js';
|
|
12
13
|
import { dbQueryToFirestoreQuery } from './query.util.js';
|
|
13
14
|
export class FirestoreDB extends BaseCommonDB {
|
|
@@ -23,6 +24,7 @@ export class FirestoreDB extends BaseCommonDB {
|
|
|
23
24
|
...commonDBFullSupport,
|
|
24
25
|
patchByQuery: false, // todo: can be implemented
|
|
25
26
|
tableSchemas: false,
|
|
27
|
+
createTransaction: false, // Firestore SDK doesn't support it
|
|
26
28
|
};
|
|
27
29
|
// GET
|
|
28
30
|
async getByIds(table, ids, opt = {}) {
|
|
@@ -100,6 +102,9 @@ export class FirestoreDB extends BaseCommonDB {
|
|
|
100
102
|
if (opt.experimentalCursorStream) {
|
|
101
103
|
return new FirestoreStreamReadable(firestoreQuery, q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'));
|
|
102
104
|
}
|
|
105
|
+
if (opt.experimentalShardedStream) {
|
|
106
|
+
return new FirestoreShardedReadable(firestoreQuery, q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'));
|
|
107
|
+
}
|
|
103
108
|
return firestoreQuery.stream().map(doc => {
|
|
104
109
|
return {
|
|
105
110
|
id: unescapeDocId(doc.id),
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { type Query } from '@google-cloud/firestore';
|
|
3
|
+
import type { DBQuery } from '@naturalcycles/db-lib';
|
|
4
|
+
import type { CommonLogger } from '@naturalcycles/js-lib/log';
|
|
5
|
+
import type { ObjectWithId } from '@naturalcycles/js-lib/types';
|
|
6
|
+
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
|
|
7
|
+
import type { FirestoreDBStreamOptions } from './firestore.db.js';
|
|
8
|
+
/**
 * Highly, HIGHLY experimental!
 *
 * Type declaration of the sharded Firestore stream: a `Readable` that is also
 * exposed as `ReadableTyped<T>`.
 */
export declare class FirestoreShardedReadable<T extends ObjectWithId = any> extends Readable implements ReadableTyped<T> {
    /** The DBQuery this stream was created for. */
    readonly dbQuery: DBQuery<T>;
    /**
     * Counts how many times _read was called.
     * For debugging.
     */
    count: number;
    constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
    _read(): void;
    // Private state. Types are intentionally omitted, as is usual for private
    // members in generated declaration files.
    private readonly q;
    private logger;
    private readonly table;
    private readonly originalLimit;
    private readonly opt;
    private rowsRetrieved;
    /**
     * Next shard to be used for querying.
     */
    private nextShard;
    private cursorByShard;
    private queryIsRunningByShard;
    private lastQueryDoneByShard;
    private doneShards;
    private paused;
    private done;
    private totalWait;
    // Private methods.
    private runNextQuery;
    private runQuery;
    private findNextFreeShard;
    private _getNextShardAndMove;
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { FieldPath } from '@google-cloud/firestore';
|
|
3
|
+
import { localTime } from '@naturalcycles/js-lib/datetime';
|
|
4
|
+
import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
|
|
5
|
+
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
|
|
6
|
+
import { unescapeDocId } from './firestore.util.js';
|
|
7
|
+
const SHARDS = 16;
|
|
8
|
+
const SHARD_COLUMN = 'shard16';
|
|
9
|
+
/**
|
|
10
|
+
* Highly, HIGHLY experimental!
|
|
11
|
+
*/
|
|
12
|
+
export class FirestoreShardedReadable extends Readable {
|
|
13
|
+
q;
|
|
14
|
+
dbQuery;
|
|
15
|
+
logger;
|
|
16
|
+
table;
|
|
17
|
+
originalLimit;
|
|
18
|
+
rowsRetrieved = 0;
|
|
19
|
+
/**
|
|
20
|
+
* Next shard to be used for querying.
|
|
21
|
+
*/
|
|
22
|
+
nextShard = 1;
|
|
23
|
+
cursorByShard = {};
|
|
24
|
+
queryIsRunningByShard = {};
|
|
25
|
+
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
|
|
26
|
+
paused = false;
|
|
27
|
+
done = false;
|
|
28
|
+
doneShards = new Set();
|
|
29
|
+
lastQueryDoneByShard = {};
|
|
30
|
+
totalWait = 0;
|
|
31
|
+
opt;
|
|
32
|
+
constructor(q, dbQuery, opt, logger) {
|
|
33
|
+
super({ objectMode: true });
|
|
34
|
+
this.q = q;
|
|
35
|
+
this.dbQuery = dbQuery;
|
|
36
|
+
this.logger = logger;
|
|
37
|
+
this.opt = {
|
|
38
|
+
batchSize: 3000,
|
|
39
|
+
...opt,
|
|
40
|
+
};
|
|
41
|
+
this.originalLimit = dbQuery._limitValue;
|
|
42
|
+
this.table = dbQuery.table;
|
|
43
|
+
logger.warn(`!! using experimentalShardedStream !! ${this.table}, batchSize: ${this.opt.batchSize}`);
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Counts how many times _read was called.
|
|
47
|
+
* For debugging.
|
|
48
|
+
*/
|
|
49
|
+
count = 0;
|
|
50
|
+
_read() {
|
|
51
|
+
// this.lastReadTimestamp = Date.now() as UnixTimestampMillis
|
|
52
|
+
// console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
|
|
53
|
+
this.count++;
|
|
54
|
+
if (this.done) {
|
|
55
|
+
this.logger.warn(`!!! _read was called, but done==true`);
|
|
56
|
+
return;
|
|
57
|
+
}
|
|
58
|
+
// const shard = this.getNextShardAndMove()
|
|
59
|
+
const shard = this.findNextFreeShard();
|
|
60
|
+
if (!shard) {
|
|
61
|
+
this.logger.log(`_read ${this.count}: all shards are busy, skipping`);
|
|
62
|
+
return;
|
|
63
|
+
}
|
|
64
|
+
void this.runNextQuery(shard).catch(err => {
|
|
65
|
+
console.log('error in runNextQuery', err);
|
|
66
|
+
this.emit('error', err);
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
async runNextQuery(shard) {
|
|
70
|
+
if (this.done)
|
|
71
|
+
return;
|
|
72
|
+
const { logger, table } = this;
|
|
73
|
+
if (this.lastQueryDoneByShard[shard]) {
|
|
74
|
+
this.totalWait += Date.now() - this.lastQueryDoneByShard[shard];
|
|
75
|
+
}
|
|
76
|
+
this.queryIsRunningByShard[shard] = true;
|
|
77
|
+
const limit = this.opt.batchSize;
|
|
78
|
+
// We have to orderBy documentId, to be able to use id as a cursor
|
|
79
|
+
let q = this.q.where(SHARD_COLUMN, '==', shard).orderBy(FieldPath.documentId()).limit(limit);
|
|
80
|
+
if (this.cursorByShard[shard]) {
|
|
81
|
+
q = q.startAfter(this.cursorByShard[shard]);
|
|
82
|
+
}
|
|
83
|
+
console.log(`runNextQuery[${shard}]`, {
|
|
84
|
+
retrieved: this.rowsRetrieved,
|
|
85
|
+
});
|
|
86
|
+
const qs = await this.runQuery(q);
|
|
87
|
+
if (!qs) {
|
|
88
|
+
// this means we have already emitted an unrecoverable error
|
|
89
|
+
return;
|
|
90
|
+
}
|
|
91
|
+
const rows = [];
|
|
92
|
+
let lastDocId;
|
|
93
|
+
for (const doc of qs.docs) {
|
|
94
|
+
lastDocId = doc.id;
|
|
95
|
+
rows.push({
|
|
96
|
+
id: unescapeDocId(doc.id),
|
|
97
|
+
...doc.data(),
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
this.rowsRetrieved += rows.length;
|
|
101
|
+
logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
|
|
102
|
+
this.cursorByShard[shard] = lastDocId;
|
|
103
|
+
this.queryIsRunningByShard[shard] = false; // ready to take more _reads
|
|
104
|
+
this.lastQueryDoneByShard[shard] = localTime.nowUnixMillis();
|
|
105
|
+
for (const row of rows) {
|
|
106
|
+
this.push(row);
|
|
107
|
+
}
|
|
108
|
+
if (qs.empty) {
|
|
109
|
+
logger.warn(`!!!! Shard ${shard} DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
|
|
110
|
+
this.doneShards.add(shard);
|
|
111
|
+
}
|
|
112
|
+
if (this.doneShards.size === SHARDS) {
|
|
113
|
+
logger.warn(`!!!! DONE: all shards completed, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
|
|
114
|
+
this.push(null);
|
|
115
|
+
this.paused = false;
|
|
116
|
+
this.done = true;
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
if (this.originalLimit && this.rowsRetrieved >= this.originalLimit) {
|
|
120
|
+
logger.warn(`!!!! DONE: reached total limit of ${this.originalLimit}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
|
|
121
|
+
this.push(null);
|
|
122
|
+
this.paused = false;
|
|
123
|
+
this.done = true;
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
// if (this.paused) {
|
|
127
|
+
// this.paused = false
|
|
128
|
+
// }
|
|
129
|
+
const nextShard = this.findNextFreeShard();
|
|
130
|
+
if (nextShard) {
|
|
131
|
+
void this.runNextQuery(nextShard);
|
|
132
|
+
}
|
|
133
|
+
else {
|
|
134
|
+
logger.warn(`${table} all shards are busy in runNextQuery, skipping`);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
async runQuery(q) {
|
|
138
|
+
const { table, logger } = this;
|
|
139
|
+
try {
|
|
140
|
+
return await pRetry(async () => {
|
|
141
|
+
return await q.get();
|
|
142
|
+
}, {
|
|
143
|
+
name: `FirestoreStreamReadable.query(${table})`,
|
|
144
|
+
maxAttempts: 5,
|
|
145
|
+
delay: 5000,
|
|
146
|
+
delayMultiplier: 2,
|
|
147
|
+
logger,
|
|
148
|
+
timeout: 120_000, // 2 minutes
|
|
149
|
+
});
|
|
150
|
+
}
|
|
151
|
+
catch (err) {
|
|
152
|
+
console.log(`FirestoreStreamReadable error!\n`, {
|
|
153
|
+
table,
|
|
154
|
+
rowsRetrieved: this.rowsRetrieved,
|
|
155
|
+
}, err);
|
|
156
|
+
this.emit('error', err);
|
|
157
|
+
return;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
findNextFreeShard() {
|
|
161
|
+
for (let shard = 1; shard <= SHARDS; shard++) {
|
|
162
|
+
if (!this.queryIsRunningByShard[shard] && !this.doneShards.has(shard)) {
|
|
163
|
+
return shard;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
|
|
168
|
+
_getNextShardAndMove() {
|
|
169
|
+
const shard = this.nextShard;
|
|
170
|
+
this.nextShard = shard === SHARDS ? 1 : shard + 1;
|
|
171
|
+
return shard;
|
|
172
|
+
}
|
|
173
|
+
}
|
|
@@ -15,15 +15,14 @@ export declare class FirestoreStreamReadable<T extends ObjectWithId = any> exten
|
|
|
15
15
|
private queryIsRunning;
|
|
16
16
|
private paused;
|
|
17
17
|
private done;
|
|
18
|
-
private lastQueryDone?;
|
|
19
|
-
private totalWait;
|
|
20
|
-
private readonly opt;
|
|
21
|
-
constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
|
|
22
18
|
/**
|
|
23
19
|
* Counts how many times _read was called.
|
|
24
20
|
* For debugging.
|
|
25
21
|
*/
|
|
26
|
-
|
|
22
|
+
countReads: number;
|
|
23
|
+
private readonly opt;
|
|
24
|
+
constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
|
|
27
25
|
_read(): void;
|
|
28
26
|
private runNextQuery;
|
|
27
|
+
private runQuery;
|
|
29
28
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { Readable } from 'node:stream';
|
|
2
2
|
import { FieldPath } from '@google-cloud/firestore';
|
|
3
|
-
import {
|
|
3
|
+
import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
|
|
4
|
+
import { _since } from '@naturalcycles/js-lib/datetime/time.util.js';
|
|
4
5
|
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
|
|
5
6
|
import { unescapeDocId } from './firestore.util.js';
|
|
6
7
|
export class FirestoreStreamReadable extends Readable {
|
|
@@ -13,61 +14,60 @@ export class FirestoreStreamReadable extends Readable {
|
|
|
13
14
|
queryIsRunning = false;
|
|
14
15
|
paused = false;
|
|
15
16
|
done = false;
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
/**
|
|
18
|
+
* Counts how many times _read was called.
|
|
19
|
+
* For debugging.
|
|
20
|
+
*/
|
|
21
|
+
countReads = 0;
|
|
18
22
|
opt;
|
|
19
|
-
// private readonly dsOpt: RunQueryOptions
|
|
20
23
|
constructor(q, dbQuery, opt, logger) {
|
|
21
|
-
|
|
24
|
+
// 10_000 was optimal in benchmarks
|
|
25
|
+
const { batchSize = 10_000 } = opt;
|
|
26
|
+
const { highWaterMark = batchSize * 3 } = opt;
|
|
27
|
+
// Defaulting highWaterMark to 3x batchSize
|
|
28
|
+
super({ objectMode: true, highWaterMark });
|
|
22
29
|
this.q = q;
|
|
23
30
|
this.logger = logger;
|
|
24
31
|
this.opt = {
|
|
25
|
-
rssLimitMB: 1000,
|
|
26
|
-
batchSize: 1000,
|
|
27
32
|
...opt,
|
|
33
|
+
batchSize,
|
|
34
|
+
highWaterMark,
|
|
28
35
|
};
|
|
29
36
|
// todo: support PITR!
|
|
30
|
-
// this.dsOpt = {}
|
|
31
|
-
// if (opt.readAt) {
|
|
32
|
-
// // Datastore expects UnixTimestamp in milliseconds
|
|
33
|
-
// this.dsOpt.readTime = opt.readAt * 1000
|
|
34
|
-
// }
|
|
35
37
|
this.originalLimit = dbQuery._limitValue;
|
|
36
38
|
this.table = dbQuery.table;
|
|
37
|
-
logger.warn(
|
|
39
|
+
logger.warn(`!!! using experimentalCursorStream`, {
|
|
40
|
+
table: this.table,
|
|
41
|
+
batchSize,
|
|
42
|
+
highWaterMark,
|
|
43
|
+
});
|
|
38
44
|
}
|
|
39
|
-
/**
|
|
40
|
-
* Counts how many times _read was called.
|
|
41
|
-
* For debugging.
|
|
42
|
-
*/
|
|
43
|
-
count = 0;
|
|
44
45
|
_read() {
|
|
45
46
|
// this.lastReadTimestamp = Date.now() as UnixTimestampMillis
|
|
46
47
|
// console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
|
|
47
|
-
this.
|
|
48
|
+
this.countReads++;
|
|
48
49
|
if (this.done) {
|
|
49
50
|
this.logger.warn(`!!! _read was called, but done==true`);
|
|
50
51
|
return;
|
|
51
52
|
}
|
|
52
|
-
if (
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
this.emit('error', err);
|
|
56
|
-
});
|
|
53
|
+
if (this.paused) {
|
|
54
|
+
this.logger.log(`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`);
|
|
55
|
+
this.paused = false;
|
|
57
56
|
}
|
|
58
|
-
|
|
59
|
-
this.logger.log(`_read
|
|
57
|
+
if (this.queryIsRunning) {
|
|
58
|
+
this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`);
|
|
60
59
|
// todo: check if this can cause a "hang", if no more _reads would come later and we get stuck?
|
|
60
|
+
return;
|
|
61
61
|
}
|
|
62
|
+
void this.runNextQuery().catch(err => {
|
|
63
|
+
console.log('error in runNextQuery', err);
|
|
64
|
+
this.emit('error', err);
|
|
65
|
+
});
|
|
62
66
|
}
|
|
63
67
|
async runNextQuery() {
|
|
64
68
|
if (this.done)
|
|
65
69
|
return;
|
|
66
70
|
const { logger, table } = this;
|
|
67
|
-
if (this.lastQueryDone) {
|
|
68
|
-
const now = Date.now();
|
|
69
|
-
this.totalWait += now - this.lastQueryDone;
|
|
70
|
-
}
|
|
71
71
|
this.queryIsRunning = true;
|
|
72
72
|
let limit = this.opt.batchSize;
|
|
73
73
|
if (this.originalLimit) {
|
|
@@ -79,26 +79,15 @@ export class FirestoreStreamReadable extends Readable {
|
|
|
79
79
|
if (this.endCursor) {
|
|
80
80
|
q = q.startAfter(this.endCursor);
|
|
81
81
|
}
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
logger,
|
|
92
|
-
timeout: 120_000, // 2 minutes
|
|
93
|
-
});
|
|
94
|
-
}
|
|
95
|
-
catch (err) {
|
|
96
|
-
console.log(`FirestoreStreamReadable error!\n`, {
|
|
97
|
-
table,
|
|
98
|
-
rowsRetrieved: this.rowsRetrieved,
|
|
99
|
-
}, err);
|
|
100
|
-
this.emit('error', err);
|
|
101
|
-
// clearInterval(this.maxWaitInterval)
|
|
82
|
+
// logger.log(`runNextQuery`, {
|
|
83
|
+
// rowsRetrieved: this.rowsRetrieved,
|
|
84
|
+
// paused: this.paused,
|
|
85
|
+
// })
|
|
86
|
+
const started = localTime.nowUnixMillis();
|
|
87
|
+
const qs = await this.runQuery(q);
|
|
88
|
+
logger.log(`${table} query took ${_since(started)}`);
|
|
89
|
+
if (!qs) {
|
|
90
|
+
// error already emitted in runQuery
|
|
102
91
|
return;
|
|
103
92
|
}
|
|
104
93
|
const rows = [];
|
|
@@ -111,37 +100,57 @@ export class FirestoreStreamReadable extends Readable {
|
|
|
111
100
|
});
|
|
112
101
|
}
|
|
113
102
|
this.rowsRetrieved += rows.length;
|
|
114
|
-
logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved
|
|
103
|
+
logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved`);
|
|
115
104
|
this.endCursor = lastDocId;
|
|
116
105
|
this.queryIsRunning = false; // ready to take more _reads
|
|
117
|
-
|
|
106
|
+
let shouldContinue = false;
|
|
118
107
|
for (const row of rows) {
|
|
119
|
-
this.push(row);
|
|
108
|
+
shouldContinue = this.push(row);
|
|
120
109
|
}
|
|
121
|
-
if (
|
|
122
|
-
logger.warn(
|
|
110
|
+
if (!rows.length || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
|
|
111
|
+
logger.warn(`${table} DONE! ${this.rowsRetrieved} rowsRetrieved`);
|
|
123
112
|
this.push(null);
|
|
124
|
-
this.paused = false;
|
|
125
113
|
this.done = true;
|
|
114
|
+
this.paused = false;
|
|
126
115
|
return;
|
|
127
116
|
}
|
|
128
|
-
if (
|
|
129
|
-
//
|
|
130
|
-
|
|
131
|
-
|
|
117
|
+
if (shouldContinue) {
|
|
118
|
+
// Keep the stream flowing
|
|
119
|
+
logger.log(`${table} continuing the stream`);
|
|
120
|
+
void this.runNextQuery();
|
|
132
121
|
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
if (rssMB <= rssLimitMB) {
|
|
122
|
+
else {
|
|
123
|
+
// Not starting the next query
|
|
136
124
|
if (this.paused) {
|
|
137
|
-
logger.
|
|
138
|
-
|
|
125
|
+
logger.log(`${table} stream is already paused`);
|
|
126
|
+
}
|
|
127
|
+
else {
|
|
128
|
+
logger.warn(`${table} pausing the stream`);
|
|
129
|
+
this.paused = true;
|
|
139
130
|
}
|
|
140
|
-
void this.runNextQuery();
|
|
141
131
|
}
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
132
|
+
}
|
|
133
|
+
async runQuery(q) {
|
|
134
|
+
const { table, logger } = this;
|
|
135
|
+
try {
|
|
136
|
+
return await pRetry(async () => {
|
|
137
|
+
return await q.get();
|
|
138
|
+
}, {
|
|
139
|
+
name: `FirestoreStreamReadable.query(${table})`,
|
|
140
|
+
maxAttempts: 5,
|
|
141
|
+
delay: 5000,
|
|
142
|
+
delayMultiplier: 2,
|
|
143
|
+
logger,
|
|
144
|
+
timeout: 120_000, // 2 minutes
|
|
145
|
+
});
|
|
146
|
+
}
|
|
147
|
+
catch (err) {
|
|
148
|
+
console.log(`FirestoreStreamReadable error!\n`, {
|
|
149
|
+
table,
|
|
150
|
+
rowsRetrieved: this.rowsRetrieved,
|
|
151
|
+
}, err);
|
|
152
|
+
this.emit('error', err);
|
|
153
|
+
return;
|
|
145
154
|
}
|
|
146
155
|
}
|
|
147
156
|
}
|
package/dist/query.util.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type Query } from '@google-cloud/firestore';
|
|
2
2
|
import type { DBQuery } from '@naturalcycles/db-lib';
|
|
3
3
|
import type { ObjectWithId } from '@naturalcycles/js-lib/types';
|
|
4
4
|
export declare function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(dbQuery: DBQuery<ROW>, emptyQuery: Query): Query;
|
package/dist/query.util.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { FieldPath } from '@google-cloud/firestore';
|
|
1
2
|
// Map DBQueryFilterOp to WhereFilterOp
|
|
2
3
|
// Currently it's fully aligned!
|
|
3
4
|
const OP_MAP = {
|
|
@@ -8,27 +9,35 @@ export function dbQueryToFirestoreQuery(dbQuery, emptyQuery) {
|
|
|
8
9
|
let q = emptyQuery;
|
|
9
10
|
// filter
|
|
10
11
|
for (const f of dbQuery._filters) {
|
|
11
|
-
q = q.where(f.name, OP_MAP[f.op] || f.op, f.val);
|
|
12
|
+
q = q.where(mapName(f.name), OP_MAP[f.op] || f.op, f.val);
|
|
12
13
|
}
|
|
13
14
|
// order
|
|
14
15
|
for (const ord of dbQuery._orders) {
|
|
15
|
-
|
|
16
|
-
// .orderBy(FieldPath.documentId())
|
|
17
|
-
q = q.orderBy(ord.name, ord.descending ? 'desc' : 'asc');
|
|
16
|
+
q = q.orderBy(mapName(ord.name), ord.descending ? 'desc' : 'asc');
|
|
18
17
|
}
|
|
19
18
|
// limit
|
|
20
19
|
q = q.limit(dbQuery._limitValue);
|
|
21
20
|
// selectedFields
|
|
22
21
|
if (dbQuery._selectedFieldNames) {
|
|
23
|
-
//
|
|
24
|
-
|
|
22
|
+
// id is filtered out, because in Firestore it's not a "property",
|
|
23
|
+
// and doc.id is always returned, even if we request empty set of fields
|
|
24
|
+
q = q.select(...dbQuery._selectedFieldNames.filter(n => n !== 'id'));
|
|
25
25
|
}
|
|
26
26
|
// cursor
|
|
27
27
|
if (dbQuery._startCursor) {
|
|
28
|
-
|
|
28
|
+
// Using `startAfter`, not `startAt` here
|
|
29
|
+
// Why?
|
|
30
|
+
// Because in Firestore, you can only retrieve "last document id" to be used as Cursor.
|
|
31
|
+
// That document was already retrieved, so it makes sense to start AFTER it.
|
|
32
|
+
q = q.startAfter(dbQuery._startCursor);
|
|
29
33
|
}
|
|
30
34
|
if (dbQuery._endCursor) {
|
|
31
35
|
q = q.endAt(dbQuery._endCursor);
|
|
32
36
|
}
|
|
33
37
|
return q;
|
|
34
38
|
}
|
|
39
|
+
function mapName(name) {
|
|
40
|
+
if (name === 'id')
|
|
41
|
+
return FieldPath.documentId();
|
|
42
|
+
return name;
|
|
43
|
+
}
|
package/package.json
CHANGED
package/src/firestore.db.ts
CHANGED
|
@@ -28,10 +28,11 @@ import { _assert } from '@naturalcycles/js-lib/error/assert.js'
|
|
|
28
28
|
import { type CommonLogger, commonLoggerMinLevel } from '@naturalcycles/js-lib/log'
|
|
29
29
|
import { _filterUndefinedValues, _omit } from '@naturalcycles/js-lib/object/object.util.js'
|
|
30
30
|
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js'
|
|
31
|
-
import type {
|
|
31
|
+
import type { ObjectWithId, PositiveInteger, StringMap } from '@naturalcycles/js-lib/types'
|
|
32
32
|
import { _stringMapEntries } from '@naturalcycles/js-lib/types'
|
|
33
33
|
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
|
|
34
34
|
import { escapeDocId, unescapeDocId } from './firestore.util.js'
|
|
35
|
+
import { FirestoreShardedReadable } from './firestoreShardedReadable.js'
|
|
35
36
|
import { FirestoreStreamReadable } from './firestoreStreamReadable.js'
|
|
36
37
|
import { dbQueryToFirestoreQuery } from './query.util.js'
|
|
37
38
|
|
|
@@ -50,6 +51,7 @@ export class FirestoreDB extends BaseCommonDB implements CommonDB {
|
|
|
50
51
|
...commonDBFullSupport,
|
|
51
52
|
patchByQuery: false, // todo: can be implemented
|
|
52
53
|
tableSchemas: false,
|
|
54
|
+
createTransaction: false, // Firestore SDK doesn't support it
|
|
53
55
|
}
|
|
54
56
|
|
|
55
57
|
// GET
|
|
@@ -167,6 +169,15 @@ export class FirestoreDB extends BaseCommonDB implements CommonDB {
|
|
|
167
169
|
)
|
|
168
170
|
}
|
|
169
171
|
|
|
172
|
+
if (opt.experimentalShardedStream) {
|
|
173
|
+
return new FirestoreShardedReadable(
|
|
174
|
+
firestoreQuery,
|
|
175
|
+
q,
|
|
176
|
+
opt,
|
|
177
|
+
commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'),
|
|
178
|
+
)
|
|
179
|
+
}
|
|
180
|
+
|
|
170
181
|
return (firestoreQuery.stream() as ReadableTyped<QueryDocumentSnapshot<any>>).map(doc => {
|
|
171
182
|
return {
|
|
172
183
|
id: unescapeDocId(doc.id),
|
|
@@ -545,38 +556,24 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
|
|
|
545
556
|
*/
|
|
546
557
|
experimentalCursorStream?: boolean
|
|
547
558
|
|
|
559
|
+
experimentalShardedStream?: boolean
|
|
560
|
+
|
|
548
561
|
/**
|
|
549
562
|
* Applicable to `experimentalCursorStream`.
|
|
550
563
|
* Defines the size (limit) of each individual query.
|
|
551
564
|
*
|
|
552
|
-
* Default:
|
|
553
|
-
*/
|
|
554
|
-
batchSize?: number
|
|
555
|
-
|
|
556
|
-
/**
|
|
557
|
-
* Applicable to `experimentalCursorStream`
|
|
558
|
-
*
|
|
559
|
-
* Set to a value (number of Megabytes) to control the peak RSS size.
|
|
560
|
-
* If limit is reached - streaming will pause until the stream keeps up, and then
|
|
561
|
-
* resumes.
|
|
562
|
-
*
|
|
563
|
-
* Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
|
|
564
|
-
* when _read is called.
|
|
565
|
-
*
|
|
566
|
-
* @default 1000
|
|
565
|
+
* Default: 10_000
|
|
567
566
|
*/
|
|
568
|
-
|
|
567
|
+
batchSize?: PositiveInteger
|
|
569
568
|
|
|
570
569
|
/**
|
|
571
|
-
*
|
|
572
|
-
* Default
|
|
573
|
-
*
|
|
574
|
-
*
|
|
575
|
-
*
|
|
576
|
-
* This mode is useful e.g for DB migrations, where you want to avoid "stale data".
|
|
577
|
-
* So, it minimizes the time between "item loaded" and "item saved" during DB migration.
|
|
570
|
+
* Defaults to 3x batchSize.
|
|
571
|
+
* Default batchSize is 10_000, so default highWaterMark is 30_000.
|
|
572
|
+
* Controls how many rows to have "buffered".
|
|
573
|
+
* Should be at least 1x batchSize, otherwise the stream will be "starving"
|
|
574
|
+
* between the queries.
|
|
578
575
|
*/
|
|
579
|
-
|
|
576
|
+
highWaterMark?: PositiveInteger
|
|
580
577
|
|
|
581
578
|
/**
|
|
582
579
|
* Set to `true` to log additional debug info, when using experimentalCursorStream.
|
|
@@ -584,15 +581,6 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
|
|
|
584
581
|
* @default false
|
|
585
582
|
*/
|
|
586
583
|
debug?: boolean
|
|
587
|
-
|
|
588
|
-
/**
|
|
589
|
-
* Default is undefined.
|
|
590
|
-
* If set - sets a "safety timer", which will force call _read after the specified number of seconds.
|
|
591
|
-
* This is to prevent possible "dead-lock"/race-condition that would make the stream "hang".
|
|
592
|
-
*
|
|
593
|
-
* @experimental
|
|
594
|
-
*/
|
|
595
|
-
maxWait?: NumberOfSeconds
|
|
596
584
|
}
|
|
597
585
|
|
|
598
586
|
export interface FirestoreDBOptions extends CommonDBOptions {}
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import { Readable } from 'node:stream'
|
|
2
|
+
import { FieldPath, type Query, type QuerySnapshot } from '@google-cloud/firestore'
|
|
3
|
+
import type { DBQuery } from '@naturalcycles/db-lib'
|
|
4
|
+
import { localTime } from '@naturalcycles/js-lib/datetime'
|
|
5
|
+
import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js'
|
|
6
|
+
import type { CommonLogger } from '@naturalcycles/js-lib/log'
|
|
7
|
+
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
|
|
8
|
+
import type {
|
|
9
|
+
ObjectWithId,
|
|
10
|
+
PositiveInteger,
|
|
11
|
+
StringMap,
|
|
12
|
+
UnixTimestampMillis,
|
|
13
|
+
} from '@naturalcycles/js-lib/types'
|
|
14
|
+
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
|
|
15
|
+
import type { FirestoreDBStreamOptions } from './firestore.db.js'
|
|
16
|
+
import { unescapeDocId } from './firestore.util.js'
|
|
17
|
+
|
|
18
|
+
/**
 * Number of shards that are queried. Shard numbers run from 1 to SHARDS (inclusive).
 */
const SHARDS = 16
/**
 * Name of the document field that is matched (with `==`) against the shard number.
 */
const SHARD_COLUMN = 'shard16'

/**
 * Highly, HIGHLY experimental!
 *
 * Object-mode Readable that streams the rows of a Firestore query by running
 * separate sub-queries per shard (`SHARD_COLUMN == shard`), each paginated by
 * document id with `startAfter`.
 *
 * Each `_read` call starts a query on the first shard that is neither busy nor exhausted,
 * and every completed query immediately starts another one, so several shard queries
 * may be in flight at the same time.
 *
 * The stream ends when all shards returned an empty page, or when `dbQuery._limitValue`
 * (if set) rows were emitted.
 *
 * NOTE: the return value of `push()` is not inspected, so queries keep being chained
 * regardless of how full the internal buffer is.
 */
export class FirestoreShardedReadable<T extends ObjectWithId = any>
  extends Readable
  implements ReadableTyped<T>
{
  private readonly table: string
  /**
   * Total row limit of the original query. Falsy value means "no limit".
   */
  private readonly originalLimit: number
  private rowsRetrieved = 0
  /**
   * Next shard to be used for querying.
   */
  private nextShard = 1
  /**
   * Last document id (unescaped form is NOT used here) retrieved per shard, used as `startAfter` cursor.
   */
  private cursorByShard: StringMap = {}
  private queryIsRunningByShard: StringMap<boolean> = {}
  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
  private paused = false
  /**
   * Set to true when the stream should stop querying and pushing rows.
   */
  private done = false
  /**
   * Shards whose last query returned an empty page.
   */
  private doneShards = new Set<PositiveInteger>()
  private lastQueryDoneByShard: StringMap<UnixTimestampMillis> = {}
  /**
   * Accumulated time (ms) between a shard finishing one query and starting the next one.
   */
  private totalWait = 0

  private readonly opt: FirestoreDBStreamOptions & { batchSize: number }

  /**
   * @param q Firestore query to shard; a shard filter, documentId ordering and a batch limit are added per sub-query
   * @param dbQuery source of `table` and of the total row limit (`_limitValue`)
   * @param opt stream options; `batchSize` defaults to 3000
   * @param logger receives progress and diagnostic messages
   */
  constructor(
    private readonly q: Query,
    readonly dbQuery: DBQuery<T>,
    opt: FirestoreDBStreamOptions,
    private logger: CommonLogger,
  ) {
    super({ objectMode: true })

    this.opt = {
      batchSize: 3000,
      ...opt,
    }

    this.originalLimit = dbQuery._limitValue
    this.table = dbQuery.table

    logger.warn(
      `!! using experimentalShardedStream !! ${this.table}, batchSize: ${this.opt.batchSize}`,
    )
  }

  /**
   * Counts how many times _read was called.
   * For debugging.
   */
  count = 0

  /**
   * Starts a query on the first free shard, if there is one.
   * Does nothing when the stream is done or when every remaining shard already has a query running.
   */
  override _read(): void {
    // this.lastReadTimestamp = Date.now() as UnixTimestampMillis

    // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
    this.count++

    if (this.done) {
      this.logger.warn(`!!! _read was called, but done==true`)
      return
    }

    // const shard = this.getNextShardAndMove()
    const shard = this.findNextFreeShard()
    if (!shard) {
      this.logger.log(`_read ${this.count}: all shards are busy, skipping`)
      return
    }

    this.startQuery(shard)
  }

  /**
   * Runs `runNextQuery` in the background.
   * A rejection is logged and surfaced as an 'error' event, so it never becomes an unhandled rejection.
   */
  private startQuery(shard: PositiveInteger): void {
    void this.runNextQuery(shard).catch(err => {
      console.log('error in runNextQuery', err)
      this.emit('error', err)
    })
  }

  /**
   * Fetches the next page of the given shard, pushes its rows into the stream,
   * ends the stream if it is complete, otherwise chains a query on the next free shard.
   */
  private async runNextQuery(shard: PositiveInteger): Promise<void> {
    if (this.done) return
    const { logger, table } = this

    const lastQueryDone = this.lastQueryDoneByShard[shard]
    if (lastQueryDone) {
      this.totalWait += Date.now() - lastQueryDone
    }

    this.queryIsRunningByShard[shard] = true

    // We have to orderBy documentId, to be able to use id as a cursor
    let q = this.q
      .where(SHARD_COLUMN, '==', shard)
      .orderBy(FieldPath.documentId())
      .limit(this.opt.batchSize)
    const cursor = this.cursorByShard[shard]
    if (cursor) {
      q = q.startAfter(cursor)
    }

    console.log(`runNextQuery[${shard}]`, {
      retrieved: this.rowsRetrieved,
    })
    const qs = await this.runQuery(q)
    if (!qs) {
      // This means we have already emitted an unrecoverable error.
      // Stop the other shards from issuing further queries.
      this.done = true
      return
    }

    if (this.done) {
      // Another shard has ended the stream while this query was in flight.
      // Pushing now would be a push-after-EOF, so the result is dropped.
      return
    }

    // Never emit more rows than the original query limit allows
    const remaining = this.originalLimit
      ? this.originalLimit - this.rowsRetrieved
      : Number.POSITIVE_INFINITY
    const docs = qs.docs.slice(0, remaining)
    const rows = docs.map(
      doc =>
        ({
          id: unescapeDocId(doc.id),
          ...doc.data(),
        }) as T,
    )
    const lastDocId: string | undefined = docs[docs.length - 1]?.id

    this.rowsRetrieved += rows.length
    logger.log(
      `${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
        this.totalWait,
      )}`,
    )

    this.cursorByShard[shard] = lastDocId
    this.queryIsRunningByShard[shard] = false // ready to take more _reads
    this.lastQueryDoneByShard[shard] = localTime.nowUnixMillis()

    for (const row of rows) {
      this.push(row)
    }

    if (qs.empty) {
      logger.warn(
        `!!!! Shard ${shard} DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      this.doneShards.add(shard)
    }

    if (this.doneShards.size === SHARDS) {
      logger.warn(
        `!!!! DONE: all shards completed, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      this.finish()
      return
    }

    if (this.originalLimit && this.rowsRetrieved >= this.originalLimit) {
      logger.warn(
        `!!!! DONE: reached total limit of ${this.originalLimit}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      this.finish()
      return
    }

    const nextShard = this.findNextFreeShard()
    if (nextShard) {
      this.startQuery(nextShard)
    } else {
      logger.warn(`${table} all shards are busy in runNextQuery, skipping`)
    }
  }

  /**
   * Signals end-of-stream and marks the stream as done, so no more queries are started.
   */
  private finish(): void {
    this.push(null)
    this.paused = false
    this.done = true
  }

  /**
   * Executes the query with retries (pRetry: up to 5 attempts, 2 minute timeout).
   *
   * @returns the snapshot, or undefined if all attempts failed,
   * in which case an 'error' event has already been emitted
   */
  private async runQuery(q: Query): Promise<QuerySnapshot | undefined> {
    const { table, logger } = this

    try {
      return await pRetry(
        async () => {
          return await q.get()
        },
        {
          name: `FirestoreShardedReadable.query(${table})`,
          maxAttempts: 5,
          delay: 5000,
          delayMultiplier: 2,
          logger,
          timeout: 120_000, // 2 minutes
        },
      )
    } catch (err) {
      console.log(
        `FirestoreShardedReadable error!\n`,
        {
          table,
          rowsRetrieved: this.rowsRetrieved,
        },
        err,
      )
      this.emit('error', err)
      return
    }
  }

  /**
   * Returns the lowest shard number that has no query running and is not exhausted,
   * or undefined if there is no such shard.
   */
  private findNextFreeShard(): PositiveInteger | undefined {
    for (let shard = 1; shard <= SHARDS; shard++) {
      if (!this.queryIsRunningByShard[shard] && !this.doneShards.has(shard)) {
        return shard
      }
    }
  }

  /**
   * Round-robin alternative to `findNextFreeShard`: returns `nextShard` and advances it,
   * wrapping from SHARDS back to 1. Currently not called.
   */
  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
  private _getNextShardAndMove(): PositiveInteger {
    const shard = this.nextShard
    this.nextShard = shard === SHARDS ? 1 : shard + 1
    return shard
  }
}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { Readable } from 'node:stream'
|
|
2
2
|
import { FieldPath, type Query, type QuerySnapshot } from '@google-cloud/firestore'
|
|
3
3
|
import type { DBQuery } from '@naturalcycles/db-lib'
|
|
4
|
-
import {
|
|
4
|
+
import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js'
|
|
5
|
+
import { _since } from '@naturalcycles/js-lib/datetime/time.util.js'
|
|
5
6
|
import type { CommonLogger } from '@naturalcycles/js-lib/log'
|
|
6
7
|
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
|
|
7
8
|
import type { ObjectWithId } from '@naturalcycles/js-lib/types'
|
|
@@ -20,11 +21,13 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
|
|
|
20
21
|
private queryIsRunning = false
|
|
21
22
|
private paused = false
|
|
22
23
|
private done = false
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
/**
|
|
25
|
+
* Counts how many times _read was called.
|
|
26
|
+
* For debugging.
|
|
27
|
+
*/
|
|
28
|
+
countReads = 0
|
|
25
29
|
|
|
26
|
-
private readonly opt: FirestoreDBStreamOptions & { batchSize: number;
|
|
27
|
-
// private readonly dsOpt: RunQueryOptions
|
|
30
|
+
private readonly opt: FirestoreDBStreamOptions & { batchSize: number; highWaterMark: number }
|
|
28
31
|
|
|
29
32
|
constructor(
|
|
30
33
|
private q: Query,
|
|
@@ -32,65 +35,63 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
|
|
|
32
35
|
opt: FirestoreDBStreamOptions,
|
|
33
36
|
private logger: CommonLogger,
|
|
34
37
|
) {
|
|
35
|
-
|
|
38
|
+
// 10_000 was optimal in benchmarks
|
|
39
|
+
const { batchSize = 10_000 } = opt
|
|
40
|
+
const { highWaterMark = batchSize * 3 } = opt
|
|
41
|
+
// Defaulting highWaterMark to 3x batchSize
|
|
42
|
+
super({ objectMode: true, highWaterMark })
|
|
36
43
|
|
|
37
44
|
this.opt = {
|
|
38
|
-
rssLimitMB: 1000,
|
|
39
|
-
batchSize: 1000,
|
|
40
45
|
...opt,
|
|
46
|
+
batchSize,
|
|
47
|
+
highWaterMark,
|
|
41
48
|
}
|
|
42
49
|
// todo: support PITR!
|
|
43
|
-
// this.dsOpt = {}
|
|
44
|
-
// if (opt.readAt) {
|
|
45
|
-
// // Datastore expects UnixTimestamp in milliseconds
|
|
46
|
-
// this.dsOpt.readTime = opt.readAt * 1000
|
|
47
|
-
// }
|
|
48
50
|
|
|
49
51
|
this.originalLimit = dbQuery._limitValue
|
|
50
52
|
this.table = dbQuery.table
|
|
51
53
|
|
|
52
|
-
logger.warn(
|
|
53
|
-
|
|
54
|
-
|
|
54
|
+
logger.warn(`!!! using experimentalCursorStream`, {
|
|
55
|
+
table: this.table,
|
|
56
|
+
batchSize,
|
|
57
|
+
highWaterMark,
|
|
58
|
+
})
|
|
55
59
|
}
|
|
56
60
|
|
|
57
|
-
/**
|
|
58
|
-
* Counts how many times _read was called.
|
|
59
|
-
* For debugging.
|
|
60
|
-
*/
|
|
61
|
-
count = 0
|
|
62
|
-
|
|
63
61
|
override _read(): void {
|
|
64
62
|
// this.lastReadTimestamp = Date.now() as UnixTimestampMillis
|
|
65
63
|
|
|
66
64
|
// console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
|
|
67
|
-
this.
|
|
65
|
+
this.countReads++
|
|
68
66
|
|
|
69
67
|
if (this.done) {
|
|
70
68
|
this.logger.warn(`!!! _read was called, but done==true`)
|
|
71
69
|
return
|
|
72
70
|
}
|
|
73
71
|
|
|
74
|
-
if (
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
}
|
|
80
|
-
|
|
72
|
+
if (this.paused) {
|
|
73
|
+
this.logger.log(
|
|
74
|
+
`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`,
|
|
75
|
+
)
|
|
76
|
+
this.paused = false
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
if (this.queryIsRunning) {
|
|
80
|
+
this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`)
|
|
81
81
|
// todo: check if this can cause a "hang", if no more _reads would come later and we get stuck?
|
|
82
|
+
return
|
|
82
83
|
}
|
|
84
|
+
|
|
85
|
+
void this.runNextQuery().catch(err => {
|
|
86
|
+
console.log('error in runNextQuery', err)
|
|
87
|
+
this.emit('error', err)
|
|
88
|
+
})
|
|
83
89
|
}
|
|
84
90
|
|
|
85
91
|
private async runNextQuery(): Promise<void> {
|
|
86
92
|
if (this.done) return
|
|
87
93
|
const { logger, table } = this
|
|
88
94
|
|
|
89
|
-
if (this.lastQueryDone) {
|
|
90
|
-
const now = Date.now()
|
|
91
|
-
this.totalWait += now - this.lastQueryDone
|
|
92
|
-
}
|
|
93
|
-
|
|
94
95
|
this.queryIsRunning = true
|
|
95
96
|
|
|
96
97
|
let limit = this.opt.batchSize
|
|
@@ -106,40 +107,23 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
|
|
|
106
107
|
q = q.startAfter(this.endCursor)
|
|
107
108
|
}
|
|
108
109
|
|
|
109
|
-
|
|
110
|
+
// logger.log(`runNextQuery`, {
|
|
111
|
+
// rowsRetrieved: this.rowsRetrieved,
|
|
112
|
+
// paused: this.paused,
|
|
113
|
+
// })
|
|
110
114
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
{
|
|
117
|
-
name: `FirestoreStreamReadable.query(${table})`,
|
|
118
|
-
maxAttempts: 5,
|
|
119
|
-
delay: 5000,
|
|
120
|
-
delayMultiplier: 2,
|
|
121
|
-
logger,
|
|
122
|
-
timeout: 120_000, // 2 minutes
|
|
123
|
-
},
|
|
124
|
-
)
|
|
125
|
-
} catch (err) {
|
|
126
|
-
console.log(
|
|
127
|
-
`FirestoreStreamReadable error!\n`,
|
|
128
|
-
{
|
|
129
|
-
table,
|
|
130
|
-
rowsRetrieved: this.rowsRetrieved,
|
|
131
|
-
},
|
|
132
|
-
err,
|
|
133
|
-
)
|
|
134
|
-
this.emit('error', err)
|
|
135
|
-
// clearInterval(this.maxWaitInterval)
|
|
115
|
+
const started = localTime.nowUnixMillis()
|
|
116
|
+
const qs = await this.runQuery(q)
|
|
117
|
+
logger.log(`${table} query took ${_since(started)}`)
|
|
118
|
+
if (!qs) {
|
|
119
|
+
// error already emitted in runQuery
|
|
136
120
|
return
|
|
137
121
|
}
|
|
138
122
|
|
|
139
123
|
const rows: T[] = []
|
|
140
124
|
let lastDocId: string | undefined
|
|
141
125
|
|
|
142
|
-
for (const doc of qs
|
|
126
|
+
for (const doc of qs.docs) {
|
|
143
127
|
lastDocId = doc.id
|
|
144
128
|
rows.push({
|
|
145
129
|
id: unescapeDocId(doc.id),
|
|
@@ -148,48 +132,67 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
|
|
|
148
132
|
}
|
|
149
133
|
|
|
150
134
|
this.rowsRetrieved += rows.length
|
|
151
|
-
logger.log(
|
|
152
|
-
`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
|
|
153
|
-
this.totalWait,
|
|
154
|
-
)}`,
|
|
155
|
-
)
|
|
135
|
+
logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved`)
|
|
156
136
|
|
|
157
137
|
this.endCursor = lastDocId
|
|
158
138
|
this.queryIsRunning = false // ready to take more _reads
|
|
159
|
-
|
|
139
|
+
let shouldContinue = false
|
|
160
140
|
|
|
161
141
|
for (const row of rows) {
|
|
162
|
-
this.push(row)
|
|
142
|
+
shouldContinue = this.push(row)
|
|
163
143
|
}
|
|
164
144
|
|
|
165
|
-
if (
|
|
166
|
-
logger.warn(
|
|
167
|
-
`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
|
|
168
|
-
)
|
|
145
|
+
if (!rows.length || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
|
|
146
|
+
logger.warn(`${table} DONE! ${this.rowsRetrieved} rowsRetrieved`)
|
|
169
147
|
this.push(null)
|
|
170
|
-
this.paused = false
|
|
171
148
|
this.done = true
|
|
149
|
+
this.paused = false
|
|
172
150
|
return
|
|
173
151
|
}
|
|
174
152
|
|
|
175
|
-
if (
|
|
176
|
-
//
|
|
177
|
-
|
|
178
|
-
|
|
153
|
+
if (shouldContinue) {
|
|
154
|
+
// Keep the stream flowing
|
|
155
|
+
logger.log(`${table} continuing the stream`)
|
|
156
|
+
void this.runNextQuery()
|
|
157
|
+
} else {
|
|
158
|
+
// Not starting the next query
|
|
159
|
+
if (this.paused) {
|
|
160
|
+
logger.log(`${table} stream is already paused`)
|
|
161
|
+
} else {
|
|
162
|
+
logger.warn(`${table} pausing the stream`)
|
|
163
|
+
this.paused = true
|
|
164
|
+
}
|
|
179
165
|
}
|
|
166
|
+
}
|
|
180
167
|
|
|
181
|
-
|
|
182
|
-
const {
|
|
168
|
+
private async runQuery(q: Query): Promise<QuerySnapshot | undefined> {
|
|
169
|
+
const { table, logger } = this
|
|
183
170
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
171
|
+
try {
|
|
172
|
+
return await pRetry(
|
|
173
|
+
async () => {
|
|
174
|
+
return await q.get()
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
name: `FirestoreStreamReadable.query(${table})`,
|
|
178
|
+
maxAttempts: 5,
|
|
179
|
+
delay: 5000,
|
|
180
|
+
delayMultiplier: 2,
|
|
181
|
+
logger,
|
|
182
|
+
timeout: 120_000, // 2 minutes
|
|
183
|
+
},
|
|
184
|
+
)
|
|
185
|
+
} catch (err) {
|
|
186
|
+
console.log(
|
|
187
|
+
`FirestoreStreamReadable error!\n`,
|
|
188
|
+
{
|
|
189
|
+
table,
|
|
190
|
+
rowsRetrieved: this.rowsRetrieved,
|
|
191
|
+
},
|
|
192
|
+
err,
|
|
193
|
+
)
|
|
194
|
+
this.emit('error', err)
|
|
195
|
+
return
|
|
193
196
|
}
|
|
194
197
|
}
|
|
195
198
|
}
|
package/src/query.util.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { FieldPath, type Query, type WhereFilterOp } from '@google-cloud/firestore'
|
|
2
2
|
import type { DBQuery, DBQueryFilterOperator } from '@naturalcycles/db-lib'
|
|
3
3
|
import type { ObjectWithId } from '@naturalcycles/js-lib/types'
|
|
4
4
|
|
|
@@ -17,14 +17,12 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
|
|
|
17
17
|
|
|
18
18
|
// filter
|
|
19
19
|
for (const f of dbQuery._filters) {
|
|
20
|
-
q = q.where(f.name
|
|
20
|
+
q = q.where(mapName(f.name), OP_MAP[f.op] || (f.op as WhereFilterOp), f.val)
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
// order
|
|
24
24
|
for (const ord of dbQuery._orders) {
|
|
25
|
-
|
|
26
|
-
// .orderBy(FieldPath.documentId())
|
|
27
|
-
q = q.orderBy(ord.name as string, ord.descending ? 'desc' : 'asc')
|
|
25
|
+
q = q.orderBy(mapName(ord.name), ord.descending ? 'desc' : 'asc')
|
|
28
26
|
}
|
|
29
27
|
|
|
30
28
|
// limit
|
|
@@ -32,13 +30,18 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
|
|
|
32
30
|
|
|
33
31
|
// selectedFields
|
|
34
32
|
if (dbQuery._selectedFieldNames) {
|
|
35
|
-
//
|
|
36
|
-
|
|
33
|
+
// id is filtered out, because in Firestore it's not a "property",
|
|
34
|
+
// and doc.id is always returned, even if we request empty set of fields
|
|
35
|
+
q = q.select(...(dbQuery._selectedFieldNames as string[]).filter(n => n !== 'id'))
|
|
37
36
|
}
|
|
38
37
|
|
|
39
38
|
// cursor
|
|
40
39
|
if (dbQuery._startCursor) {
|
|
41
|
-
|
|
40
|
+
// Using `startAfter`, not `startAt` here
|
|
41
|
+
// Why?
|
|
42
|
+
// Because in Firestore, you can only retrieve "last document id" to be used as Cursor.
|
|
43
|
+
// That document was already retrieved, so it makes sense to start AFTER it.
|
|
44
|
+
q = q.startAfter(dbQuery._startCursor)
|
|
42
45
|
}
|
|
43
46
|
|
|
44
47
|
if (dbQuery._endCursor) {
|
|
@@ -47,3 +50,8 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
|
|
|
47
50
|
|
|
48
51
|
return q
|
|
49
52
|
}
|
|
53
|
+
|
|
54
|
+
/**
 * Translates a DBQuery field name into what a Firestore query expects:
 * `id` is not a stored property in Firestore, so it maps to the documentId FieldPath,
 * every other name is passed through as a plain string.
 */
function mapName<ROW extends ObjectWithId>(name: keyof ROW): string | FieldPath {
  return name === 'id' ? FieldPath.documentId() : (name as string)
}
|