@naturalcycles/firestore-lib 2.8.1 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/firestore.db.d.ts +10 -32
- package/dist/firestore.db.js +5 -0
- package/dist/firestoreShardedReadable.d.ts +41 -0
- package/dist/firestoreShardedReadable.js +173 -0
- package/dist/firestoreStreamReadable.d.ts +6 -6
- package/dist/firestoreStreamReadable.js +84 -65
- package/dist/query.util.d.ts +1 -1
- package/dist/query.util.js +16 -7
- package/package.json +1 -1
- package/src/firestore.db.ts +22 -34
- package/src/firestoreShardedReadable.ts +233 -0
- package/src/firestoreStreamReadable.ts +101 -86
- package/src/query.util.ts +16 -8
package/dist/firestore.db.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type { Firestore, Query, QuerySnapshot, Transaction } from '@google-cloud
|
|
|
2
2
|
import type { CommonDB, CommonDBOptions, CommonDBReadOptions, CommonDBSaveOptions, CommonDBSupport, CommonDBTransactionOptions, DBQuery, DBTransaction, DBTransactionFn, RunQueryResult } from '@naturalcycles/db-lib';
|
|
3
3
|
import { BaseCommonDB } from '@naturalcycles/db-lib';
|
|
4
4
|
import { type CommonLogger } from '@naturalcycles/js-lib/log';
|
|
5
|
-
import type {
|
|
5
|
+
import type { ObjectWithId, PositiveInteger, StringMap } from '@naturalcycles/js-lib/types';
|
|
6
6
|
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
|
|
7
7
|
export declare class FirestoreDB extends BaseCommonDB implements CommonDB {
|
|
8
8
|
constructor(cfg: FirestoreDBCfg);
|
|
@@ -66,50 +66,28 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
|
|
|
66
66
|
* Defaults to false
|
|
67
67
|
*/
|
|
68
68
|
experimentalCursorStream?: boolean;
|
|
69
|
+
experimentalShardedStream?: boolean;
|
|
69
70
|
/**
|
|
70
71
|
* Applicable to `experimentalCursorStream`.
|
|
71
72
|
* Defines the size (limit) of each individual query.
|
|
72
73
|
*
|
|
73
|
-
* Default:
|
|
74
|
+
* Default: 10_000
|
|
74
75
|
*/
|
|
75
|
-
batchSize?:
|
|
76
|
+
batchSize?: PositiveInteger;
|
|
76
77
|
/**
|
|
77
|
-
*
|
|
78
|
-
*
|
|
79
|
-
*
|
|
80
|
-
*
|
|
81
|
-
*
|
|
82
|
-
*
|
|
83
|
-
* Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
|
|
84
|
-
* when _read is called.
|
|
85
|
-
*
|
|
86
|
-
* @default 1000
|
|
87
|
-
*/
|
|
88
|
-
rssLimitMB?: number;
|
|
89
|
-
/**
|
|
90
|
-
* Applicable to `experimentalCursorStream`
|
|
91
|
-
* Default false.
|
|
92
|
-
* If true, stream will pause until consumer requests more data (via _read).
|
|
93
|
-
* It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
|
|
94
|
-
* There will be gaps in time between "last query loaded" and "next query requested".
|
|
95
|
-
* This mode is useful e.g for DB migrations, where you want to avoid "stale data".
|
|
96
|
-
* So, it minimizes the time between "item loaded" and "item saved" during DB migration.
|
|
78
|
+
* Defaults to 3x batchSize.
|
|
79
|
+
* Default batchSize is 10_000, so default highWaterMark is 30_000.
|
|
80
|
+
* Controls how many rows to have "buffered".
|
|
81
|
+
* Should be at least 1x batchSize, otherwise the stream will be "starving"
|
|
82
|
+
* between the queries.
|
|
97
83
|
*/
|
|
98
|
-
|
|
84
|
+
highWaterMark?: PositiveInteger;
|
|
99
85
|
/**
|
|
100
86
|
* Set to `true` to log additional debug info, when using experimentalCursorStream.
|
|
101
87
|
*
|
|
102
88
|
* @default false
|
|
103
89
|
*/
|
|
104
90
|
debug?: boolean;
|
|
105
|
-
/**
|
|
106
|
-
* Default is undefined.
|
|
107
|
-
* If set - sets a "safety timer", which will force call _read after the specified number of seconds.
|
|
108
|
-
* This is to prevent possible "dead-lock"/race-condition that would make the stream "hang".
|
|
109
|
-
*
|
|
110
|
-
* @experimental
|
|
111
|
-
*/
|
|
112
|
-
maxWait?: NumberOfSeconds;
|
|
113
91
|
}
|
|
114
92
|
export interface FirestoreDBOptions extends CommonDBOptions {
|
|
115
93
|
}
|
package/dist/firestore.db.js
CHANGED
|
@@ -8,6 +8,7 @@ import { _filterUndefinedValues, _omit } from '@naturalcycles/js-lib/object/obje
|
|
|
8
8
|
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
|
|
9
9
|
import { _stringMapEntries } from '@naturalcycles/js-lib/types';
|
|
10
10
|
import { escapeDocId, unescapeDocId } from './firestore.util.js';
|
|
11
|
+
import { FirestoreShardedReadable } from './firestoreShardedReadable.js';
|
|
11
12
|
import { FirestoreStreamReadable } from './firestoreStreamReadable.js';
|
|
12
13
|
import { dbQueryToFirestoreQuery } from './query.util.js';
|
|
13
14
|
export class FirestoreDB extends BaseCommonDB {
|
|
@@ -23,6 +24,7 @@ export class FirestoreDB extends BaseCommonDB {
|
|
|
23
24
|
...commonDBFullSupport,
|
|
24
25
|
patchByQuery: false, // todo: can be implemented
|
|
25
26
|
tableSchemas: false,
|
|
27
|
+
createTransaction: false, // Firestore SDK doesn't support it
|
|
26
28
|
};
|
|
27
29
|
// GET
|
|
28
30
|
async getByIds(table, ids, opt = {}) {
|
|
@@ -100,6 +102,9 @@ export class FirestoreDB extends BaseCommonDB {
|
|
|
100
102
|
if (opt.experimentalCursorStream) {
|
|
101
103
|
return new FirestoreStreamReadable(firestoreQuery, q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'));
|
|
102
104
|
}
|
|
105
|
+
if (opt.experimentalShardedStream) {
|
|
106
|
+
return new FirestoreShardedReadable(firestoreQuery, q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'));
|
|
107
|
+
}
|
|
103
108
|
return firestoreQuery.stream().map(doc => {
|
|
104
109
|
return {
|
|
105
110
|
id: unescapeDocId(doc.id),
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { type Query } from '@google-cloud/firestore';
|
|
3
|
+
import type { DBQuery } from '@naturalcycles/db-lib';
|
|
4
|
+
import type { CommonLogger } from '@naturalcycles/js-lib/log';
|
|
5
|
+
import type { ObjectWithId } from '@naturalcycles/js-lib/types';
|
|
6
|
+
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
|
|
7
|
+
import type { FirestoreDBStreamOptions } from './firestore.db.js';
|
|
8
|
+
/**
|
|
9
|
+
* Highly, HIGHLY experimental!
|
|
10
|
+
*/
|
|
11
|
+
export declare class FirestoreShardedReadable<T extends ObjectWithId = any> extends Readable implements ReadableTyped<T> {
|
|
12
|
+
private readonly q;
|
|
13
|
+
readonly dbQuery: DBQuery<T>;
|
|
14
|
+
private logger;
|
|
15
|
+
private readonly table;
|
|
16
|
+
private readonly originalLimit;
|
|
17
|
+
private rowsRetrieved;
|
|
18
|
+
/**
|
|
19
|
+
* Next shard to be used for querying.
|
|
20
|
+
*/
|
|
21
|
+
private nextShard;
|
|
22
|
+
private cursorByShard;
|
|
23
|
+
private queryIsRunningByShard;
|
|
24
|
+
private paused;
|
|
25
|
+
private done;
|
|
26
|
+
private doneShards;
|
|
27
|
+
private lastQueryDoneByShard;
|
|
28
|
+
private totalWait;
|
|
29
|
+
private readonly opt;
|
|
30
|
+
constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
|
|
31
|
+
/**
|
|
32
|
+
* Counts how many times _read was called.
|
|
33
|
+
* For debugging.
|
|
34
|
+
*/
|
|
35
|
+
count: number;
|
|
36
|
+
_read(): void;
|
|
37
|
+
private runNextQuery;
|
|
38
|
+
private runQuery;
|
|
39
|
+
private findNextFreeShard;
|
|
40
|
+
private _getNextShardAndMove;
|
|
41
|
+
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { FieldPath } from '@google-cloud/firestore';
|
|
3
|
+
import { localTime } from '@naturalcycles/js-lib/datetime';
|
|
4
|
+
import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
|
|
5
|
+
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
|
|
6
|
+
import { unescapeDocId } from './firestore.util.js';
|
|
7
|
+
const SHARDS = 16;
|
|
8
|
+
const SHARD_COLUMN = 'shard16';
|
|
9
|
+
/**
|
|
10
|
+
* Highly, HIGHLY experimental!
|
|
11
|
+
*/
|
|
12
|
+
export class FirestoreShardedReadable extends Readable {
|
|
13
|
+
q;
|
|
14
|
+
dbQuery;
|
|
15
|
+
logger;
|
|
16
|
+
table;
|
|
17
|
+
originalLimit;
|
|
18
|
+
rowsRetrieved = 0;
|
|
19
|
+
/**
|
|
20
|
+
* Next shard to be used for querying.
|
|
21
|
+
*/
|
|
22
|
+
nextShard = 1;
|
|
23
|
+
cursorByShard = {};
|
|
24
|
+
queryIsRunningByShard = {};
|
|
25
|
+
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
|
|
26
|
+
paused = false;
|
|
27
|
+
done = false;
|
|
28
|
+
doneShards = new Set();
|
|
29
|
+
lastQueryDoneByShard = {};
|
|
30
|
+
totalWait = 0;
|
|
31
|
+
opt;
|
|
32
|
+
constructor(q, dbQuery, opt, logger) {
|
|
33
|
+
super({ objectMode: true });
|
|
34
|
+
this.q = q;
|
|
35
|
+
this.dbQuery = dbQuery;
|
|
36
|
+
this.logger = logger;
|
|
37
|
+
this.opt = {
|
|
38
|
+
batchSize: 3000,
|
|
39
|
+
...opt,
|
|
40
|
+
};
|
|
41
|
+
this.originalLimit = dbQuery._limitValue;
|
|
42
|
+
this.table = dbQuery.table;
|
|
43
|
+
logger.warn(`!! using experimentalShardedStream !! ${this.table}, batchSize: ${this.opt.batchSize}`);
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Counts how many times _read was called.
|
|
47
|
+
* For debugging.
|
|
48
|
+
*/
|
|
49
|
+
count = 0;
|
|
50
|
+
_read() {
|
|
51
|
+
// this.lastReadTimestamp = Date.now() as UnixTimestampMillis
|
|
52
|
+
// console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
|
|
53
|
+
this.count++;
|
|
54
|
+
if (this.done) {
|
|
55
|
+
this.logger.warn(`!!! _read was called, but done==true`);
|
|
56
|
+
return;
|
|
57
|
+
}
|
|
58
|
+
// const shard = this.getNextShardAndMove()
|
|
59
|
+
const shard = this.findNextFreeShard();
|
|
60
|
+
if (!shard) {
|
|
61
|
+
this.logger.log(`_read ${this.count}: all shards are busy, skipping`);
|
|
62
|
+
return;
|
|
63
|
+
}
|
|
64
|
+
void this.runNextQuery(shard).catch(err => {
|
|
65
|
+
console.log('error in runNextQuery', err);
|
|
66
|
+
this.emit('error', err);
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
async runNextQuery(shard) {
|
|
70
|
+
if (this.done)
|
|
71
|
+
return;
|
|
72
|
+
const { logger, table } = this;
|
|
73
|
+
if (this.lastQueryDoneByShard[shard]) {
|
|
74
|
+
this.totalWait += Date.now() - this.lastQueryDoneByShard[shard];
|
|
75
|
+
}
|
|
76
|
+
this.queryIsRunningByShard[shard] = true;
|
|
77
|
+
const limit = this.opt.batchSize;
|
|
78
|
+
// We have to orderBy documentId, to be able to use id as a cursor
|
|
79
|
+
let q = this.q.where(SHARD_COLUMN, '==', shard).orderBy(FieldPath.documentId()).limit(limit);
|
|
80
|
+
if (this.cursorByShard[shard]) {
|
|
81
|
+
q = q.startAfter(this.cursorByShard[shard]);
|
|
82
|
+
}
|
|
83
|
+
console.log(`runNextQuery[${shard}]`, {
|
|
84
|
+
retrieved: this.rowsRetrieved,
|
|
85
|
+
});
|
|
86
|
+
const qs = await this.runQuery(q);
|
|
87
|
+
if (!qs) {
|
|
88
|
+
// this means we have already emitted an unrecoverable error
|
|
89
|
+
return;
|
|
90
|
+
}
|
|
91
|
+
const rows = [];
|
|
92
|
+
let lastDocId;
|
|
93
|
+
for (const doc of qs.docs) {
|
|
94
|
+
lastDocId = doc.id;
|
|
95
|
+
rows.push({
|
|
96
|
+
id: unescapeDocId(doc.id),
|
|
97
|
+
...doc.data(),
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
this.rowsRetrieved += rows.length;
|
|
101
|
+
logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
|
|
102
|
+
this.cursorByShard[shard] = lastDocId;
|
|
103
|
+
this.queryIsRunningByShard[shard] = false; // ready to take more _reads
|
|
104
|
+
this.lastQueryDoneByShard[shard] = localTime.nowUnixMillis();
|
|
105
|
+
for (const row of rows) {
|
|
106
|
+
this.push(row);
|
|
107
|
+
}
|
|
108
|
+
if (qs.empty) {
|
|
109
|
+
logger.warn(`!!!! Shard ${shard} DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
|
|
110
|
+
this.doneShards.add(shard);
|
|
111
|
+
}
|
|
112
|
+
if (this.doneShards.size === SHARDS) {
|
|
113
|
+
logger.warn(`!!!! DONE: all shards completed, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
|
|
114
|
+
this.push(null);
|
|
115
|
+
this.paused = false;
|
|
116
|
+
this.done = true;
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
if (this.originalLimit && this.rowsRetrieved >= this.originalLimit) {
|
|
120
|
+
logger.warn(`!!!! DONE: reached total limit of ${this.originalLimit}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
|
|
121
|
+
this.push(null);
|
|
122
|
+
this.paused = false;
|
|
123
|
+
this.done = true;
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
// if (this.paused) {
|
|
127
|
+
// this.paused = false
|
|
128
|
+
// }
|
|
129
|
+
const nextShard = this.findNextFreeShard();
|
|
130
|
+
if (nextShard) {
|
|
131
|
+
void this.runNextQuery(nextShard);
|
|
132
|
+
}
|
|
133
|
+
else {
|
|
134
|
+
logger.warn(`${table} all shards are busy in runNextQuery, skipping`);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
async runQuery(q) {
|
|
138
|
+
const { table, logger } = this;
|
|
139
|
+
try {
|
|
140
|
+
return await pRetry(async () => {
|
|
141
|
+
return await q.get();
|
|
142
|
+
}, {
|
|
143
|
+
name: `FirestoreStreamReadable.query(${table})`,
|
|
144
|
+
maxAttempts: 5,
|
|
145
|
+
delay: 5000,
|
|
146
|
+
delayMultiplier: 2,
|
|
147
|
+
logger,
|
|
148
|
+
timeout: 120_000, // 2 minutes
|
|
149
|
+
});
|
|
150
|
+
}
|
|
151
|
+
catch (err) {
|
|
152
|
+
console.log(`FirestoreStreamReadable error!\n`, {
|
|
153
|
+
table,
|
|
154
|
+
rowsRetrieved: this.rowsRetrieved,
|
|
155
|
+
}, err);
|
|
156
|
+
this.emit('error', err);
|
|
157
|
+
return;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
findNextFreeShard() {
|
|
161
|
+
for (let shard = 1; shard <= SHARDS; shard++) {
|
|
162
|
+
if (!this.queryIsRunningByShard[shard] && !this.doneShards.has(shard)) {
|
|
163
|
+
return shard;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
|
|
168
|
+
_getNextShardAndMove() {
|
|
169
|
+
const shard = this.nextShard;
|
|
170
|
+
this.nextShard = shard === SHARDS ? 1 : shard + 1;
|
|
171
|
+
return shard;
|
|
172
|
+
}
|
|
173
|
+
}
|
|
@@ -12,17 +12,17 @@ export declare class FirestoreStreamReadable<T extends ObjectWithId = any> exten
|
|
|
12
12
|
private readonly originalLimit;
|
|
13
13
|
private rowsRetrieved;
|
|
14
14
|
private endCursor?;
|
|
15
|
-
private
|
|
15
|
+
private queryIsRunning;
|
|
16
|
+
private paused;
|
|
16
17
|
private done;
|
|
17
|
-
private lastQueryDone?;
|
|
18
|
-
private totalWait;
|
|
19
|
-
private readonly opt;
|
|
20
|
-
constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
|
|
21
18
|
/**
|
|
22
19
|
* Counts how many times _read was called.
|
|
23
20
|
* For debugging.
|
|
24
21
|
*/
|
|
25
|
-
|
|
22
|
+
countReads: number;
|
|
23
|
+
private readonly opt;
|
|
24
|
+
constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
|
|
26
25
|
_read(): void;
|
|
27
26
|
private runNextQuery;
|
|
27
|
+
private runQuery;
|
|
28
28
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { Readable } from 'node:stream';
|
|
2
2
|
import { FieldPath } from '@google-cloud/firestore';
|
|
3
|
-
import {
|
|
3
|
+
import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
|
|
4
|
+
import { _since } from '@naturalcycles/js-lib/datetime/time.util.js';
|
|
4
5
|
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
|
|
5
6
|
import { unescapeDocId } from './firestore.util.js';
|
|
6
7
|
export class FirestoreStreamReadable extends Readable {
|
|
@@ -10,62 +11,64 @@ export class FirestoreStreamReadable extends Readable {
|
|
|
10
11
|
originalLimit;
|
|
11
12
|
rowsRetrieved = 0;
|
|
12
13
|
endCursor;
|
|
13
|
-
|
|
14
|
+
queryIsRunning = false;
|
|
15
|
+
paused = false;
|
|
14
16
|
done = false;
|
|
15
|
-
|
|
16
|
-
|
|
17
|
+
/**
|
|
18
|
+
* Counts how many times _read was called.
|
|
19
|
+
* For debugging.
|
|
20
|
+
*/
|
|
21
|
+
countReads = 0;
|
|
17
22
|
opt;
|
|
18
|
-
// private readonly dsOpt: RunQueryOptions
|
|
19
23
|
constructor(q, dbQuery, opt, logger) {
|
|
20
|
-
|
|
24
|
+
// 10_000 was optimal in benchmarks
|
|
25
|
+
const { batchSize = 10_000 } = opt;
|
|
26
|
+
const { highWaterMark = batchSize * 3 } = opt;
|
|
27
|
+
// Defaulting highWaterMark to 3x batchSize
|
|
28
|
+
super({ objectMode: true, highWaterMark });
|
|
21
29
|
this.q = q;
|
|
22
30
|
this.logger = logger;
|
|
23
31
|
this.opt = {
|
|
24
|
-
rssLimitMB: 1000,
|
|
25
|
-
batchSize: 1000,
|
|
26
32
|
...opt,
|
|
33
|
+
batchSize,
|
|
34
|
+
highWaterMark,
|
|
27
35
|
};
|
|
28
36
|
// todo: support PITR!
|
|
29
|
-
// this.dsOpt = {}
|
|
30
|
-
// if (opt.readAt) {
|
|
31
|
-
// // Datastore expects UnixTimestamp in milliseconds
|
|
32
|
-
// this.dsOpt.readTime = opt.readAt * 1000
|
|
33
|
-
// }
|
|
34
37
|
this.originalLimit = dbQuery._limitValue;
|
|
35
38
|
this.table = dbQuery.table;
|
|
36
|
-
logger.warn(
|
|
39
|
+
logger.warn(`!!! using experimentalCursorStream`, {
|
|
40
|
+
table: this.table,
|
|
41
|
+
batchSize,
|
|
42
|
+
highWaterMark,
|
|
43
|
+
});
|
|
37
44
|
}
|
|
38
|
-
/**
|
|
39
|
-
* Counts how many times _read was called.
|
|
40
|
-
* For debugging.
|
|
41
|
-
*/
|
|
42
|
-
count = 0;
|
|
43
45
|
_read() {
|
|
44
46
|
// this.lastReadTimestamp = Date.now() as UnixTimestampMillis
|
|
45
47
|
// console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
|
|
46
|
-
this.
|
|
48
|
+
this.countReads++;
|
|
47
49
|
if (this.done) {
|
|
48
50
|
this.logger.warn(`!!! _read was called, but done==true`);
|
|
49
51
|
return;
|
|
50
52
|
}
|
|
51
|
-
if (
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
this.emit('error', err);
|
|
55
|
-
});
|
|
53
|
+
if (this.paused) {
|
|
54
|
+
this.logger.log(`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`);
|
|
55
|
+
this.paused = false;
|
|
56
56
|
}
|
|
57
|
-
|
|
58
|
-
this.logger.log(`_read
|
|
57
|
+
if (this.queryIsRunning) {
|
|
58
|
+
this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`);
|
|
59
|
+
// todo: check if this can cause a "hang", if no more _reads would come later and we get stuck?
|
|
60
|
+
return;
|
|
59
61
|
}
|
|
62
|
+
void this.runNextQuery().catch(err => {
|
|
63
|
+
console.log('error in runNextQuery', err);
|
|
64
|
+
this.emit('error', err);
|
|
65
|
+
});
|
|
60
66
|
}
|
|
61
67
|
async runNextQuery() {
|
|
62
68
|
if (this.done)
|
|
63
69
|
return;
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
this.totalWait += now - this.lastQueryDone;
|
|
67
|
-
}
|
|
68
|
-
this.running = true;
|
|
70
|
+
const { logger, table } = this;
|
|
71
|
+
this.queryIsRunning = true;
|
|
69
72
|
let limit = this.opt.batchSize;
|
|
70
73
|
if (this.originalLimit) {
|
|
71
74
|
limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved);
|
|
@@ -76,26 +79,15 @@ export class FirestoreStreamReadable extends Readable {
|
|
|
76
79
|
if (this.endCursor) {
|
|
77
80
|
q = q.startAfter(this.endCursor);
|
|
78
81
|
}
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
logger: this.logger,
|
|
89
|
-
timeout: 120_000, // 2 minutes
|
|
90
|
-
});
|
|
91
|
-
}
|
|
92
|
-
catch (err) {
|
|
93
|
-
console.log(`FirestoreStreamReadable error!\n`, {
|
|
94
|
-
table: this.table,
|
|
95
|
-
rowsRetrieved: this.rowsRetrieved,
|
|
96
|
-
}, err);
|
|
97
|
-
this.emit('error', err);
|
|
98
|
-
// clearInterval(this.maxWaitInterval)
|
|
82
|
+
// logger.log(`runNextQuery`, {
|
|
83
|
+
// rowsRetrieved: this.rowsRetrieved,
|
|
84
|
+
// paused: this.paused,
|
|
85
|
+
// })
|
|
86
|
+
const started = localTime.nowUnixMillis();
|
|
87
|
+
const qs = await this.runQuery(q);
|
|
88
|
+
logger.log(`${table} query took ${_since(started)}`);
|
|
89
|
+
if (!qs) {
|
|
90
|
+
// error already emitted in runQuery
|
|
99
91
|
return;
|
|
100
92
|
}
|
|
101
93
|
const rows = [];
|
|
@@ -108,30 +100,57 @@ export class FirestoreStreamReadable extends Readable {
|
|
|
108
100
|
});
|
|
109
101
|
}
|
|
110
102
|
this.rowsRetrieved += rows.length;
|
|
111
|
-
|
|
103
|
+
logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved`);
|
|
112
104
|
this.endCursor = lastDocId;
|
|
113
|
-
this.
|
|
114
|
-
|
|
105
|
+
this.queryIsRunning = false; // ready to take more _reads
|
|
106
|
+
let shouldContinue = false;
|
|
115
107
|
for (const row of rows) {
|
|
116
|
-
this.push(row);
|
|
108
|
+
shouldContinue = this.push(row);
|
|
117
109
|
}
|
|
118
|
-
if (
|
|
119
|
-
|
|
110
|
+
if (!rows.length || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
|
|
111
|
+
logger.warn(`${table} DONE! ${this.rowsRetrieved} rowsRetrieved`);
|
|
120
112
|
this.push(null);
|
|
121
113
|
this.done = true;
|
|
114
|
+
this.paused = false;
|
|
115
|
+
return;
|
|
122
116
|
}
|
|
123
|
-
|
|
124
|
-
//
|
|
125
|
-
|
|
117
|
+
if (shouldContinue) {
|
|
118
|
+
// Keep the stream flowing
|
|
119
|
+
logger.log(`${table} continuing the stream`);
|
|
120
|
+
void this.runNextQuery();
|
|
126
121
|
}
|
|
127
122
|
else {
|
|
128
|
-
|
|
129
|
-
if (
|
|
130
|
-
|
|
123
|
+
// Not starting the next query
|
|
124
|
+
if (this.paused) {
|
|
125
|
+
logger.log(`${table} stream is already paused`);
|
|
131
126
|
}
|
|
132
127
|
else {
|
|
133
|
-
|
|
128
|
+
logger.warn(`${table} pausing the stream`);
|
|
129
|
+
this.paused = true;
|
|
134
130
|
}
|
|
135
131
|
}
|
|
136
132
|
}
|
|
133
|
+
async runQuery(q) {
|
|
134
|
+
const { table, logger } = this;
|
|
135
|
+
try {
|
|
136
|
+
return await pRetry(async () => {
|
|
137
|
+
return await q.get();
|
|
138
|
+
}, {
|
|
139
|
+
name: `FirestoreStreamReadable.query(${table})`,
|
|
140
|
+
maxAttempts: 5,
|
|
141
|
+
delay: 5000,
|
|
142
|
+
delayMultiplier: 2,
|
|
143
|
+
logger,
|
|
144
|
+
timeout: 120_000, // 2 minutes
|
|
145
|
+
});
|
|
146
|
+
}
|
|
147
|
+
catch (err) {
|
|
148
|
+
console.log(`FirestoreStreamReadable error!\n`, {
|
|
149
|
+
table,
|
|
150
|
+
rowsRetrieved: this.rowsRetrieved,
|
|
151
|
+
}, err);
|
|
152
|
+
this.emit('error', err);
|
|
153
|
+
return;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
137
156
|
}
|
package/dist/query.util.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type Query } from '@google-cloud/firestore';
|
|
2
2
|
import type { DBQuery } from '@naturalcycles/db-lib';
|
|
3
3
|
import type { ObjectWithId } from '@naturalcycles/js-lib/types';
|
|
4
4
|
export declare function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(dbQuery: DBQuery<ROW>, emptyQuery: Query): Query;
|
package/dist/query.util.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { FieldPath } from '@google-cloud/firestore';
|
|
1
2
|
// Map DBQueryFilterOp to WhereFilterOp
|
|
2
3
|
// Currently it's fully aligned!
|
|
3
4
|
const OP_MAP = {
|
|
@@ -8,27 +9,35 @@ export function dbQueryToFirestoreQuery(dbQuery, emptyQuery) {
|
|
|
8
9
|
let q = emptyQuery;
|
|
9
10
|
// filter
|
|
10
11
|
for (const f of dbQuery._filters) {
|
|
11
|
-
q = q.where(f.name, OP_MAP[f.op] || f.op, f.val);
|
|
12
|
+
q = q.where(mapName(f.name), OP_MAP[f.op] || f.op, f.val);
|
|
12
13
|
}
|
|
13
14
|
// order
|
|
14
15
|
for (const ord of dbQuery._orders) {
|
|
15
|
-
|
|
16
|
-
// .orderBy(FieldPath.documentId())
|
|
17
|
-
q = q.orderBy(ord.name, ord.descending ? 'desc' : 'asc');
|
|
16
|
+
q = q.orderBy(mapName(ord.name), ord.descending ? 'desc' : 'asc');
|
|
18
17
|
}
|
|
19
18
|
// limit
|
|
20
19
|
q = q.limit(dbQuery._limitValue);
|
|
21
20
|
// selectedFields
|
|
22
21
|
if (dbQuery._selectedFieldNames) {
|
|
23
|
-
//
|
|
24
|
-
|
|
22
|
+
// id is filtered out, because in Firestore it's not a "property",
|
|
23
|
+
// and doc.id is always returned, even if we request empty set of fields
|
|
24
|
+
q = q.select(...dbQuery._selectedFieldNames.filter(n => n !== 'id'));
|
|
25
25
|
}
|
|
26
26
|
// cursor
|
|
27
27
|
if (dbQuery._startCursor) {
|
|
28
|
-
|
|
28
|
+
// Using `startAfter`, not `startAt` here
|
|
29
|
+
// Why?
|
|
30
|
+
// Because in Firestore, you can only retrieve "last document id" to be used as Cursor.
|
|
31
|
+
// That document was already retrieved, so it makes sense to start AFTER it.
|
|
32
|
+
q = q.startAfter(dbQuery._startCursor);
|
|
29
33
|
}
|
|
30
34
|
if (dbQuery._endCursor) {
|
|
31
35
|
q = q.endAt(dbQuery._endCursor);
|
|
32
36
|
}
|
|
33
37
|
return q;
|
|
34
38
|
}
|
|
39
|
+
function mapName(name) {
|
|
40
|
+
if (name === 'id')
|
|
41
|
+
return FieldPath.documentId();
|
|
42
|
+
return name;
|
|
43
|
+
}
|
package/package.json
CHANGED
package/src/firestore.db.ts
CHANGED
|
@@ -28,10 +28,11 @@ import { _assert } from '@naturalcycles/js-lib/error/assert.js'
|
|
|
28
28
|
import { type CommonLogger, commonLoggerMinLevel } from '@naturalcycles/js-lib/log'
|
|
29
29
|
import { _filterUndefinedValues, _omit } from '@naturalcycles/js-lib/object/object.util.js'
|
|
30
30
|
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js'
|
|
31
|
-
import type {
|
|
31
|
+
import type { ObjectWithId, PositiveInteger, StringMap } from '@naturalcycles/js-lib/types'
|
|
32
32
|
import { _stringMapEntries } from '@naturalcycles/js-lib/types'
|
|
33
33
|
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
|
|
34
34
|
import { escapeDocId, unescapeDocId } from './firestore.util.js'
|
|
35
|
+
import { FirestoreShardedReadable } from './firestoreShardedReadable.js'
|
|
35
36
|
import { FirestoreStreamReadable } from './firestoreStreamReadable.js'
|
|
36
37
|
import { dbQueryToFirestoreQuery } from './query.util.js'
|
|
37
38
|
|
|
@@ -50,6 +51,7 @@ export class FirestoreDB extends BaseCommonDB implements CommonDB {
|
|
|
50
51
|
...commonDBFullSupport,
|
|
51
52
|
patchByQuery: false, // todo: can be implemented
|
|
52
53
|
tableSchemas: false,
|
|
54
|
+
createTransaction: false, // Firestore SDK doesn't support it
|
|
53
55
|
}
|
|
54
56
|
|
|
55
57
|
// GET
|
|
@@ -167,6 +169,15 @@ export class FirestoreDB extends BaseCommonDB implements CommonDB {
|
|
|
167
169
|
)
|
|
168
170
|
}
|
|
169
171
|
|
|
172
|
+
if (opt.experimentalShardedStream) {
|
|
173
|
+
return new FirestoreShardedReadable(
|
|
174
|
+
firestoreQuery,
|
|
175
|
+
q,
|
|
176
|
+
opt,
|
|
177
|
+
commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'),
|
|
178
|
+
)
|
|
179
|
+
}
|
|
180
|
+
|
|
170
181
|
return (firestoreQuery.stream() as ReadableTyped<QueryDocumentSnapshot<any>>).map(doc => {
|
|
171
182
|
return {
|
|
172
183
|
id: unescapeDocId(doc.id),
|
|
@@ -545,38 +556,24 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
|
|
|
545
556
|
*/
|
|
546
557
|
experimentalCursorStream?: boolean
|
|
547
558
|
|
|
559
|
+
experimentalShardedStream?: boolean
|
|
560
|
+
|
|
548
561
|
/**
|
|
549
562
|
* Applicable to `experimentalCursorStream`.
|
|
550
563
|
* Defines the size (limit) of each individual query.
|
|
551
564
|
*
|
|
552
|
-
* Default:
|
|
553
|
-
*/
|
|
554
|
-
batchSize?: number
|
|
555
|
-
|
|
556
|
-
/**
|
|
557
|
-
* Applicable to `experimentalCursorStream`
|
|
558
|
-
*
|
|
559
|
-
* Set to a value (number of Megabytes) to control the peak RSS size.
|
|
560
|
-
* If limit is reached - streaming will pause until the stream keeps up, and then
|
|
561
|
-
* resumes.
|
|
562
|
-
*
|
|
563
|
-
* Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
|
|
564
|
-
* when _read is called.
|
|
565
|
-
*
|
|
566
|
-
* @default 1000
|
|
565
|
+
* Default: 10_000
|
|
567
566
|
*/
|
|
568
|
-
|
|
567
|
+
batchSize?: PositiveInteger
|
|
569
568
|
|
|
570
569
|
/**
|
|
571
|
-
*
|
|
572
|
-
* Default
|
|
573
|
-
*
|
|
574
|
-
*
|
|
575
|
-
*
|
|
576
|
-
* This mode is useful e.g for DB migrations, where you want to avoid "stale data".
|
|
577
|
-
* So, it minimizes the time between "item loaded" and "item saved" during DB migration.
|
|
570
|
+
* Defaults to 3x batchSize.
|
|
571
|
+
* Default batchSize is 10_000, so default highWaterMark is 30_000.
|
|
572
|
+
* Controls how many rows to have "buffered".
|
|
573
|
+
* Should be at least 1x batchSize, otherwise the stream will be "starving"
|
|
574
|
+
* between the queries.
|
|
578
575
|
*/
|
|
579
|
-
|
|
576
|
+
highWaterMark?: PositiveInteger
|
|
580
577
|
|
|
581
578
|
/**
|
|
582
579
|
* Set to `true` to log additional debug info, when using experimentalCursorStream.
|
|
@@ -584,15 +581,6 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
|
|
|
584
581
|
* @default false
|
|
585
582
|
*/
|
|
586
583
|
debug?: boolean
|
|
587
|
-
|
|
588
|
-
/**
|
|
589
|
-
* Default is undefined.
|
|
590
|
-
* If set - sets a "safety timer", which will force call _read after the specified number of seconds.
|
|
591
|
-
* This is to prevent possible "dead-lock"/race-condition that would make the stream "hang".
|
|
592
|
-
*
|
|
593
|
-
* @experimental
|
|
594
|
-
*/
|
|
595
|
-
maxWait?: NumberOfSeconds
|
|
596
584
|
}
|
|
597
585
|
|
|
598
586
|
export interface FirestoreDBOptions extends CommonDBOptions {}
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import { Readable } from 'node:stream'
|
|
2
|
+
import { FieldPath, type Query, type QuerySnapshot } from '@google-cloud/firestore'
|
|
3
|
+
import type { DBQuery } from '@naturalcycles/db-lib'
|
|
4
|
+
import { localTime } from '@naturalcycles/js-lib/datetime'
|
|
5
|
+
import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js'
|
|
6
|
+
import type { CommonLogger } from '@naturalcycles/js-lib/log'
|
|
7
|
+
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
|
|
8
|
+
import type {
|
|
9
|
+
ObjectWithId,
|
|
10
|
+
PositiveInteger,
|
|
11
|
+
StringMap,
|
|
12
|
+
UnixTimestampMillis,
|
|
13
|
+
} from '@naturalcycles/js-lib/types'
|
|
14
|
+
import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
|
|
15
|
+
import type { FirestoreDBStreamOptions } from './firestore.db.js'
|
|
16
|
+
import { unescapeDocId } from './firestore.util.js'
|
|
17
|
+
|
|
18
|
+
const SHARDS = 16
|
|
19
|
+
const SHARD_COLUMN = 'shard16'
|
|
20
|
+
|
|
21
|
+
/**
 * Highly, HIGHLY experimental!
 *
 * Readable object-mode stream that pulls rows from Firestore in parallel,
 * one independent cursor per shard. Documents are expected to carry a
 * `shard16` property (values 1..16 — TODO confirm the writer side); each
 * shard is paged with `where(shard16 == shard).orderBy(documentId()).limit(batchSize)`
 * queries, using the last document id as a `startAfter` cursor.
 * The stream ends (pushes `null`) when every shard returns an empty page,
 * or when the original DBQuery limit has been reached.
 */
export class FirestoreShardedReadable<T extends ObjectWithId = any>
  extends Readable
  implements ReadableTyped<T>
{
  private readonly table: string
  // Limit taken from the incoming DBQuery; falsy means "no limit".
  private readonly originalLimit: number
  // Total rows pushed so far, across all shards.
  private rowsRetrieved = 0
  /**
   * Next shard to be used for querying.
   */
  // NOTE(review): only read by the unused _getNextShardAndMove() round-robin
  // helper below; findNextFreeShard() is what _read actually uses.
  private nextShard = 1
  // Pagination cursor (last seen document id) per shard.
  private cursorByShard: StringMap = {}
  // True while a query for that shard is in flight.
  private queryIsRunningByShard: StringMap<boolean> = {}
  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
  private paused = false
  // Set once `null` has been pushed; subsequent _read/runNextQuery calls bail out.
  private done = false
  // Shards that returned an empty page and are therefore exhausted.
  private doneShards = new Set<PositiveInteger>()
  // When the previous query of each shard finished; used to accumulate totalWait.
  private lastQueryDoneByShard: StringMap<UnixTimestampMillis> = {}
  // Accumulated idle time (ms) between consecutive queries, summed across shards.
  private totalWait = 0

  private readonly opt: FirestoreDBStreamOptions & { batchSize: number }

  /**
   * @param q Base Firestore query; per-page shard filter, ordering and limit are appended to it.
   * @param dbQuery Source DBQuery — supplies the table name and the overall row limit.
   * @param opt Stream options; `batchSize` defaults to 3000 (overridable via opt).
   * @param logger Logger used for progress and diagnostic output.
   */
  constructor(
    private readonly q: Query,
    readonly dbQuery: DBQuery<T>,
    opt: FirestoreDBStreamOptions,
    private logger: CommonLogger,
  ) {
    super({ objectMode: true })

    this.opt = {
      batchSize: 3000,
      ...opt,
    }

    this.originalLimit = dbQuery._limitValue
    this.table = dbQuery.table

    logger.warn(
      `!! using experimentalShardedStream !! ${this.table}, batchSize: ${this.opt.batchSize}`,
    )
  }

  /**
   * Counts how many times _read was called.
   * For debugging.
   */
  count = 0

  /**
   * Called by the Readable machinery when the consumer wants more data.
   * Finds a shard that is neither busy nor exhausted and starts a query for it;
   * does nothing if the stream is done or all shards are busy.
   */
  override _read(): void {
    // this.lastReadTimestamp = Date.now() as UnixTimestampMillis

    // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
    this.count++

    if (this.done) {
      this.logger.warn(`!!! _read was called, but done==true`)
      return
    }

    // const shard = this.getNextShardAndMove()
    const shard = this.findNextFreeShard()
    if (!shard) {
      // All shards busy: an in-flight runNextQuery will chain the next page itself.
      this.logger.log(`_read ${this.count}: all shards are busy, skipping`)
      return
    }
    void this.runNextQuery(shard).catch(err => {
      console.log('error in runNextQuery', err)
      this.emit('error', err)
    })
  }

  /**
   * Fetches and pushes one page (up to batchSize rows) for the given shard,
   * updates that shard's cursor/bookkeeping, then either finishes the stream
   * (all shards done, or limit reached) or chains a query for the next free shard.
   */
  private async runNextQuery(shard: PositiveInteger): Promise<void> {
    if (this.done) return
    const { logger, table } = this

    // Time since this shard's previous query finished counts as "wait".
    if (this.lastQueryDoneByShard[shard]) {
      this.totalWait += Date.now() - this.lastQueryDoneByShard[shard]
    }

    this.queryIsRunningByShard[shard] = true

    const limit = this.opt.batchSize

    // We have to orderBy documentId, to be able to use id as a cursor

    let q = this.q.where(SHARD_COLUMN, '==', shard).orderBy(FieldPath.documentId()).limit(limit)
    if (this.cursorByShard[shard]) {
      q = q.startAfter(this.cursorByShard[shard])
    }

    console.log(`runNextQuery[${shard}]`, {
      retrieved: this.rowsRetrieved,
    })
    const qs = await this.runQuery(q)
    if (!qs) {
      // this means we have already emitted an unrecoverable error
      return
    }

    const rows: T[] = []
    let lastDocId: string | undefined

    for (const doc of qs.docs) {
      lastDocId = doc.id
      // Escaped doc id is kept for the cursor; the row gets the unescaped id.
      rows.push({
        id: unescapeDocId(doc.id),
        ...doc.data(),
      } as T)
    }

    this.rowsRetrieved += rows.length
    logger.log(
      `${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
        this.totalWait,
      )}`,
    )

    // NOTE(review): on an empty page lastDocId is undefined, so the cursor is
    // cleared here — harmless only because the shard is marked done below.
    this.cursorByShard[shard] = lastDocId
    this.queryIsRunningByShard[shard] = false // ready to take more _reads
    this.lastQueryDoneByShard[shard] = localTime.nowUnixMillis()

    // NOTE(review): push() return value is ignored — no backpressure handling
    // here; `paused` is never set, unlike FirestoreStreamReadable.
    for (const row of rows) {
      this.push(row)
    }

    if (qs.empty) {
      logger.warn(
        `!!!! Shard ${shard} DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      this.doneShards.add(shard)
    }

    if (this.doneShards.size === SHARDS) {
      logger.warn(
        `!!!! DONE: all shards completed, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      this.push(null)
      this.paused = false
      this.done = true
      return
    }

    // Limit is checked after pushing, so slightly more than originalLimit rows may be emitted.
    if (this.originalLimit && this.rowsRetrieved >= this.originalLimit) {
      logger.warn(
        `!!!! DONE: reached total limit of ${this.originalLimit}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      this.push(null)
      this.paused = false
      this.done = true
      return
    }

    // if (this.paused) {
    //   this.paused = false
    // }
    const nextShard = this.findNextFreeShard()
    if (nextShard) {
      // NOTE(review): unlike the call in _read, this recursive call has no
      // .catch — a rejection here would be unhandled (runQuery catches query
      // errors, but e.g. a throw from push would escape).
      void this.runNextQuery(nextShard)
    } else {
      logger.warn(`${table} all shards are busy in runNextQuery, skipping`)
    }
  }

  /**
   * Executes the query with retries (5 attempts, 5s delay doubling, 2-minute
   * timeout per attempt). On final failure logs, emits 'error' on the stream
   * and returns undefined so the caller stops.
   */
  private async runQuery(q: Query): Promise<QuerySnapshot | undefined> {
    const { table, logger } = this

    try {
      return await pRetry(
        async () => {
          return await q.get()
        },
        {
          // NOTE(review): name says FirestoreStreamReadable — copy-pasted from
          // the non-sharded stream; only affects retry log output.
          name: `FirestoreStreamReadable.query(${table})`,
          maxAttempts: 5,
          delay: 5000,
          delayMultiplier: 2,
          logger,
          timeout: 120_000, // 2 minutes
        },
      )
    } catch (err) {
      console.log(
        `FirestoreStreamReadable error!\n`,
        {
          table,
          rowsRetrieved: this.rowsRetrieved,
        },
        err,
      )
      this.emit('error', err)
      return
    }
  }

  /**
   * Returns the lowest-numbered shard (1..SHARDS) that is neither currently
   * querying nor exhausted, or undefined when all shards are busy/done.
   */
  private findNextFreeShard(): PositiveInteger | undefined {
    for (let shard = 1; shard <= SHARDS; shard++) {
      if (!this.queryIsRunningByShard[shard] && !this.doneShards.has(shard)) {
        return shard
      }
    }
  }

  // Round-robin shard picker, currently unused (see _read).
  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
  private _getNextShardAndMove(): PositiveInteger {
    const shard = this.nextShard
    this.nextShard = shard === SHARDS ? 1 : shard + 1
    return shard
  }
}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { Readable } from 'node:stream'
|
|
2
2
|
import { FieldPath, type Query, type QuerySnapshot } from '@google-cloud/firestore'
|
|
3
3
|
import type { DBQuery } from '@naturalcycles/db-lib'
|
|
4
|
-
import {
|
|
4
|
+
import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js'
|
|
5
|
+
import { _since } from '@naturalcycles/js-lib/datetime/time.util.js'
|
|
5
6
|
import type { CommonLogger } from '@naturalcycles/js-lib/log'
|
|
6
7
|
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
|
|
7
8
|
import type { ObjectWithId } from '@naturalcycles/js-lib/types'
|
|
@@ -17,13 +18,16 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
|
|
|
17
18
|
private readonly originalLimit: number
|
|
18
19
|
private rowsRetrieved = 0
|
|
19
20
|
private endCursor?: string
|
|
20
|
-
private
|
|
21
|
+
private queryIsRunning = false
|
|
22
|
+
private paused = false
|
|
21
23
|
private done = false
|
|
22
|
-
|
|
23
|
-
|
|
24
|
+
/**
|
|
25
|
+
* Counts how many times _read was called.
|
|
26
|
+
* For debugging.
|
|
27
|
+
*/
|
|
28
|
+
countReads = 0
|
|
24
29
|
|
|
25
|
-
private readonly opt: FirestoreDBStreamOptions & { batchSize: number;
|
|
26
|
-
// private readonly dsOpt: RunQueryOptions
|
|
30
|
+
private readonly opt: FirestoreDBStreamOptions & { batchSize: number; highWaterMark: number }
|
|
27
31
|
|
|
28
32
|
constructor(
|
|
29
33
|
private q: Query,
|
|
@@ -31,64 +35,64 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
|
|
|
31
35
|
opt: FirestoreDBStreamOptions,
|
|
32
36
|
private logger: CommonLogger,
|
|
33
37
|
) {
|
|
34
|
-
|
|
38
|
+
// 10_000 was optimal in benchmarks
|
|
39
|
+
const { batchSize = 10_000 } = opt
|
|
40
|
+
const { highWaterMark = batchSize * 3 } = opt
|
|
41
|
+
// Defaulting highWaterMark to 3x batchSize
|
|
42
|
+
super({ objectMode: true, highWaterMark })
|
|
35
43
|
|
|
36
44
|
this.opt = {
|
|
37
|
-
rssLimitMB: 1000,
|
|
38
|
-
batchSize: 1000,
|
|
39
45
|
...opt,
|
|
46
|
+
batchSize,
|
|
47
|
+
highWaterMark,
|
|
40
48
|
}
|
|
41
49
|
// todo: support PITR!
|
|
42
|
-
// this.dsOpt = {}
|
|
43
|
-
// if (opt.readAt) {
|
|
44
|
-
// // Datastore expects UnixTimestamp in milliseconds
|
|
45
|
-
// this.dsOpt.readTime = opt.readAt * 1000
|
|
46
|
-
// }
|
|
47
50
|
|
|
48
51
|
this.originalLimit = dbQuery._limitValue
|
|
49
52
|
this.table = dbQuery.table
|
|
50
53
|
|
|
51
|
-
logger.warn(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
+
logger.warn(`!!! using experimentalCursorStream`, {
|
|
55
|
+
table: this.table,
|
|
56
|
+
batchSize,
|
|
57
|
+
highWaterMark,
|
|
58
|
+
})
|
|
54
59
|
}
|
|
55
60
|
|
|
56
|
-
/**
|
|
57
|
-
* Counts how many times _read was called.
|
|
58
|
-
* For debugging.
|
|
59
|
-
*/
|
|
60
|
-
count = 0
|
|
61
|
-
|
|
62
61
|
override _read(): void {
|
|
63
62
|
// this.lastReadTimestamp = Date.now() as UnixTimestampMillis
|
|
64
63
|
|
|
65
64
|
// console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
|
|
66
|
-
this.
|
|
65
|
+
this.countReads++
|
|
67
66
|
|
|
68
67
|
if (this.done) {
|
|
69
68
|
this.logger.warn(`!!! _read was called, but done==true`)
|
|
70
69
|
return
|
|
71
70
|
}
|
|
72
71
|
|
|
73
|
-
if (
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
}
|
|
79
|
-
|
|
72
|
+
if (this.paused) {
|
|
73
|
+
this.logger.log(
|
|
74
|
+
`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`,
|
|
75
|
+
)
|
|
76
|
+
this.paused = false
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
if (this.queryIsRunning) {
|
|
80
|
+
this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`)
|
|
81
|
+
// todo: check if this can cause a "hang", if no more _reads would come later and we get stuck?
|
|
82
|
+
return
|
|
80
83
|
}
|
|
84
|
+
|
|
85
|
+
void this.runNextQuery().catch(err => {
|
|
86
|
+
console.log('error in runNextQuery', err)
|
|
87
|
+
this.emit('error', err)
|
|
88
|
+
})
|
|
81
89
|
}
|
|
82
90
|
|
|
83
91
|
private async runNextQuery(): Promise<void> {
|
|
84
92
|
if (this.done) return
|
|
93
|
+
const { logger, table } = this
|
|
85
94
|
|
|
86
|
-
|
|
87
|
-
const now = Date.now()
|
|
88
|
-
this.totalWait += now - this.lastQueryDone
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
this.running = true
|
|
95
|
+
this.queryIsRunning = true
|
|
92
96
|
|
|
93
97
|
let limit = this.opt.batchSize
|
|
94
98
|
|
|
@@ -103,40 +107,23 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
|
|
|
103
107
|
q = q.startAfter(this.endCursor)
|
|
104
108
|
}
|
|
105
109
|
|
|
106
|
-
|
|
110
|
+
// logger.log(`runNextQuery`, {
|
|
111
|
+
// rowsRetrieved: this.rowsRetrieved,
|
|
112
|
+
// paused: this.paused,
|
|
113
|
+
// })
|
|
107
114
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
{
|
|
114
|
-
name: `FirestoreStreamReadable.query(${this.table})`,
|
|
115
|
-
maxAttempts: 5,
|
|
116
|
-
delay: 5000,
|
|
117
|
-
delayMultiplier: 2,
|
|
118
|
-
logger: this.logger,
|
|
119
|
-
timeout: 120_000, // 2 minutes
|
|
120
|
-
},
|
|
121
|
-
)
|
|
122
|
-
} catch (err) {
|
|
123
|
-
console.log(
|
|
124
|
-
`FirestoreStreamReadable error!\n`,
|
|
125
|
-
{
|
|
126
|
-
table: this.table,
|
|
127
|
-
rowsRetrieved: this.rowsRetrieved,
|
|
128
|
-
},
|
|
129
|
-
err,
|
|
130
|
-
)
|
|
131
|
-
this.emit('error', err)
|
|
132
|
-
// clearInterval(this.maxWaitInterval)
|
|
115
|
+
const started = localTime.nowUnixMillis()
|
|
116
|
+
const qs = await this.runQuery(q)
|
|
117
|
+
logger.log(`${table} query took ${_since(started)}`)
|
|
118
|
+
if (!qs) {
|
|
119
|
+
// error already emitted in runQuery
|
|
133
120
|
return
|
|
134
121
|
}
|
|
135
122
|
|
|
136
123
|
const rows: T[] = []
|
|
137
124
|
let lastDocId: string | undefined
|
|
138
125
|
|
|
139
|
-
for (const doc of qs
|
|
126
|
+
for (const doc of qs.docs) {
|
|
140
127
|
lastDocId = doc.id
|
|
141
128
|
rows.push({
|
|
142
129
|
id: unescapeDocId(doc.id),
|
|
@@ -145,39 +132,67 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
|
|
|
145
132
|
}
|
|
146
133
|
|
|
147
134
|
this.rowsRetrieved += rows.length
|
|
148
|
-
|
|
149
|
-
`${this.table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
|
|
150
|
-
this.totalWait,
|
|
151
|
-
)}`,
|
|
152
|
-
)
|
|
135
|
+
logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved`)
|
|
153
136
|
|
|
154
137
|
this.endCursor = lastDocId
|
|
155
|
-
this.
|
|
156
|
-
|
|
138
|
+
this.queryIsRunning = false // ready to take more _reads
|
|
139
|
+
let shouldContinue = false
|
|
157
140
|
|
|
158
141
|
for (const row of rows) {
|
|
159
|
-
this.push(row)
|
|
142
|
+
shouldContinue = this.push(row)
|
|
160
143
|
}
|
|
161
144
|
|
|
162
|
-
if (
|
|
163
|
-
|
|
164
|
-
`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
|
|
165
|
-
)
|
|
145
|
+
if (!rows.length || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
|
|
146
|
+
logger.warn(`${table} DONE! ${this.rowsRetrieved} rowsRetrieved`)
|
|
166
147
|
this.push(null)
|
|
167
148
|
this.done = true
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
} else {
|
|
172
|
-
const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024)
|
|
149
|
+
this.paused = false
|
|
150
|
+
return
|
|
151
|
+
}
|
|
173
152
|
|
|
174
|
-
|
|
175
|
-
|
|
153
|
+
if (shouldContinue) {
|
|
154
|
+
// Keep the stream flowing
|
|
155
|
+
logger.log(`${table} continuing the stream`)
|
|
156
|
+
void this.runNextQuery()
|
|
157
|
+
} else {
|
|
158
|
+
// Not starting the next query
|
|
159
|
+
if (this.paused) {
|
|
160
|
+
logger.log(`${table} stream is already paused`)
|
|
176
161
|
} else {
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
)
|
|
162
|
+
logger.warn(`${table} pausing the stream`)
|
|
163
|
+
this.paused = true
|
|
180
164
|
}
|
|
181
165
|
}
|
|
182
166
|
}
|
|
167
|
+
|
|
168
|
+
private async runQuery(q: Query): Promise<QuerySnapshot | undefined> {
|
|
169
|
+
const { table, logger } = this
|
|
170
|
+
|
|
171
|
+
try {
|
|
172
|
+
return await pRetry(
|
|
173
|
+
async () => {
|
|
174
|
+
return await q.get()
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
name: `FirestoreStreamReadable.query(${table})`,
|
|
178
|
+
maxAttempts: 5,
|
|
179
|
+
delay: 5000,
|
|
180
|
+
delayMultiplier: 2,
|
|
181
|
+
logger,
|
|
182
|
+
timeout: 120_000, // 2 minutes
|
|
183
|
+
},
|
|
184
|
+
)
|
|
185
|
+
} catch (err) {
|
|
186
|
+
console.log(
|
|
187
|
+
`FirestoreStreamReadable error!\n`,
|
|
188
|
+
{
|
|
189
|
+
table,
|
|
190
|
+
rowsRetrieved: this.rowsRetrieved,
|
|
191
|
+
},
|
|
192
|
+
err,
|
|
193
|
+
)
|
|
194
|
+
this.emit('error', err)
|
|
195
|
+
return
|
|
196
|
+
}
|
|
197
|
+
}
|
|
183
198
|
}
|
package/src/query.util.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { FieldPath, type Query, type WhereFilterOp } from '@google-cloud/firestore'
|
|
2
2
|
import type { DBQuery, DBQueryFilterOperator } from '@naturalcycles/db-lib'
|
|
3
3
|
import type { ObjectWithId } from '@naturalcycles/js-lib/types'
|
|
4
4
|
|
|
@@ -17,14 +17,12 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
|
|
|
17
17
|
|
|
18
18
|
// filter
|
|
19
19
|
for (const f of dbQuery._filters) {
|
|
20
|
-
q = q.where(f.name
|
|
20
|
+
q = q.where(mapName(f.name), OP_MAP[f.op] || (f.op as WhereFilterOp), f.val)
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
// order
|
|
24
24
|
for (const ord of dbQuery._orders) {
|
|
25
|
-
|
|
26
|
-
// .orderBy(FieldPath.documentId())
|
|
27
|
-
q = q.orderBy(ord.name as string, ord.descending ? 'desc' : 'asc')
|
|
25
|
+
q = q.orderBy(mapName(ord.name), ord.descending ? 'desc' : 'asc')
|
|
28
26
|
}
|
|
29
27
|
|
|
30
28
|
// limit
|
|
@@ -32,13 +30,18 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
|
|
|
32
30
|
|
|
33
31
|
// selectedFields
|
|
34
32
|
if (dbQuery._selectedFieldNames) {
|
|
35
|
-
//
|
|
36
|
-
|
|
33
|
+
// id is filtered out, because in Firestore it's not a "property",
|
|
34
|
+
// and doc.id is always returned, even if we request empty set of fields
|
|
35
|
+
q = q.select(...(dbQuery._selectedFieldNames as string[]).filter(n => n !== 'id'))
|
|
37
36
|
}
|
|
38
37
|
|
|
39
38
|
// cursor
|
|
40
39
|
if (dbQuery._startCursor) {
|
|
41
|
-
|
|
40
|
+
// Using `startAfter`, not `startAt` here
|
|
41
|
+
// Why?
|
|
42
|
+
// Because in Firestore, you can only retrieve "last document id" to be used as Cursor.
|
|
43
|
+
// That document was already retrieved, so it makes sense to start AFTER it.
|
|
44
|
+
q = q.startAfter(dbQuery._startCursor)
|
|
42
45
|
}
|
|
43
46
|
|
|
44
47
|
if (dbQuery._endCursor) {
|
|
@@ -47,3 +50,8 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
|
|
|
47
50
|
|
|
48
51
|
return q
|
|
49
52
|
}
|
|
53
|
+
|
|
54
|
+
function mapName<ROW extends ObjectWithId>(name: keyof ROW): string | FieldPath {
|
|
55
|
+
if (name === 'id') return FieldPath.documentId()
|
|
56
|
+
return name as string
|
|
57
|
+
}
|