@naturalcycles/firestore-lib 2.9.0 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import type { Firestore, Query, QuerySnapshot, Transaction } from '@google-cloud
2
2
  import type { CommonDB, CommonDBOptions, CommonDBReadOptions, CommonDBSaveOptions, CommonDBSupport, CommonDBTransactionOptions, DBQuery, DBTransaction, DBTransactionFn, RunQueryResult } from '@naturalcycles/db-lib';
3
3
  import { BaseCommonDB } from '@naturalcycles/db-lib';
4
4
  import { type CommonLogger } from '@naturalcycles/js-lib/log';
5
- import type { NumberOfSeconds, ObjectWithId, StringMap } from '@naturalcycles/js-lib/types';
5
+ import type { ObjectWithId, PositiveInteger, StringMap } from '@naturalcycles/js-lib/types';
6
6
  import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
7
7
  export declare class FirestoreDB extends BaseCommonDB implements CommonDB {
8
8
  constructor(cfg: FirestoreDBCfg);
@@ -66,50 +66,28 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
66
66
  * Defaults to false
67
67
  */
68
68
  experimentalCursorStream?: boolean;
69
+ experimentalShardedStream?: boolean;
69
70
  /**
70
71
  * Applicable to `experimentalCursorStream`.
71
72
  * Defines the size (limit) of each individual query.
72
73
  *
73
- * Default: 1000
74
+ * Default: 10_000
74
75
  */
75
- batchSize?: number;
76
+ batchSize?: PositiveInteger;
76
77
  /**
77
- * Applicable to `experimentalCursorStream`
78
- *
79
- * Set to a value (number of Megabytes) to control the peak RSS size.
80
- * If limit is reached - streaming will pause until the stream keeps up, and then
81
- * resumes.
82
- *
83
- * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
84
- * when _read is called.
85
- *
86
- * @default 1000
87
- */
88
- rssLimitMB?: number;
89
- /**
90
- * Applicable to `experimentalCursorStream`
91
- * Default false.
92
- * If true, stream will pause until consumer requests more data (via _read).
93
- * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
94
- * There will be gaps in time between "last query loaded" and "next query requested".
95
- * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
96
- * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
78
+ * Defaults to 3x batchSize.
79
+ * Default batchSize is 10_000, so default highWaterMark is 30_000.
80
+ * Controls how many rows to have "buffered".
81
+ * Should be at least 1x batchSize, otherwise the stream will be "starving"
82
+ * between the queries.
97
83
  */
98
- singleBatchBuffer?: boolean;
84
+ highWaterMark?: PositiveInteger;
99
85
  /**
100
86
  * Set to `true` to log additional debug info, when using experimentalCursorStream.
101
87
  *
102
88
  * @default false
103
89
  */
104
90
  debug?: boolean;
105
- /**
106
- * Default is undefined.
107
- * If set - sets a "safety timer", which will force call _read after the specified number of seconds.
108
- * This is to prevent possible "dead-lock"/race-condition that would make the stream "hang".
109
- *
110
- * @experimental
111
- */
112
- maxWait?: NumberOfSeconds;
113
91
  }
114
92
  export interface FirestoreDBOptions extends CommonDBOptions {
115
93
  }
@@ -8,6 +8,7 @@ import { _filterUndefinedValues, _omit } from '@naturalcycles/js-lib/object/obje
8
8
  import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
9
9
  import { _stringMapEntries } from '@naturalcycles/js-lib/types';
10
10
  import { escapeDocId, unescapeDocId } from './firestore.util.js';
11
+ import { FirestoreShardedReadable } from './firestoreShardedReadable.js';
11
12
  import { FirestoreStreamReadable } from './firestoreStreamReadable.js';
12
13
  import { dbQueryToFirestoreQuery } from './query.util.js';
13
14
  export class FirestoreDB extends BaseCommonDB {
@@ -23,6 +24,7 @@ export class FirestoreDB extends BaseCommonDB {
23
24
  ...commonDBFullSupport,
24
25
  patchByQuery: false, // todo: can be implemented
25
26
  tableSchemas: false,
27
+ createTransaction: false, // Firestore SDK doesn't support it
26
28
  };
27
29
  // GET
28
30
  async getByIds(table, ids, opt = {}) {
@@ -100,6 +102,9 @@ export class FirestoreDB extends BaseCommonDB {
100
102
  if (opt.experimentalCursorStream) {
101
103
  return new FirestoreStreamReadable(firestoreQuery, q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'));
102
104
  }
105
+ if (opt.experimentalShardedStream) {
106
+ return new FirestoreShardedReadable(firestoreQuery, q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'));
107
+ }
103
108
  return firestoreQuery.stream().map(doc => {
104
109
  return {
105
110
  id: unescapeDocId(doc.id),
@@ -0,0 +1,41 @@
1
+ import { Readable } from 'node:stream';
2
+ import { type Query } from '@google-cloud/firestore';
3
+ import type { DBQuery } from '@naturalcycles/db-lib';
4
+ import type { CommonLogger } from '@naturalcycles/js-lib/log';
5
+ import type { ObjectWithId } from '@naturalcycles/js-lib/types';
6
+ import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
7
+ import type { FirestoreDBStreamOptions } from './firestore.db.js';
8
+ /**
9
+ * Highly, HIGHLY experimental! Object-mode Readable that reads one query as 16 shard-filtered sub-queries (field `shard16`), each paged by document id.
10
+ */
11
+ export declare class FirestoreShardedReadable<T extends ObjectWithId = any> extends Readable implements ReadableTyped<T> {
12
+ private readonly q;
13
+ readonly dbQuery: DBQuery<T>;
14
+ private logger;
15
+ private readonly table;
16
+ private readonly originalLimit;
17
+ private rowsRetrieved;
18
+ /**
19
+ * Next shard to be used for querying. Only advanced by `_getNextShardAndMove`, whose call site in `_read` is commented out.
20
+ */
21
+ private nextShard;
22
+ private cursorByShard;
23
+ private queryIsRunningByShard;
24
+ private paused;
25
+ private done;
26
+ private doneShards;
27
+ private lastQueryDoneByShard;
28
+ private totalWait;
29
+ private readonly opt;
30
+ constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
31
+ /**
32
+ * Counts how many times _read was called (incremented on every call, including calls made after the stream is done).
33
+ * For debugging.
34
+ */
35
+ count: number;
36
+ _read(): void;
37
+ private runNextQuery;
38
+ private runQuery;
39
+ private findNextFreeShard;
40
+ private _getNextShardAndMove;
41
+ }
@@ -0,0 +1,173 @@
1
+ import { Readable } from 'node:stream';
2
+ import { FieldPath } from '@google-cloud/firestore';
3
+ import { localTime } from '@naturalcycles/js-lib/datetime';
4
+ import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
5
+ import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
6
+ import { unescapeDocId } from './firestore.util.js';
7
/** Number of shards the table is split into; shard values run from 1 to SHARDS inclusive. */
const SHARDS = 16;
/** Document field that holds the shard number each sub-query filters on. */
const SHARD_COLUMN = 'shard16';

/**
 * Highly, HIGHLY experimental!
 *
 * Object-mode Readable that reads a Firestore query as up to 16 sub-queries,
 * one per value of the `shard16` field. Every sub-query is ordered by document id
 * and paged with `startAfter(lastDocId)`, `batchSize` rows at a time (default 3000).
 *
 * The stream ends when every shard has returned an empty page, or when the limit
 * of the original DBQuery has been reached.
 *
 * NOTE(review): back-pressure is not applied - the return value of `push()` is ignored
 * and the next query is started right away, so a slow consumer lets the buffer grow.
 */
export class FirestoreShardedReadable extends Readable {
  q;
  dbQuery;
  logger;
  table;
  originalLimit;
  rowsRetrieved = 0;
  /**
   * Next shard to be used for querying.
   * Only advanced by `_getNextShardAndMove`, which is currently not called.
   */
  nextShard = 1;
  /** Last document id received per shard; used as the `startAfter` cursor. */
  cursorByShard = {};
  queryIsRunningByShard = {};
  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
  paused = false;
  done = false;
  /** Shards whose last page came back empty. */
  doneShards = new Set();
  lastQueryDoneByShard = {};
  totalWait = 0;
  opt;

  /**
   * @param q Firestore query that every shard sub-query is derived from
   * @param dbQuery source of the table name and of the overall row limit
   * @param opt stream options; only `batchSize` is read here
   * @param logger receives progress (`log`) and lifecycle (`warn`) messages
   */
  constructor(q, dbQuery, opt, logger) {
    super({ objectMode: true });
    this.q = q;
    this.dbQuery = dbQuery;
    this.logger = logger;
    this.opt = {
      batchSize: 3000,
      ...opt,
    };
    this.originalLimit = dbQuery._limitValue;
    this.table = dbQuery.table;
    logger.warn(`!! using experimentalShardedStream !! ${this.table}, batchSize: ${this.opt.batchSize}`);
  }

  /**
   * Counts how many times _read was called.
   * For debugging.
   */
  count = 0;

  /**
   * Starts a query on the first shard that is neither busy nor exhausted.
   * Does nothing when the stream is done or every remaining shard is busy.
   */
  _read() {
    this.count++;
    if (this.done) {
      this.logger.warn(`!!! _read was called, but done==true`);
      return;
    }
    const shard = this.findNextFreeShard();
    if (!shard) {
      this.logger.log(`_read ${this.count}: all shards are busy, skipping`);
      return;
    }
    this.startShardQuery(shard);
  }

  /**
   * Fire-and-forget wrapper around `runNextQuery`.
   * Every query chain goes through here, so a rejection always becomes an 'error' event
   * instead of an unhandled promise rejection.
   */
  startShardQuery(shard) {
    void this.runNextQuery(shard).catch(err => {
      console.log('error in runNextQuery', err);
      this.emit('error', err);
    });
  }

  /**
   * Fetches the next page of `shard`, pushes its rows, then either ends the stream
   * or starts a query on the next free shard.
   */
  async runNextQuery(shard) {
    // `destroyed`: the consumer gave up, so stop issuing (billable) queries
    if (this.done || this.destroyed) return;
    const { logger, table, originalLimit } = this;

    const lastDone = this.lastQueryDoneByShard[shard];
    if (lastDone) {
      // Same clock as the one that wrote lastQueryDoneByShard
      this.totalWait += localTime.nowUnixMillis() - lastDone;
    }
    this.queryIsRunningByShard[shard] = true;

    // Never ask for more rows than the original query still allows.
    // The remainder is positive here, otherwise `done` would already be set.
    let limit = this.opt.batchSize;
    if (originalLimit) {
      limit = Math.min(limit, originalLimit - this.rowsRetrieved);
    }

    // We have to orderBy documentId, to be able to use id as a cursor
    let q = this.q.where(SHARD_COLUMN, '==', shard).orderBy(FieldPath.documentId()).limit(limit);
    const cursor = this.cursorByShard[shard];
    if (cursor) {
      q = q.startAfter(cursor);
    }

    // Goes through the level-filtered logger, so it only shows up with `debug: true`
    logger.log(`runNextQuery[${shard}]`, {
      retrieved: this.rowsRetrieved,
    });

    const qs = await this.runQuery(q);
    if (!qs) {
      // this means we have already emitted an unrecoverable error
      return;
    }
    if (this.done || this.destroyed) {
      // Another shard ended the stream (limit reached) or the consumer destroyed it
      // while this query was in flight. Pushing now would be a push-after-EOF error.
      return;
    }

    // Queries on other shards may have consumed part of the limit in the meantime
    let docs = qs.docs;
    if (originalLimit) {
      docs = docs.slice(0, Math.max(originalLimit - this.rowsRetrieved, 0));
    }
    const rows = docs.map(doc => ({
      id: unescapeDocId(doc.id),
      ...doc.data(),
    }));

    this.rowsRetrieved += rows.length;
    logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);

    this.cursorByShard[shard] = docs[docs.length - 1]?.id;
    this.queryIsRunningByShard[shard] = false; // ready to take more _reads
    this.lastQueryDoneByShard[shard] = localTime.nowUnixMillis();

    for (const row of rows) {
      this.push(row);
    }

    if (qs.empty) {
      logger.warn(`!!!! Shard ${shard} DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
      this.doneShards.add(shard);
    }

    if (this.doneShards.size === SHARDS) {
      this.markDone(`!!!! DONE: all shards completed, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
      return;
    }
    if (originalLimit && this.rowsRetrieved >= originalLimit) {
      this.markDone(`!!!! DONE: reached total limit of ${originalLimit}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
      return;
    }

    const nextShard = this.findNextFreeShard();
    if (nextShard) {
      this.startShardQuery(nextShard);
    } else {
      logger.warn(`${table} all shards are busy in runNextQuery, skipping`);
    }
  }

  /** Logs `message`, signals end-of-stream and flags the stream as done. */
  markDone(message) {
    this.logger.warn(message);
    this.push(null);
    this.paused = false;
    this.done = true;
  }

  /**
   * Runs `q.get()` through pRetry (5 attempts, 5000 delay doubling each time, 120_000 timeout).
   * When all attempts fail, emits 'error' on the stream and returns undefined.
   */
  async runQuery(q) {
    const { table, logger } = this;
    try {
      return await pRetry(async () => {
        return await q.get();
      }, {
        name: `FirestoreShardedReadable.query(${table})`,
        maxAttempts: 5,
        delay: 5000,
        delayMultiplier: 2,
        logger,
        timeout: 120_000, // 2 minutes
      });
    } catch (err) {
      console.log(`FirestoreShardedReadable error!\n`, {
        table,
        rowsRetrieved: this.rowsRetrieved,
      }, err);
      this.emit('error', err);
      return;
    }
  }

  /**
   * Returns the lowest shard number that has no query in flight and is not exhausted,
   * or undefined when there is none.
   */
  findNextFreeShard() {
    for (let shard = 1; shard <= SHARDS; shard++) {
      if (!this.queryIsRunningByShard[shard] && !this.doneShards.has(shard)) {
        return shard;
      }
    }
  }

  /** Round-robin alternative to `findNextFreeShard`: returns `nextShard` and advances it, wrapping to 1. */
  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
  _getNextShardAndMove() {
    const shard = this.nextShard;
    this.nextShard = shard === SHARDS ? 1 : shard + 1;
    return shard;
  }
}
@@ -15,15 +15,14 @@ export declare class FirestoreStreamReadable<T extends ObjectWithId = any> exten
15
15
  private queryIsRunning;
16
16
  private paused;
17
17
  private done;
18
- private lastQueryDone?;
19
- private totalWait;
20
- private readonly opt;
21
- constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
22
18
  /**
23
19
  * Counts how many times _read was called.
24
20
  * For debugging.
25
21
  */
26
- count: number;
22
+ countReads: number;
23
+ private readonly opt;
24
+ constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
27
25
  _read(): void;
28
26
  private runNextQuery;
27
+ private runQuery;
29
28
  }
@@ -1,6 +1,7 @@
1
1
  import { Readable } from 'node:stream';
2
2
  import { FieldPath } from '@google-cloud/firestore';
3
- import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
3
+ import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
4
+ import { _since } from '@naturalcycles/js-lib/datetime/time.util.js';
4
5
  import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
5
6
  import { unescapeDocId } from './firestore.util.js';
6
7
  export class FirestoreStreamReadable extends Readable {
@@ -13,61 +14,60 @@ export class FirestoreStreamReadable extends Readable {
13
14
  queryIsRunning = false;
14
15
  paused = false;
15
16
  done = false;
16
- lastQueryDone;
17
- totalWait = 0;
17
+ /**
18
+ * Counts how many times _read was called.
19
+ * For debugging.
20
+ */
21
+ countReads = 0;
18
22
  opt;
19
- // private readonly dsOpt: RunQueryOptions
20
23
  constructor(q, dbQuery, opt, logger) {
21
- super({ objectMode: true });
24
+ // 10_000 was optimal in benchmarks
25
+ const { batchSize = 10_000 } = opt;
26
+ const { highWaterMark = batchSize * 3 } = opt;
27
+ // Defaulting highWaterMark to 3x batchSize
28
+ super({ objectMode: true, highWaterMark });
22
29
  this.q = q;
23
30
  this.logger = logger;
24
31
  this.opt = {
25
- rssLimitMB: 1000,
26
- batchSize: 1000,
27
32
  ...opt,
33
+ batchSize,
34
+ highWaterMark,
28
35
  };
29
36
  // todo: support PITR!
30
- // this.dsOpt = {}
31
- // if (opt.readAt) {
32
- // // Datastore expects UnixTimestamp in milliseconds
33
- // this.dsOpt.readTime = opt.readAt * 1000
34
- // }
35
37
  this.originalLimit = dbQuery._limitValue;
36
38
  this.table = dbQuery.table;
37
- logger.warn(`!! using experimentalCursorStream !! ${this.table}, batchSize: ${this.opt.batchSize}`);
39
+ logger.warn(`!!! using experimentalCursorStream`, {
40
+ table: this.table,
41
+ batchSize,
42
+ highWaterMark,
43
+ });
38
44
  }
39
- /**
40
- * Counts how many times _read was called.
41
- * For debugging.
42
- */
43
- count = 0;
44
45
  _read() {
45
46
  // this.lastReadTimestamp = Date.now() as UnixTimestampMillis
46
47
  // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
47
- this.count++;
48
+ this.countReads++;
48
49
  if (this.done) {
49
50
  this.logger.warn(`!!! _read was called, but done==true`);
50
51
  return;
51
52
  }
52
- if (!this.queryIsRunning) {
53
- void this.runNextQuery().catch(err => {
54
- console.log('error in runNextQuery', err);
55
- this.emit('error', err);
56
- });
53
+ if (this.paused) {
54
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`);
55
+ this.paused = false;
57
56
  }
58
- else {
59
- this.logger.log(`_read ${this.count}, queryIsRunning: true`);
57
+ if (this.queryIsRunning) {
58
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`);
60
59
  // todo: check if this can cause a "hang", if no more _reads would come later and we get stuck?
60
+ return;
61
61
  }
62
+ void this.runNextQuery().catch(err => {
63
+ console.log('error in runNextQuery', err);
64
+ this.emit('error', err);
65
+ });
62
66
  }
63
67
  async runNextQuery() {
64
68
  if (this.done)
65
69
  return;
66
70
  const { logger, table } = this;
67
- if (this.lastQueryDone) {
68
- const now = Date.now();
69
- this.totalWait += now - this.lastQueryDone;
70
- }
71
71
  this.queryIsRunning = true;
72
72
  let limit = this.opt.batchSize;
73
73
  if (this.originalLimit) {
@@ -79,26 +79,15 @@ export class FirestoreStreamReadable extends Readable {
79
79
  if (this.endCursor) {
80
80
  q = q.startAfter(this.endCursor);
81
81
  }
82
- let qs;
83
- try {
84
- await pRetry(async () => {
85
- qs = await q.get();
86
- }, {
87
- name: `FirestoreStreamReadable.query(${table})`,
88
- maxAttempts: 5,
89
- delay: 5000,
90
- delayMultiplier: 2,
91
- logger,
92
- timeout: 120_000, // 2 minutes
93
- });
94
- }
95
- catch (err) {
96
- console.log(`FirestoreStreamReadable error!\n`, {
97
- table,
98
- rowsRetrieved: this.rowsRetrieved,
99
- }, err);
100
- this.emit('error', err);
101
- // clearInterval(this.maxWaitInterval)
82
+ // logger.log(`runNextQuery`, {
83
+ // rowsRetrieved: this.rowsRetrieved,
84
+ // paused: this.paused,
85
+ // })
86
+ const started = localTime.nowUnixMillis();
87
+ const qs = await this.runQuery(q);
88
+ logger.log(`${table} query took ${_since(started)}`);
89
+ if (!qs) {
90
+ // error already emitted in runQuery
102
91
  return;
103
92
  }
104
93
  const rows = [];
@@ -111,37 +100,57 @@ export class FirestoreStreamReadable extends Readable {
111
100
  });
112
101
  }
113
102
  this.rowsRetrieved += rows.length;
114
- logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
103
+ logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved`);
115
104
  this.endCursor = lastDocId;
116
105
  this.queryIsRunning = false; // ready to take more _reads
117
- this.lastQueryDone = Date.now();
106
+ let shouldContinue = false;
118
107
  for (const row of rows) {
119
- this.push(row);
108
+ shouldContinue = this.push(row);
120
109
  }
121
- if (qs.empty || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
122
- logger.warn(`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
110
+ if (!rows.length || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
111
+ logger.warn(`${table} DONE! ${this.rowsRetrieved} rowsRetrieved`);
123
112
  this.push(null);
124
- this.paused = false;
125
113
  this.done = true;
114
+ this.paused = false;
126
115
  return;
127
116
  }
128
- if (this.opt.singleBatchBuffer) {
129
- // here we don't start next query until we're asked (via next _read call)
130
- // so, let's do nothing
131
- return;
117
+ if (shouldContinue) {
118
+ // Keep the stream flowing
119
+ logger.log(`${table} continuing the stream`);
120
+ void this.runNextQuery();
132
121
  }
133
- const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024);
134
- const { rssLimitMB } = this.opt;
135
- if (rssMB <= rssLimitMB) {
122
+ else {
123
+ // Not starting the next query
136
124
  if (this.paused) {
137
- logger.warn(`${table} rssLimitMB is below ${rssMB} < ${rssLimitMB}, unpausing stream`);
138
- this.paused = false;
125
+ logger.log(`${table} stream is already paused`);
126
+ }
127
+ else {
128
+ logger.warn(`${table} pausing the stream`);
129
+ this.paused = true;
139
130
  }
140
- void this.runNextQuery();
141
131
  }
142
- else if (!this.paused) {
143
- logger.warn(`${table} rssLimitMB reached ${rssMB} > ${rssLimitMB}, pausing stream`);
144
- this.paused = true;
132
+ }
133
+ async runQuery(q) {
134
+ const { table, logger } = this;
135
+ try {
136
+ return await pRetry(async () => {
137
+ return await q.get();
138
+ }, {
139
+ name: `FirestoreStreamReadable.query(${table})`,
140
+ maxAttempts: 5,
141
+ delay: 5000,
142
+ delayMultiplier: 2,
143
+ logger,
144
+ timeout: 120_000, // 2 minutes
145
+ });
146
+ }
147
+ catch (err) {
148
+ console.log(`FirestoreStreamReadable error!\n`, {
149
+ table,
150
+ rowsRetrieved: this.rowsRetrieved,
151
+ }, err);
152
+ this.emit('error', err);
153
+ return;
145
154
  }
146
155
  }
147
156
  }
@@ -1,4 +1,4 @@
1
- import type { Query } from '@google-cloud/firestore';
1
+ import { type Query } from '@google-cloud/firestore';
2
2
  import type { DBQuery } from '@naturalcycles/db-lib';
3
3
  import type { ObjectWithId } from '@naturalcycles/js-lib/types';
4
4
  export declare function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(dbQuery: DBQuery<ROW>, emptyQuery: Query): Query;
@@ -1,3 +1,4 @@
1
+ import { FieldPath } from '@google-cloud/firestore';
1
2
  // Map DBQueryFilterOp to WhereFilterOp
2
3
  // Currently it's fully aligned!
3
4
  const OP_MAP = {
@@ -8,27 +9,35 @@ export function dbQueryToFirestoreQuery(dbQuery, emptyQuery) {
8
9
  let q = emptyQuery;
9
10
  // filter
10
11
  for (const f of dbQuery._filters) {
11
- q = q.where(f.name, OP_MAP[f.op] || f.op, f.val);
12
+ q = q.where(mapName(f.name), OP_MAP[f.op] || f.op, f.val);
12
13
  }
13
14
  // order
14
15
  for (const ord of dbQuery._orders) {
15
- // todo: support ordering by id like this:
16
- // .orderBy(FieldPath.documentId())
17
- q = q.orderBy(ord.name, ord.descending ? 'desc' : 'asc');
16
+ q = q.orderBy(mapName(ord.name), ord.descending ? 'desc' : 'asc');
18
17
  }
19
18
  // limit
20
19
  q = q.limit(dbQuery._limitValue);
21
20
  // selectedFields
22
21
  if (dbQuery._selectedFieldNames) {
23
- // todo: check if at least id / __key__ is required to be set
24
- q = q.select(...dbQuery._selectedFieldNames);
22
+ // id is filtered out, because in Firestore it's not a "property",
23
+ // and doc.id is always returned, even if we request empty set of fields
24
+ q = q.select(...dbQuery._selectedFieldNames.filter(n => n !== 'id'));
25
25
  }
26
26
  // cursor
27
27
  if (dbQuery._startCursor) {
28
- q = q.startAt(dbQuery._startCursor);
28
+ // Using `startAfter`, not `startAt` here
29
+ // Why?
30
+ // Because in Firestore, you can only retrieve "last document id" to be used as Cursor.
31
+ // That document was already retrieved, so it makes sense to start AFTER it.
32
+ q = q.startAfter(dbQuery._startCursor);
29
33
  }
30
34
  if (dbQuery._endCursor) {
31
35
  q = q.endAt(dbQuery._endCursor);
32
36
  }
33
37
  return q;
34
38
  }
39
+ function mapName(name) {
40
+ if (name === 'id')
41
+ return FieldPath.documentId();
42
+ return name;
43
+ }
package/package.json CHANGED
@@ -38,7 +38,7 @@
38
38
  "engines": {
39
39
  "node": ">=22.12.0"
40
40
  },
41
- "version": "2.9.0",
41
+ "version": "2.10.0",
42
42
  "description": "Firestore implementation of CommonDB interface",
43
43
  "author": "Natural Cycles Team",
44
44
  "license": "MIT",
@@ -28,10 +28,11 @@ import { _assert } from '@naturalcycles/js-lib/error/assert.js'
28
28
  import { type CommonLogger, commonLoggerMinLevel } from '@naturalcycles/js-lib/log'
29
29
  import { _filterUndefinedValues, _omit } from '@naturalcycles/js-lib/object/object.util.js'
30
30
  import { pMap } from '@naturalcycles/js-lib/promise/pMap.js'
31
- import type { NumberOfSeconds, ObjectWithId, StringMap } from '@naturalcycles/js-lib/types'
31
+ import type { ObjectWithId, PositiveInteger, StringMap } from '@naturalcycles/js-lib/types'
32
32
  import { _stringMapEntries } from '@naturalcycles/js-lib/types'
33
33
  import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
34
34
  import { escapeDocId, unescapeDocId } from './firestore.util.js'
35
+ import { FirestoreShardedReadable } from './firestoreShardedReadable.js'
35
36
  import { FirestoreStreamReadable } from './firestoreStreamReadable.js'
36
37
  import { dbQueryToFirestoreQuery } from './query.util.js'
37
38
 
@@ -50,6 +51,7 @@ export class FirestoreDB extends BaseCommonDB implements CommonDB {
50
51
  ...commonDBFullSupport,
51
52
  patchByQuery: false, // todo: can be implemented
52
53
  tableSchemas: false,
54
+ createTransaction: false, // Firestore SDK doesn't support it
53
55
  }
54
56
 
55
57
  // GET
@@ -167,6 +169,15 @@ export class FirestoreDB extends BaseCommonDB implements CommonDB {
167
169
  )
168
170
  }
169
171
 
172
+ if (opt.experimentalShardedStream) {
173
+ return new FirestoreShardedReadable(
174
+ firestoreQuery,
175
+ q,
176
+ opt,
177
+ commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'),
178
+ )
179
+ }
180
+
170
181
  return (firestoreQuery.stream() as ReadableTyped<QueryDocumentSnapshot<any>>).map(doc => {
171
182
  return {
172
183
  id: unescapeDocId(doc.id),
@@ -545,38 +556,24 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
545
556
  */
546
557
  experimentalCursorStream?: boolean
547
558
 
559
+ experimentalShardedStream?: boolean
560
+
548
561
  /**
549
562
  * Applicable to `experimentalCursorStream`.
550
563
  * Defines the size (limit) of each individual query.
551
564
  *
552
- * Default: 1000
553
- */
554
- batchSize?: number
555
-
556
- /**
557
- * Applicable to `experimentalCursorStream`
558
- *
559
- * Set to a value (number of Megabytes) to control the peak RSS size.
560
- * If limit is reached - streaming will pause until the stream keeps up, and then
561
- * resumes.
562
- *
563
- * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
564
- * when _read is called.
565
- *
566
- * @default 1000
565
+ * Default: 10_000
567
566
  */
568
- rssLimitMB?: number
567
+ batchSize?: PositiveInteger
569
568
 
570
569
  /**
571
- * Applicable to `experimentalCursorStream`
572
- * Default false.
573
- * If true, stream will pause until consumer requests more data (via _read).
574
- * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
575
- * There will be gaps in time between "last query loaded" and "next query requested".
576
- * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
577
- * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
570
+ * Defaults to 3x batchSize.
571
+ * Default batchSize is 10_000, so default highWaterMark is 30_000.
572
+ * Controls how many rows to have "buffered".
573
+ * Should be at least 1x batchSize, otherwise the stream will be "starving"
574
+ * between the queries.
578
575
  */
579
- singleBatchBuffer?: boolean
576
+ highWaterMark?: PositiveInteger
580
577
 
581
578
  /**
582
579
  * Set to `true` to log additional debug info, when using experimentalCursorStream.
@@ -584,15 +581,6 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
584
581
  * @default false
585
582
  */
586
583
  debug?: boolean
587
-
588
- /**
589
- * Default is undefined.
590
- * If set - sets a "safety timer", which will force call _read after the specified number of seconds.
591
- * This is to prevent possible "dead-lock"/race-condition that would make the stream "hang".
592
- *
593
- * @experimental
594
- */
595
- maxWait?: NumberOfSeconds
596
584
  }
597
585
 
598
586
  export interface FirestoreDBOptions extends CommonDBOptions {}
@@ -0,0 +1,233 @@
1
+ import { Readable } from 'node:stream'
2
+ import { FieldPath, type Query, type QuerySnapshot } from '@google-cloud/firestore'
3
+ import type { DBQuery } from '@naturalcycles/db-lib'
4
+ import { localTime } from '@naturalcycles/js-lib/datetime'
5
+ import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js'
6
+ import type { CommonLogger } from '@naturalcycles/js-lib/log'
7
+ import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
8
+ import type {
9
+ ObjectWithId,
10
+ PositiveInteger,
11
+ StringMap,
12
+ UnixTimestampMillis,
13
+ } from '@naturalcycles/js-lib/types'
14
+ import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
15
+ import type { FirestoreDBStreamOptions } from './firestore.db.js'
16
+ import { unescapeDocId } from './firestore.util.js'
17
+
18
/** Number of shards the table is split into; shard values run from 1 to SHARDS inclusive. */
const SHARDS = 16
/** Document field that holds the shard number each sub-query filters on. */
const SHARD_COLUMN = 'shard16'

/**
 * Highly, HIGHLY experimental!
 *
 * Object-mode Readable that reads a Firestore query as up to 16 sub-queries,
 * one per value of the `shard16` field. Every sub-query is ordered by document id
 * and paged with `startAfter(lastDocId)`, `batchSize` rows at a time (default 3000).
 *
 * The stream ends when every shard has returned an empty page, or when the limit
 * of the original DBQuery has been reached.
 *
 * NOTE(review): back-pressure is not applied - the return value of `push()` is ignored
 * and the next query is started right away, so a slow consumer lets the buffer grow.
 */
export class FirestoreShardedReadable<T extends ObjectWithId = any>
  extends Readable
  implements ReadableTyped<T>
{
  private readonly table: string
  private readonly originalLimit: number
  private rowsRetrieved = 0
  /**
   * Next shard to be used for querying.
   * Only advanced by `_getNextShardAndMove`, which is currently not called.
   */
  private nextShard = 1
  /** Last document id received per shard; used as the `startAfter` cursor. */
  private cursorByShard: StringMap = {}
  private queryIsRunningByShard: StringMap<boolean> = {}
  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
  private paused = false
  private done = false
  /** Shards whose last page came back empty. */
  private doneShards = new Set<PositiveInteger>()
  private lastQueryDoneByShard: StringMap<UnixTimestampMillis> = {}
  private totalWait = 0

  private readonly opt: FirestoreDBStreamOptions & { batchSize: number }

  /**
   * @param q Firestore query that every shard sub-query is derived from
   * @param dbQuery source of the table name and of the overall row limit
   * @param opt stream options; only `batchSize` is read here
   * @param logger receives progress (`log`) and lifecycle (`warn`) messages
   */
  constructor(
    private readonly q: Query,
    readonly dbQuery: DBQuery<T>,
    opt: FirestoreDBStreamOptions,
    private logger: CommonLogger,
  ) {
    super({ objectMode: true })

    this.opt = {
      batchSize: 3000,
      ...opt,
    }

    this.originalLimit = dbQuery._limitValue
    this.table = dbQuery.table

    logger.warn(
      `!! using experimentalShardedStream !! ${this.table}, batchSize: ${this.opt.batchSize}`,
    )
  }

  /**
   * Counts how many times _read was called.
   * For debugging.
   */
  count = 0

  /**
   * Starts a query on the first shard that is neither busy nor exhausted.
   * Does nothing when the stream is done or every remaining shard is busy.
   */
  override _read(): void {
    this.count++

    if (this.done) {
      this.logger.warn(`!!! _read was called, but done==true`)
      return
    }

    const shard = this.findNextFreeShard()
    if (!shard) {
      this.logger.log(`_read ${this.count}: all shards are busy, skipping`)
      return
    }
    this.startShardQuery(shard)
  }

  /**
   * Fire-and-forget wrapper around `runNextQuery`.
   * Every query chain goes through here, so a rejection always becomes an 'error' event
   * instead of an unhandled promise rejection.
   */
  private startShardQuery(shard: PositiveInteger): void {
    void this.runNextQuery(shard).catch(err => {
      console.log('error in runNextQuery', err)
      this.emit('error', err)
    })
  }

  /**
   * Fetches the next page of `shard`, pushes its rows, then either ends the stream
   * or starts a query on the next free shard.
   */
  private async runNextQuery(shard: PositiveInteger): Promise<void> {
    // `destroyed`: the consumer gave up, so stop issuing (billable) queries
    if (this.done || this.destroyed) return
    const { logger, table, originalLimit } = this

    const lastDone = this.lastQueryDoneByShard[shard]
    if (lastDone) {
      // Same clock as the one that wrote lastQueryDoneByShard
      this.totalWait += localTime.nowUnixMillis() - lastDone
    }

    this.queryIsRunningByShard[shard] = true

    // Never ask for more rows than the original query still allows.
    // The remainder is positive here, otherwise `done` would already be set.
    let limit = this.opt.batchSize
    if (originalLimit) {
      limit = Math.min(limit, originalLimit - this.rowsRetrieved)
    }

    // We have to orderBy documentId, to be able to use id as a cursor
    let q = this.q.where(SHARD_COLUMN, '==', shard).orderBy(FieldPath.documentId()).limit(limit)
    const cursor = this.cursorByShard[shard]
    if (cursor) {
      q = q.startAfter(cursor)
    }

    // Goes through the level-filtered logger, so it only shows up with `debug: true`
    logger.log(`runNextQuery[${shard}]`, {
      retrieved: this.rowsRetrieved,
    })

    const qs = await this.runQuery(q)
    if (!qs) {
      // this means we have already emitted an unrecoverable error
      return
    }
    if (this.done || this.destroyed) {
      // Another shard ended the stream (limit reached) or the consumer destroyed it
      // while this query was in flight. Pushing now would be a push-after-EOF error.
      return
    }

    // Queries on other shards may have consumed part of the limit in the meantime
    let docs = qs.docs
    if (originalLimit) {
      docs = docs.slice(0, Math.max(originalLimit - this.rowsRetrieved, 0))
    }

    const rows = docs.map(
      doc =>
        ({
          id: unescapeDocId(doc.id),
          ...doc.data(),
        }) as T,
    )

    this.rowsRetrieved += rows.length
    logger.log(
      `${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
        this.totalWait,
      )}`,
    )

    this.cursorByShard[shard] = docs[docs.length - 1]?.id
    this.queryIsRunningByShard[shard] = false // ready to take more _reads
    this.lastQueryDoneByShard[shard] = localTime.nowUnixMillis()

    for (const row of rows) {
      this.push(row)
    }

    if (qs.empty) {
      logger.warn(
        `!!!! Shard ${shard} DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      this.doneShards.add(shard)
    }

    if (this.doneShards.size === SHARDS) {
      this.markDone(
        `!!!! DONE: all shards completed, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      return
    }

    if (originalLimit && this.rowsRetrieved >= originalLimit) {
      this.markDone(
        `!!!! DONE: reached total limit of ${originalLimit}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
      )
      return
    }

    const nextShard = this.findNextFreeShard()
    if (nextShard) {
      this.startShardQuery(nextShard)
    } else {
      logger.warn(`${table} all shards are busy in runNextQuery, skipping`)
    }
  }

  /** Logs `message`, signals end-of-stream and flags the stream as done. */
  private markDone(message: string): void {
    this.logger.warn(message)
    this.push(null)
    this.paused = false
    this.done = true
  }

  /**
   * Runs `q.get()` through pRetry (5 attempts, 5000 delay doubling each time, 120_000 timeout).
   * When all attempts fail, emits 'error' on the stream and returns undefined.
   */
  private async runQuery(q: Query): Promise<QuerySnapshot | undefined> {
    const { table, logger } = this

    try {
      return await pRetry(
        async () => {
          return await q.get()
        },
        {
          name: `FirestoreShardedReadable.query(${table})`,
          maxAttempts: 5,
          delay: 5000,
          delayMultiplier: 2,
          logger,
          timeout: 120_000, // 2 minutes
        },
      )
    } catch (err) {
      console.log(
        `FirestoreShardedReadable error!\n`,
        {
          table,
          rowsRetrieved: this.rowsRetrieved,
        },
        err,
      )
      this.emit('error', err)
      return
    }
  }

  /**
   * Returns the lowest shard number that has no query in flight and is not exhausted,
   * or undefined when there is none.
   */
  private findNextFreeShard(): PositiveInteger | undefined {
    for (let shard = 1; shard <= SHARDS; shard++) {
      if (!this.queryIsRunningByShard[shard] && !this.doneShards.has(shard)) {
        return shard
      }
    }
  }

  /** Round-robin alternative to `findNextFreeShard`: returns `nextShard` and advances it, wrapping to 1. */
  // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
  private _getNextShardAndMove(): PositiveInteger {
    const shard = this.nextShard
    this.nextShard = shard === SHARDS ? 1 : shard + 1
    return shard
  }
}
@@ -1,7 +1,8 @@
1
1
  import { Readable } from 'node:stream'
2
2
  import { FieldPath, type Query, type QuerySnapshot } from '@google-cloud/firestore'
3
3
  import type { DBQuery } from '@naturalcycles/db-lib'
4
- import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js'
4
+ import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js'
5
+ import { _since } from '@naturalcycles/js-lib/datetime/time.util.js'
5
6
  import type { CommonLogger } from '@naturalcycles/js-lib/log'
6
7
  import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
7
8
  import type { ObjectWithId } from '@naturalcycles/js-lib/types'
@@ -20,11 +21,13 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
20
21
  private queryIsRunning = false
21
22
  private paused = false
22
23
  private done = false
23
- private lastQueryDone?: number
24
- private totalWait = 0
24
+ /**
25
+ * Counts how many times _read was called.
26
+ * For debugging.
27
+ */
28
+ countReads = 0
25
29
 
26
- private readonly opt: FirestoreDBStreamOptions & { batchSize: number; rssLimitMB: number }
27
- // private readonly dsOpt: RunQueryOptions
30
+ private readonly opt: FirestoreDBStreamOptions & { batchSize: number; highWaterMark: number }
28
31
 
29
32
  constructor(
30
33
  private q: Query,
@@ -32,65 +35,63 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
32
35
  opt: FirestoreDBStreamOptions,
33
36
  private logger: CommonLogger,
34
37
  ) {
35
- super({ objectMode: true })
38
+ // 10_000 was optimal in benchmarks
39
+ const { batchSize = 10_000 } = opt
40
+ const { highWaterMark = batchSize * 3 } = opt
41
+ // Defaulting highWaterMark to 3x batchSize
42
+ super({ objectMode: true, highWaterMark })
36
43
 
37
44
  this.opt = {
38
- rssLimitMB: 1000,
39
- batchSize: 1000,
40
45
  ...opt,
46
+ batchSize,
47
+ highWaterMark,
41
48
  }
42
49
  // todo: support PITR!
43
- // this.dsOpt = {}
44
- // if (opt.readAt) {
45
- // // Datastore expects UnixTimestamp in milliseconds
46
- // this.dsOpt.readTime = opt.readAt * 1000
47
- // }
48
50
 
49
51
  this.originalLimit = dbQuery._limitValue
50
52
  this.table = dbQuery.table
51
53
 
52
- logger.warn(
53
- `!! using experimentalCursorStream !! ${this.table}, batchSize: ${this.opt.batchSize}`,
54
- )
54
+ logger.warn(`!!! using experimentalCursorStream`, {
55
+ table: this.table,
56
+ batchSize,
57
+ highWaterMark,
58
+ })
55
59
  }
56
60
 
57
- /**
58
- * Counts how many times _read was called.
59
- * For debugging.
60
- */
61
- count = 0
62
-
63
61
  override _read(): void {
64
62
  // this.lastReadTimestamp = Date.now() as UnixTimestampMillis
65
63
 
66
64
  // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
67
- this.count++
65
+ this.countReads++
68
66
 
69
67
  if (this.done) {
70
68
  this.logger.warn(`!!! _read was called, but done==true`)
71
69
  return
72
70
  }
73
71
 
74
- if (!this.queryIsRunning) {
75
- void this.runNextQuery().catch(err => {
76
- console.log('error in runNextQuery', err)
77
- this.emit('error', err)
78
- })
79
- } else {
80
- this.logger.log(`_read ${this.count}, queryIsRunning: true`)
72
+ if (this.paused) {
73
+ this.logger.log(
74
+ `_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`,
75
+ )
76
+ this.paused = false
77
+ }
78
+
79
+ if (this.queryIsRunning) {
80
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`)
81
81
  // todo: check if this can cause a "hang", if no more _reads would come later and we get stuck?
82
+ return
82
83
  }
84
+
85
+ void this.runNextQuery().catch(err => {
86
+ console.log('error in runNextQuery', err)
87
+ this.emit('error', err)
88
+ })
83
89
  }
84
90
 
85
91
  private async runNextQuery(): Promise<void> {
86
92
  if (this.done) return
87
93
  const { logger, table } = this
88
94
 
89
- if (this.lastQueryDone) {
90
- const now = Date.now()
91
- this.totalWait += now - this.lastQueryDone
92
- }
93
-
94
95
  this.queryIsRunning = true
95
96
 
96
97
  let limit = this.opt.batchSize
@@ -106,40 +107,23 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
106
107
  q = q.startAfter(this.endCursor)
107
108
  }
108
109
 
109
- let qs: QuerySnapshot
110
+ // logger.log(`runNextQuery`, {
111
+ // rowsRetrieved: this.rowsRetrieved,
112
+ // paused: this.paused,
113
+ // })
110
114
 
111
- try {
112
- await pRetry(
113
- async () => {
114
- qs = await q.get()
115
- },
116
- {
117
- name: `FirestoreStreamReadable.query(${table})`,
118
- maxAttempts: 5,
119
- delay: 5000,
120
- delayMultiplier: 2,
121
- logger,
122
- timeout: 120_000, // 2 minutes
123
- },
124
- )
125
- } catch (err) {
126
- console.log(
127
- `FirestoreStreamReadable error!\n`,
128
- {
129
- table,
130
- rowsRetrieved: this.rowsRetrieved,
131
- },
132
- err,
133
- )
134
- this.emit('error', err)
135
- // clearInterval(this.maxWaitInterval)
115
+ const started = localTime.nowUnixMillis()
116
+ const qs = await this.runQuery(q)
117
+ logger.log(`${table} query took ${_since(started)}`)
118
+ if (!qs) {
119
+ // error already emitted in runQuery
136
120
  return
137
121
  }
138
122
 
139
123
  const rows: T[] = []
140
124
  let lastDocId: string | undefined
141
125
 
142
- for (const doc of qs!.docs) {
126
+ for (const doc of qs.docs) {
143
127
  lastDocId = doc.id
144
128
  rows.push({
145
129
  id: unescapeDocId(doc.id),
@@ -148,48 +132,67 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
148
132
  }
149
133
 
150
134
  this.rowsRetrieved += rows.length
151
- logger.log(
152
- `${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
153
- this.totalWait,
154
- )}`,
155
- )
135
+ logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved`)
156
136
 
157
137
  this.endCursor = lastDocId
158
138
  this.queryIsRunning = false // ready to take more _reads
159
- this.lastQueryDone = Date.now()
139
+ let shouldContinue = false
160
140
 
161
141
  for (const row of rows) {
162
- this.push(row)
142
+ shouldContinue = this.push(row)
163
143
  }
164
144
 
165
- if (qs!.empty || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
166
- logger.warn(
167
- `!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
168
- )
145
+ if (!rows.length || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
146
+ logger.warn(`${table} DONE! ${this.rowsRetrieved} rowsRetrieved`)
169
147
  this.push(null)
170
- this.paused = false
171
148
  this.done = true
149
+ this.paused = false
172
150
  return
173
151
  }
174
152
 
175
- if (this.opt.singleBatchBuffer) {
176
- // here we don't start next query until we're asked (via next _read call)
177
- // so, let's do nothing
178
- return
153
+ if (shouldContinue) {
154
+ // Keep the stream flowing
155
+ logger.log(`${table} continuing the stream`)
156
+ void this.runNextQuery()
157
+ } else {
158
+ // Not starting the next query
159
+ if (this.paused) {
160
+ logger.log(`${table} stream is already paused`)
161
+ } else {
162
+ logger.warn(`${table} pausing the stream`)
163
+ this.paused = true
164
+ }
179
165
  }
166
+ }
180
167
 
181
- const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024)
182
- const { rssLimitMB } = this.opt
168
+ private async runQuery(q: Query): Promise<QuerySnapshot | undefined> {
169
+ const { table, logger } = this
183
170
 
184
- if (rssMB <= rssLimitMB) {
185
- if (this.paused) {
186
- logger.warn(`${table} rssLimitMB is below ${rssMB} < ${rssLimitMB}, unpausing stream`)
187
- this.paused = false
188
- }
189
- void this.runNextQuery()
190
- } else if (!this.paused) {
191
- logger.warn(`${table} rssLimitMB reached ${rssMB} > ${rssLimitMB}, pausing stream`)
192
- this.paused = true
171
+ try {
172
+ return await pRetry(
173
+ async () => {
174
+ return await q.get()
175
+ },
176
+ {
177
+ name: `FirestoreStreamReadable.query(${table})`,
178
+ maxAttempts: 5,
179
+ delay: 5000,
180
+ delayMultiplier: 2,
181
+ logger,
182
+ timeout: 120_000, // 2 minutes
183
+ },
184
+ )
185
+ } catch (err) {
186
+ console.log(
187
+ `FirestoreStreamReadable error!\n`,
188
+ {
189
+ table,
190
+ rowsRetrieved: this.rowsRetrieved,
191
+ },
192
+ err,
193
+ )
194
+ this.emit('error', err)
195
+ return
193
196
  }
194
197
  }
195
198
  }
package/src/query.util.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { Query, WhereFilterOp } from '@google-cloud/firestore'
1
+ import { FieldPath, type Query, type WhereFilterOp } from '@google-cloud/firestore'
2
2
  import type { DBQuery, DBQueryFilterOperator } from '@naturalcycles/db-lib'
3
3
  import type { ObjectWithId } from '@naturalcycles/js-lib/types'
4
4
 
@@ -17,14 +17,12 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
17
17
 
18
18
  // filter
19
19
  for (const f of dbQuery._filters) {
20
- q = q.where(f.name as string, OP_MAP[f.op] || (f.op as WhereFilterOp), f.val)
20
+ q = q.where(mapName(f.name), OP_MAP[f.op] || (f.op as WhereFilterOp), f.val)
21
21
  }
22
22
 
23
23
  // order
24
24
  for (const ord of dbQuery._orders) {
25
- // todo: support ordering by id like this:
26
- // .orderBy(FieldPath.documentId())
27
- q = q.orderBy(ord.name as string, ord.descending ? 'desc' : 'asc')
25
+ q = q.orderBy(mapName(ord.name), ord.descending ? 'desc' : 'asc')
28
26
  }
29
27
 
30
28
  // limit
@@ -32,13 +30,18 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
32
30
 
33
31
  // selectedFields
34
32
  if (dbQuery._selectedFieldNames) {
35
- // todo: check if at least id / __key__ is required to be set
36
- q = q.select(...(dbQuery._selectedFieldNames as string[]))
33
+ // id is filtered out, because in Firestore it's not a "property",
34
+ // and doc.id is always returned, even if we request empty set of fields
35
+ q = q.select(...(dbQuery._selectedFieldNames as string[]).filter(n => n !== 'id'))
37
36
  }
38
37
 
39
38
  // cursor
40
39
  if (dbQuery._startCursor) {
41
- q = q.startAt(dbQuery._startCursor)
40
+ // Using `startAfter`, not `startAt` here
41
+ // Why?
42
+ // Because in Firestore, you can only retrieve "last document id" to be used as Cursor.
43
+ // That document was already retrieved, so it makes sense to start AFTER it.
44
+ q = q.startAfter(dbQuery._startCursor)
42
45
  }
43
46
 
44
47
  if (dbQuery._endCursor) {
@@ -47,3 +50,8 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
47
50
 
48
51
  return q
49
52
  }
53
+
54
/**
 * Translates a DBQuery field name into what Firestore expects in
 * `where` / `orderBy` clauses.
 *
 * The name `'id'` is mapped to `FieldPath.documentId()`;
 * any other name is passed through unchanged as a string.
 *
 * @param name field name taken from a DBQuery filter or order.
 * @returns the document-id `FieldPath` for `'id'`, otherwise the name itself.
 */
function mapName<ROW extends ObjectWithId>(name: keyof ROW): string | FieldPath {
  return name === 'id' ? FieldPath.documentId() : (name as string)
}