@naturalcycles/firestore-lib 2.8.1 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import type { Firestore, Query, QuerySnapshot, Transaction } from '@google-cloud
2
2
  import type { CommonDB, CommonDBOptions, CommonDBReadOptions, CommonDBSaveOptions, CommonDBSupport, CommonDBTransactionOptions, DBQuery, DBTransaction, DBTransactionFn, RunQueryResult } from '@naturalcycles/db-lib';
3
3
  import { BaseCommonDB } from '@naturalcycles/db-lib';
4
4
  import { type CommonLogger } from '@naturalcycles/js-lib/log';
5
- import type { NumberOfSeconds, ObjectWithId, StringMap } from '@naturalcycles/js-lib/types';
5
+ import type { ObjectWithId, PositiveInteger, StringMap } from '@naturalcycles/js-lib/types';
6
6
  import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
7
7
  export declare class FirestoreDB extends BaseCommonDB implements CommonDB {
8
8
  constructor(cfg: FirestoreDBCfg);
@@ -66,50 +66,28 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
66
66
  * Defaults to false
67
67
  */
68
68
  experimentalCursorStream?: boolean;
69
+ experimentalShardedStream?: boolean;
69
70
  /**
70
71
  * Applicable to `experimentalCursorStream`.
71
72
  * Defines the size (limit) of each individual query.
72
73
  *
73
- * Default: 1000
74
+ * Default: 10_000
74
75
  */
75
- batchSize?: number;
76
+ batchSize?: PositiveInteger;
76
77
  /**
77
- * Applicable to `experimentalCursorStream`
78
- *
79
- * Set to a value (number of Megabytes) to control the peak RSS size.
80
- * If limit is reached - streaming will pause until the stream keeps up, and then
81
- * resumes.
82
- *
83
- * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
84
- * when _read is called.
85
- *
86
- * @default 1000
87
- */
88
- rssLimitMB?: number;
89
- /**
90
- * Applicable to `experimentalCursorStream`
91
- * Default false.
92
- * If true, stream will pause until consumer requests more data (via _read).
93
- * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
94
- * There will be gaps in time between "last query loaded" and "next query requested".
95
- * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
96
- * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
78
+ * Defaults to 3x batchSize.
79
+ * Default batchSize is 10_000, so default highWaterMark is 30_000.
80
+ * Controls how many rows to have "buffered".
81
+ * Should be at least 1x batchSize, otherwise the stream will be "starving"
82
+ * between the queries.
97
83
  */
98
- singleBatchBuffer?: boolean;
84
+ highWaterMark?: PositiveInteger;
99
85
  /**
100
86
  * Set to `true` to log additional debug info, when using experimentalCursorStream.
101
87
  *
102
88
  * @default false
103
89
  */
104
90
  debug?: boolean;
105
- /**
106
- * Default is undefined.
107
- * If set - sets a "safety timer", which will force call _read after the specified number of seconds.
108
- * This is to prevent possible "dead-lock"/race-condition that would make the stream "hang".
109
- *
110
- * @experimental
111
- */
112
- maxWait?: NumberOfSeconds;
113
91
  }
114
92
  export interface FirestoreDBOptions extends CommonDBOptions {
115
93
  }
@@ -8,6 +8,7 @@ import { _filterUndefinedValues, _omit } from '@naturalcycles/js-lib/object/obje
8
8
  import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
9
9
  import { _stringMapEntries } from '@naturalcycles/js-lib/types';
10
10
  import { escapeDocId, unescapeDocId } from './firestore.util.js';
11
+ import { FirestoreShardedReadable } from './firestoreShardedReadable.js';
11
12
  import { FirestoreStreamReadable } from './firestoreStreamReadable.js';
12
13
  import { dbQueryToFirestoreQuery } from './query.util.js';
13
14
  export class FirestoreDB extends BaseCommonDB {
@@ -23,6 +24,7 @@ export class FirestoreDB extends BaseCommonDB {
23
24
  ...commonDBFullSupport,
24
25
  patchByQuery: false, // todo: can be implemented
25
26
  tableSchemas: false,
27
+ createTransaction: false, // Firestore SDK doesn't support it
26
28
  };
27
29
  // GET
28
30
  async getByIds(table, ids, opt = {}) {
@@ -100,6 +102,9 @@ export class FirestoreDB extends BaseCommonDB {
100
102
  if (opt.experimentalCursorStream) {
101
103
  return new FirestoreStreamReadable(firestoreQuery, q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'));
102
104
  }
105
+ if (opt.experimentalShardedStream) {
106
+ return new FirestoreShardedReadable(firestoreQuery, q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'));
107
+ }
103
108
  return firestoreQuery.stream().map(doc => {
104
109
  return {
105
110
  id: unescapeDocId(doc.id),
@@ -0,0 +1,41 @@
1
+ import { Readable } from 'node:stream';
2
+ import { type Query } from '@google-cloud/firestore';
3
+ import type { DBQuery } from '@naturalcycles/db-lib';
4
+ import type { CommonLogger } from '@naturalcycles/js-lib/log';
5
+ import type { ObjectWithId } from '@naturalcycles/js-lib/types';
6
+ import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream';
7
+ import type { FirestoreDBStreamOptions } from './firestore.db.js';
8
+ /**
9
+ * Highly, HIGHLY experimental!
10
+ */
11
+ export declare class FirestoreShardedReadable<T extends ObjectWithId = any> extends Readable implements ReadableTyped<T> {
12
+ private readonly q;
13
+ readonly dbQuery: DBQuery<T>;
14
+ private logger;
15
+ private readonly table;
16
+ private readonly originalLimit;
17
+ private rowsRetrieved;
18
+ /**
19
+ * Next shard to be used for querying.
20
+ */
21
+ private nextShard;
22
+ private cursorByShard;
23
+ private queryIsRunningByShard;
24
+ private paused;
25
+ private done;
26
+ private doneShards;
27
+ private lastQueryDoneByShard;
28
+ private totalWait;
29
+ private readonly opt;
30
+ constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
31
+ /**
32
+ * Counts how many times _read was called.
33
+ * For debugging.
34
+ */
35
+ count: number;
36
+ _read(): void;
37
+ private runNextQuery;
38
+ private runQuery;
39
+ private findNextFreeShard;
40
+ private _getNextShardAndMove;
41
+ }
@@ -0,0 +1,173 @@
1
+ import { Readable } from 'node:stream';
2
+ import { FieldPath } from '@google-cloud/firestore';
3
+ import { localTime } from '@naturalcycles/js-lib/datetime';
4
+ import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
5
+ import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
6
+ import { unescapeDocId } from './firestore.util.js';
7
+ const SHARDS = 16;
8
+ const SHARD_COLUMN = 'shard16';
9
+ /**
10
+ * Highly, HIGHLY experimental!
11
+ */
12
+ export class FirestoreShardedReadable extends Readable {
13
+ q;
14
+ dbQuery;
15
+ logger;
16
+ table;
17
+ originalLimit;
18
+ rowsRetrieved = 0;
19
+ /**
20
+ * Next shard to be used for querying.
21
+ */
22
+ nextShard = 1;
23
+ cursorByShard = {};
24
+ queryIsRunningByShard = {};
25
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
26
+ paused = false;
27
+ done = false;
28
+ doneShards = new Set();
29
+ lastQueryDoneByShard = {};
30
+ totalWait = 0;
31
+ opt;
32
+ constructor(q, dbQuery, opt, logger) {
33
+ super({ objectMode: true });
34
+ this.q = q;
35
+ this.dbQuery = dbQuery;
36
+ this.logger = logger;
37
+ this.opt = {
38
+ batchSize: 3000,
39
+ ...opt,
40
+ };
41
+ this.originalLimit = dbQuery._limitValue;
42
+ this.table = dbQuery.table;
43
+ logger.warn(`!! using experimentalShardedStream !! ${this.table}, batchSize: ${this.opt.batchSize}`);
44
+ }
45
+ /**
46
+ * Counts how many times _read was called.
47
+ * For debugging.
48
+ */
49
+ count = 0;
50
+ _read() {
51
+ // this.lastReadTimestamp = Date.now() as UnixTimestampMillis
52
+ // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
53
+ this.count++;
54
+ if (this.done) {
55
+ this.logger.warn(`!!! _read was called, but done==true`);
56
+ return;
57
+ }
58
+ // const shard = this.getNextShardAndMove()
59
+ const shard = this.findNextFreeShard();
60
+ if (!shard) {
61
+ this.logger.log(`_read ${this.count}: all shards are busy, skipping`);
62
+ return;
63
+ }
64
+ void this.runNextQuery(shard).catch(err => {
65
+ console.log('error in runNextQuery', err);
66
+ this.emit('error', err);
67
+ });
68
+ }
69
+ async runNextQuery(shard) {
70
+ if (this.done)
71
+ return;
72
+ const { logger, table } = this;
73
+ if (this.lastQueryDoneByShard[shard]) {
74
+ this.totalWait += Date.now() - this.lastQueryDoneByShard[shard];
75
+ }
76
+ this.queryIsRunningByShard[shard] = true;
77
+ const limit = this.opt.batchSize;
78
+ // We have to orderBy documentId, to be able to use id as a cursor
79
+ let q = this.q.where(SHARD_COLUMN, '==', shard).orderBy(FieldPath.documentId()).limit(limit);
80
+ if (this.cursorByShard[shard]) {
81
+ q = q.startAfter(this.cursorByShard[shard]);
82
+ }
83
+ console.log(`runNextQuery[${shard}]`, {
84
+ retrieved: this.rowsRetrieved,
85
+ });
86
+ const qs = await this.runQuery(q);
87
+ if (!qs) {
88
+ // this means we have already emitted an unrecoverable error
89
+ return;
90
+ }
91
+ const rows = [];
92
+ let lastDocId;
93
+ for (const doc of qs.docs) {
94
+ lastDocId = doc.id;
95
+ rows.push({
96
+ id: unescapeDocId(doc.id),
97
+ ...doc.data(),
98
+ });
99
+ }
100
+ this.rowsRetrieved += rows.length;
101
+ logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
102
+ this.cursorByShard[shard] = lastDocId;
103
+ this.queryIsRunningByShard[shard] = false; // ready to take more _reads
104
+ this.lastQueryDoneByShard[shard] = localTime.nowUnixMillis();
105
+ for (const row of rows) {
106
+ this.push(row);
107
+ }
108
+ if (qs.empty) {
109
+ logger.warn(`!!!! Shard ${shard} DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
110
+ this.doneShards.add(shard);
111
+ }
112
+ if (this.doneShards.size === SHARDS) {
113
+ logger.warn(`!!!! DONE: all shards completed, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
114
+ this.push(null);
115
+ this.paused = false;
116
+ this.done = true;
117
+ return;
118
+ }
119
+ if (this.originalLimit && this.rowsRetrieved >= this.originalLimit) {
120
+ logger.warn(`!!!! DONE: reached total limit of ${this.originalLimit}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
121
+ this.push(null);
122
+ this.paused = false;
123
+ this.done = true;
124
+ return;
125
+ }
126
+ // if (this.paused) {
127
+ // this.paused = false
128
+ // }
129
+ const nextShard = this.findNextFreeShard();
130
+ if (nextShard) {
131
+ void this.runNextQuery(nextShard);
132
+ }
133
+ else {
134
+ logger.warn(`${table} all shards are busy in runNextQuery, skipping`);
135
+ }
136
+ }
137
+ async runQuery(q) {
138
+ const { table, logger } = this;
139
+ try {
140
+ return await pRetry(async () => {
141
+ return await q.get();
142
+ }, {
143
+ name: `FirestoreStreamReadable.query(${table})`,
144
+ maxAttempts: 5,
145
+ delay: 5000,
146
+ delayMultiplier: 2,
147
+ logger,
148
+ timeout: 120_000, // 2 minutes
149
+ });
150
+ }
151
+ catch (err) {
152
+ console.log(`FirestoreStreamReadable error!\n`, {
153
+ table,
154
+ rowsRetrieved: this.rowsRetrieved,
155
+ }, err);
156
+ this.emit('error', err);
157
+ return;
158
+ }
159
+ }
160
+ findNextFreeShard() {
161
+ for (let shard = 1; shard <= SHARDS; shard++) {
162
+ if (!this.queryIsRunningByShard[shard] && !this.doneShards.has(shard)) {
163
+ return shard;
164
+ }
165
+ }
166
+ }
167
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
168
+ _getNextShardAndMove() {
169
+ const shard = this.nextShard;
170
+ this.nextShard = shard === SHARDS ? 1 : shard + 1;
171
+ return shard;
172
+ }
173
+ }
@@ -12,17 +12,17 @@ export declare class FirestoreStreamReadable<T extends ObjectWithId = any> exten
12
12
  private readonly originalLimit;
13
13
  private rowsRetrieved;
14
14
  private endCursor?;
15
- private running;
15
+ private queryIsRunning;
16
+ private paused;
16
17
  private done;
17
- private lastQueryDone?;
18
- private totalWait;
19
- private readonly opt;
20
- constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
21
18
  /**
22
19
  * Counts how many times _read was called.
23
20
  * For debugging.
24
21
  */
25
- count: number;
22
+ countReads: number;
23
+ private readonly opt;
24
+ constructor(q: Query, dbQuery: DBQuery<T>, opt: FirestoreDBStreamOptions, logger: CommonLogger);
26
25
  _read(): void;
27
26
  private runNextQuery;
27
+ private runQuery;
28
28
  }
@@ -1,6 +1,7 @@
1
1
  import { Readable } from 'node:stream';
2
2
  import { FieldPath } from '@google-cloud/firestore';
3
- import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
3
+ import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
4
+ import { _since } from '@naturalcycles/js-lib/datetime/time.util.js';
4
5
  import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
5
6
  import { unescapeDocId } from './firestore.util.js';
6
7
  export class FirestoreStreamReadable extends Readable {
@@ -10,62 +11,64 @@ export class FirestoreStreamReadable extends Readable {
10
11
  originalLimit;
11
12
  rowsRetrieved = 0;
12
13
  endCursor;
13
- running = false;
14
+ queryIsRunning = false;
15
+ paused = false;
14
16
  done = false;
15
- lastQueryDone;
16
- totalWait = 0;
17
+ /**
18
+ * Counts how many times _read was called.
19
+ * For debugging.
20
+ */
21
+ countReads = 0;
17
22
  opt;
18
- // private readonly dsOpt: RunQueryOptions
19
23
  constructor(q, dbQuery, opt, logger) {
20
- super({ objectMode: true });
24
+ // 10_000 was optimal in benchmarks
25
+ const { batchSize = 10_000 } = opt;
26
+ const { highWaterMark = batchSize * 3 } = opt;
27
+ // Defaulting highWaterMark to 3x batchSize
28
+ super({ objectMode: true, highWaterMark });
21
29
  this.q = q;
22
30
  this.logger = logger;
23
31
  this.opt = {
24
- rssLimitMB: 1000,
25
- batchSize: 1000,
26
32
  ...opt,
33
+ batchSize,
34
+ highWaterMark,
27
35
  };
28
36
  // todo: support PITR!
29
- // this.dsOpt = {}
30
- // if (opt.readAt) {
31
- // // Datastore expects UnixTimestamp in milliseconds
32
- // this.dsOpt.readTime = opt.readAt * 1000
33
- // }
34
37
  this.originalLimit = dbQuery._limitValue;
35
38
  this.table = dbQuery.table;
36
- logger.warn(`!! using experimentalCursorStream !! ${this.table}, batchSize: ${this.opt.batchSize}`);
39
+ logger.warn(`!!! using experimentalCursorStream`, {
40
+ table: this.table,
41
+ batchSize,
42
+ highWaterMark,
43
+ });
37
44
  }
38
- /**
39
- * Counts how many times _read was called.
40
- * For debugging.
41
- */
42
- count = 0;
43
45
  _read() {
44
46
  // this.lastReadTimestamp = Date.now() as UnixTimestampMillis
45
47
  // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
46
- this.count++;
48
+ this.countReads++;
47
49
  if (this.done) {
48
50
  this.logger.warn(`!!! _read was called, but done==true`);
49
51
  return;
50
52
  }
51
- if (!this.running) {
52
- void this.runNextQuery().catch(err => {
53
- console.log('error in runNextQuery', err);
54
- this.emit('error', err);
55
- });
53
+ if (this.paused) {
54
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`);
55
+ this.paused = false;
56
56
  }
57
- else {
58
- this.logger.log(`_read ${this.count}, wasRunning: true`);
57
+ if (this.queryIsRunning) {
58
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`);
59
+ // todo: check if this can cause a "hang", if no more _reads would come later and we get stuck?
60
+ return;
59
61
  }
62
+ void this.runNextQuery().catch(err => {
63
+ console.log('error in runNextQuery', err);
64
+ this.emit('error', err);
65
+ });
60
66
  }
61
67
  async runNextQuery() {
62
68
  if (this.done)
63
69
  return;
64
- if (this.lastQueryDone) {
65
- const now = Date.now();
66
- this.totalWait += now - this.lastQueryDone;
67
- }
68
- this.running = true;
70
+ const { logger, table } = this;
71
+ this.queryIsRunning = true;
69
72
  let limit = this.opt.batchSize;
70
73
  if (this.originalLimit) {
71
74
  limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved);
@@ -76,26 +79,15 @@ export class FirestoreStreamReadable extends Readable {
76
79
  if (this.endCursor) {
77
80
  q = q.startAfter(this.endCursor);
78
81
  }
79
- let qs;
80
- try {
81
- await pRetry(async () => {
82
- qs = await q.get();
83
- }, {
84
- name: `FirestoreStreamReadable.query(${this.table})`,
85
- maxAttempts: 5,
86
- delay: 5000,
87
- delayMultiplier: 2,
88
- logger: this.logger,
89
- timeout: 120_000, // 2 minutes
90
- });
91
- }
92
- catch (err) {
93
- console.log(`FirestoreStreamReadable error!\n`, {
94
- table: this.table,
95
- rowsRetrieved: this.rowsRetrieved,
96
- }, err);
97
- this.emit('error', err);
98
- // clearInterval(this.maxWaitInterval)
82
+ // logger.log(`runNextQuery`, {
83
+ // rowsRetrieved: this.rowsRetrieved,
84
+ // paused: this.paused,
85
+ // })
86
+ const started = localTime.nowUnixMillis();
87
+ const qs = await this.runQuery(q);
88
+ logger.log(`${table} query took ${_since(started)}`);
89
+ if (!qs) {
90
+ // error already emitted in runQuery
99
91
  return;
100
92
  }
101
93
  const rows = [];
@@ -108,30 +100,57 @@ export class FirestoreStreamReadable extends Readable {
108
100
  });
109
101
  }
110
102
  this.rowsRetrieved += rows.length;
111
- this.logger.log(`${this.table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
103
+ logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved`);
112
104
  this.endCursor = lastDocId;
113
- this.running = false; // ready to take more _reads
114
- this.lastQueryDone = Date.now();
105
+ this.queryIsRunning = false; // ready to take more _reads
106
+ let shouldContinue = false;
115
107
  for (const row of rows) {
116
- this.push(row);
108
+ shouldContinue = this.push(row);
117
109
  }
118
- if (qs.empty || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
119
- this.logger.warn(`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
110
+ if (!rows.length || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
111
+ logger.warn(`${table} DONE! ${this.rowsRetrieved} rowsRetrieved`);
120
112
  this.push(null);
121
113
  this.done = true;
114
+ this.paused = false;
115
+ return;
122
116
  }
123
- else if (this.opt.singleBatchBuffer) {
124
- // here we don't start next query until we're asked (via next _read call)
125
- // so, let's do nothing
117
+ if (shouldContinue) {
118
+ // Keep the stream flowing
119
+ logger.log(`${table} continuing the stream`);
120
+ void this.runNextQuery();
126
121
  }
127
122
  else {
128
- const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024);
129
- if (rssMB <= this.opt.rssLimitMB) {
130
- void this.runNextQuery();
123
+ // Not starting the next query
124
+ if (this.paused) {
125
+ logger.log(`${table} stream is already paused`);
131
126
  }
132
127
  else {
133
- this.logger.warn(`${this.table} rssLimitMB reached ${rssMB} > ${this.opt.rssLimitMB}, pausing stream`);
128
+ logger.warn(`${table} pausing the stream`);
129
+ this.paused = true;
134
130
  }
135
131
  }
136
132
  }
133
+ async runQuery(q) {
134
+ const { table, logger } = this;
135
+ try {
136
+ return await pRetry(async () => {
137
+ return await q.get();
138
+ }, {
139
+ name: `FirestoreStreamReadable.query(${table})`,
140
+ maxAttempts: 5,
141
+ delay: 5000,
142
+ delayMultiplier: 2,
143
+ logger,
144
+ timeout: 120_000, // 2 minutes
145
+ });
146
+ }
147
+ catch (err) {
148
+ console.log(`FirestoreStreamReadable error!\n`, {
149
+ table,
150
+ rowsRetrieved: this.rowsRetrieved,
151
+ }, err);
152
+ this.emit('error', err);
153
+ return;
154
+ }
155
+ }
137
156
  }
@@ -1,4 +1,4 @@
1
- import type { Query } from '@google-cloud/firestore';
1
+ import { type Query } from '@google-cloud/firestore';
2
2
  import type { DBQuery } from '@naturalcycles/db-lib';
3
3
  import type { ObjectWithId } from '@naturalcycles/js-lib/types';
4
4
  export declare function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(dbQuery: DBQuery<ROW>, emptyQuery: Query): Query;
@@ -1,3 +1,4 @@
1
+ import { FieldPath } from '@google-cloud/firestore';
1
2
  // Map DBQueryFilterOp to WhereFilterOp
2
3
  // Currently it's fully aligned!
3
4
  const OP_MAP = {
@@ -8,27 +9,35 @@ export function dbQueryToFirestoreQuery(dbQuery, emptyQuery) {
8
9
  let q = emptyQuery;
9
10
  // filter
10
11
  for (const f of dbQuery._filters) {
11
- q = q.where(f.name, OP_MAP[f.op] || f.op, f.val);
12
+ q = q.where(mapName(f.name), OP_MAP[f.op] || f.op, f.val);
12
13
  }
13
14
  // order
14
15
  for (const ord of dbQuery._orders) {
15
- // todo: support ordering by id like this:
16
- // .orderBy(FieldPath.documentId())
17
- q = q.orderBy(ord.name, ord.descending ? 'desc' : 'asc');
16
+ q = q.orderBy(mapName(ord.name), ord.descending ? 'desc' : 'asc');
18
17
  }
19
18
  // limit
20
19
  q = q.limit(dbQuery._limitValue);
21
20
  // selectedFields
22
21
  if (dbQuery._selectedFieldNames) {
23
- // todo: check if at least id / __key__ is required to be set
24
- q = q.select(...dbQuery._selectedFieldNames);
22
+ // id is filtered out, because in Firestore it's not a "property",
23
+ // and doc.id is always returned, even if we request empty set of fields
24
+ q = q.select(...dbQuery._selectedFieldNames.filter(n => n !== 'id'));
25
25
  }
26
26
  // cursor
27
27
  if (dbQuery._startCursor) {
28
- q = q.startAt(dbQuery._startCursor);
28
+ // Using `startAfter`, not `startAt` here
29
+ // Why?
30
+ // Because in Firestore, you can only retrieve "last document id" to be used as Cursor.
31
+ // That document was already retrieved, so it makes sense to start AFTER it.
32
+ q = q.startAfter(dbQuery._startCursor);
29
33
  }
30
34
  if (dbQuery._endCursor) {
31
35
  q = q.endAt(dbQuery._endCursor);
32
36
  }
33
37
  return q;
34
38
  }
39
+ function mapName(name) {
40
+ if (name === 'id')
41
+ return FieldPath.documentId();
42
+ return name;
43
+ }
package/package.json CHANGED
@@ -38,7 +38,7 @@
38
38
  "engines": {
39
39
  "node": ">=22.12.0"
40
40
  },
41
- "version": "2.8.1",
41
+ "version": "2.10.0",
42
42
  "description": "Firestore implementation of CommonDB interface",
43
43
  "author": "Natural Cycles Team",
44
44
  "license": "MIT",
@@ -28,10 +28,11 @@ import { _assert } from '@naturalcycles/js-lib/error/assert.js'
28
28
  import { type CommonLogger, commonLoggerMinLevel } from '@naturalcycles/js-lib/log'
29
29
  import { _filterUndefinedValues, _omit } from '@naturalcycles/js-lib/object/object.util.js'
30
30
  import { pMap } from '@naturalcycles/js-lib/promise/pMap.js'
31
- import type { NumberOfSeconds, ObjectWithId, StringMap } from '@naturalcycles/js-lib/types'
31
+ import type { ObjectWithId, PositiveInteger, StringMap } from '@naturalcycles/js-lib/types'
32
32
  import { _stringMapEntries } from '@naturalcycles/js-lib/types'
33
33
  import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
34
34
  import { escapeDocId, unescapeDocId } from './firestore.util.js'
35
+ import { FirestoreShardedReadable } from './firestoreShardedReadable.js'
35
36
  import { FirestoreStreamReadable } from './firestoreStreamReadable.js'
36
37
  import { dbQueryToFirestoreQuery } from './query.util.js'
37
38
 
@@ -50,6 +51,7 @@ export class FirestoreDB extends BaseCommonDB implements CommonDB {
50
51
  ...commonDBFullSupport,
51
52
  patchByQuery: false, // todo: can be implemented
52
53
  tableSchemas: false,
54
+ createTransaction: false, // Firestore SDK doesn't support it
53
55
  }
54
56
 
55
57
  // GET
@@ -167,6 +169,15 @@ export class FirestoreDB extends BaseCommonDB implements CommonDB {
167
169
  )
168
170
  }
169
171
 
172
+ if (opt.experimentalShardedStream) {
173
+ return new FirestoreShardedReadable(
174
+ firestoreQuery,
175
+ q,
176
+ opt,
177
+ commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'),
178
+ )
179
+ }
180
+
170
181
  return (firestoreQuery.stream() as ReadableTyped<QueryDocumentSnapshot<any>>).map(doc => {
171
182
  return {
172
183
  id: unescapeDocId(doc.id),
@@ -545,38 +556,24 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
545
556
  */
546
557
  experimentalCursorStream?: boolean
547
558
 
559
+ experimentalShardedStream?: boolean
560
+
548
561
  /**
549
562
  * Applicable to `experimentalCursorStream`.
550
563
  * Defines the size (limit) of each individual query.
551
564
  *
552
- * Default: 1000
553
- */
554
- batchSize?: number
555
-
556
- /**
557
- * Applicable to `experimentalCursorStream`
558
- *
559
- * Set to a value (number of Megabytes) to control the peak RSS size.
560
- * If limit is reached - streaming will pause until the stream keeps up, and then
561
- * resumes.
562
- *
563
- * Set to 0/undefined to disable. Stream will get "slow" then, cause it'll only run the query
564
- * when _read is called.
565
- *
566
- * @default 1000
565
+ * Default: 10_000
567
566
  */
568
- rssLimitMB?: number
567
+ batchSize?: PositiveInteger
569
568
 
570
569
  /**
571
- * Applicable to `experimentalCursorStream`
572
- * Default false.
573
- * If true, stream will pause until consumer requests more data (via _read).
574
- * It means it'll run slower, as buffer will be equal to batchSize (1000) at max.
575
- * There will be gaps in time between "last query loaded" and "next query requested".
576
- * This mode is useful e.g for DB migrations, where you want to avoid "stale data".
577
- * So, it minimizes the time between "item loaded" and "item saved" during DB migration.
570
+ * Defaults to 3x batchSize.
571
+ * Default batchSize is 10_000, so default highWaterMark is 30_000.
572
+ * Controls how many rows to have "buffered".
573
+ * Should be at least 1x batchSize, otherwise the stream will be "starving"
574
+ * between the queries.
578
575
  */
579
- singleBatchBuffer?: boolean
576
+ highWaterMark?: PositiveInteger
580
577
 
581
578
  /**
582
579
  * Set to `true` to log additional debug info, when using experimentalCursorStream.
@@ -584,15 +581,6 @@ export interface FirestoreDBStreamOptions extends FirestoreDBReadOptions {
584
581
  * @default false
585
582
  */
586
583
  debug?: boolean
587
-
588
- /**
589
- * Default is undefined.
590
- * If set - sets a "safety timer", which will force call _read after the specified number of seconds.
591
- * This is to prevent possible "dead-lock"/race-condition that would make the stream "hang".
592
- *
593
- * @experimental
594
- */
595
- maxWait?: NumberOfSeconds
596
584
  }
597
585
 
598
586
  export interface FirestoreDBOptions extends CommonDBOptions {}
@@ -0,0 +1,233 @@
1
+ import { Readable } from 'node:stream'
2
+ import { FieldPath, type Query, type QuerySnapshot } from '@google-cloud/firestore'
3
+ import type { DBQuery } from '@naturalcycles/db-lib'
4
+ import { localTime } from '@naturalcycles/js-lib/datetime'
5
+ import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js'
6
+ import type { CommonLogger } from '@naturalcycles/js-lib/log'
7
+ import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
8
+ import type {
9
+ ObjectWithId,
10
+ PositiveInteger,
11
+ StringMap,
12
+ UnixTimestampMillis,
13
+ } from '@naturalcycles/js-lib/types'
14
+ import type { ReadableTyped } from '@naturalcycles/nodejs-lib/stream'
15
+ import type { FirestoreDBStreamOptions } from './firestore.db.js'
16
+ import { unescapeDocId } from './firestore.util.js'
17
+
18
+ const SHARDS = 16
19
+ const SHARD_COLUMN = 'shard16'
20
+
21
+ /**
22
+ * Highly, HIGHLY experimental!
23
+ */
24
+ export class FirestoreShardedReadable<T extends ObjectWithId = any>
25
+ extends Readable
26
+ implements ReadableTyped<T>
27
+ {
28
+ private readonly table: string
29
+ private readonly originalLimit: number
30
+ private rowsRetrieved = 0
31
+ /**
32
+ * Next shard to be used for querying.
33
+ */
34
+ private nextShard = 1
35
+ private cursorByShard: StringMap = {}
36
+ private queryIsRunningByShard: StringMap<boolean> = {}
37
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
38
+ private paused = false
39
+ private done = false
40
+ private doneShards = new Set<PositiveInteger>()
41
+ private lastQueryDoneByShard: StringMap<UnixTimestampMillis> = {}
42
+ private totalWait = 0
43
+
44
+ private readonly opt: FirestoreDBStreamOptions & { batchSize: number }
45
+
46
+ constructor(
47
+ private readonly q: Query,
48
+ readonly dbQuery: DBQuery<T>,
49
+ opt: FirestoreDBStreamOptions,
50
+ private logger: CommonLogger,
51
+ ) {
52
+ super({ objectMode: true })
53
+
54
+ this.opt = {
55
+ batchSize: 3000,
56
+ ...opt,
57
+ }
58
+
59
+ this.originalLimit = dbQuery._limitValue
60
+ this.table = dbQuery.table
61
+
62
+ logger.warn(
63
+ `!! using experimentalShardedStream !! ${this.table}, batchSize: ${this.opt.batchSize}`,
64
+ )
65
+ }
66
+
67
+ /**
68
+ * Counts how many times _read was called.
69
+ * For debugging.
70
+ */
71
+ count = 0
72
+
73
+ override _read(): void {
74
+ // this.lastReadTimestamp = Date.now() as UnixTimestampMillis
75
+
76
+ // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
77
+ this.count++
78
+
79
+ if (this.done) {
80
+ this.logger.warn(`!!! _read was called, but done==true`)
81
+ return
82
+ }
83
+
84
+ // const shard = this.getNextShardAndMove()
85
+ const shard = this.findNextFreeShard()
86
+ if (!shard) {
87
+ this.logger.log(`_read ${this.count}: all shards are busy, skipping`)
88
+ return
89
+ }
90
+ void this.runNextQuery(shard).catch(err => {
91
+ console.log('error in runNextQuery', err)
92
+ this.emit('error', err)
93
+ })
94
+ }
95
+
96
+ private async runNextQuery(shard: PositiveInteger): Promise<void> {
97
+ if (this.done) return
98
+ const { logger, table } = this
99
+
100
+ if (this.lastQueryDoneByShard[shard]) {
101
+ this.totalWait += Date.now() - this.lastQueryDoneByShard[shard]
102
+ }
103
+
104
+ this.queryIsRunningByShard[shard] = true
105
+
106
+ const limit = this.opt.batchSize
107
+
108
+ // We have to orderBy documentId, to be able to use id as a cursor
109
+
110
+ let q = this.q.where(SHARD_COLUMN, '==', shard).orderBy(FieldPath.documentId()).limit(limit)
111
+ if (this.cursorByShard[shard]) {
112
+ q = q.startAfter(this.cursorByShard[shard])
113
+ }
114
+
115
+ console.log(`runNextQuery[${shard}]`, {
116
+ retrieved: this.rowsRetrieved,
117
+ })
118
+ const qs = await this.runQuery(q)
119
+ if (!qs) {
120
+ // this means we have already emitted an unrecoverable error
121
+ return
122
+ }
123
+
124
+ const rows: T[] = []
125
+ let lastDocId: string | undefined
126
+
127
+ for (const doc of qs.docs) {
128
+ lastDocId = doc.id
129
+ rows.push({
130
+ id: unescapeDocId(doc.id),
131
+ ...doc.data(),
132
+ } as T)
133
+ }
134
+
135
+ this.rowsRetrieved += rows.length
136
+ logger.log(
137
+ `${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
138
+ this.totalWait,
139
+ )}`,
140
+ )
141
+
142
+ this.cursorByShard[shard] = lastDocId
143
+ this.queryIsRunningByShard[shard] = false // ready to take more _reads
144
+ this.lastQueryDoneByShard[shard] = localTime.nowUnixMillis()
145
+
146
+ for (const row of rows) {
147
+ this.push(row)
148
+ }
149
+
150
+ if (qs.empty) {
151
+ logger.warn(
152
+ `!!!! Shard ${shard} DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
153
+ )
154
+ this.doneShards.add(shard)
155
+ }
156
+
157
+ if (this.doneShards.size === SHARDS) {
158
+ logger.warn(
159
+ `!!!! DONE: all shards completed, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
160
+ )
161
+ this.push(null)
162
+ this.paused = false
163
+ this.done = true
164
+ return
165
+ }
166
+
167
+ if (this.originalLimit && this.rowsRetrieved >= this.originalLimit) {
168
+ logger.warn(
169
+ `!!!! DONE: reached total limit of ${this.originalLimit}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
170
+ )
171
+ this.push(null)
172
+ this.paused = false
173
+ this.done = true
174
+ return
175
+ }
176
+
177
+ // if (this.paused) {
178
+ // this.paused = false
179
+ // }
180
+ const nextShard = this.findNextFreeShard()
181
+ if (nextShard) {
182
+ void this.runNextQuery(nextShard)
183
+ } else {
184
+ logger.warn(`${table} all shards are busy in runNextQuery, skipping`)
185
+ }
186
+ }
187
+
188
+ private async runQuery(q: Query): Promise<QuerySnapshot | undefined> {
189
+ const { table, logger } = this
190
+
191
+ try {
192
+ return await pRetry(
193
+ async () => {
194
+ return await q.get()
195
+ },
196
+ {
197
+ name: `FirestoreStreamReadable.query(${table})`,
198
+ maxAttempts: 5,
199
+ delay: 5000,
200
+ delayMultiplier: 2,
201
+ logger,
202
+ timeout: 120_000, // 2 minutes
203
+ },
204
+ )
205
+ } catch (err) {
206
+ console.log(
207
+ `FirestoreStreamReadable error!\n`,
208
+ {
209
+ table,
210
+ rowsRetrieved: this.rowsRetrieved,
211
+ },
212
+ err,
213
+ )
214
+ this.emit('error', err)
215
+ return
216
+ }
217
+ }
218
+
219
+ private findNextFreeShard(): PositiveInteger | undefined {
220
+ for (let shard = 1; shard <= SHARDS; shard++) {
221
+ if (!this.queryIsRunningByShard[shard] && !this.doneShards.has(shard)) {
222
+ return shard
223
+ }
224
+ }
225
+ }
226
+
227
+ // biome-ignore lint/correctness/noUnusedPrivateClassMembers: ok
228
+ private _getNextShardAndMove(): PositiveInteger {
229
+ const shard = this.nextShard
230
+ this.nextShard = shard === SHARDS ? 1 : shard + 1
231
+ return shard
232
+ }
233
+ }
@@ -1,7 +1,8 @@
1
1
  import { Readable } from 'node:stream'
2
2
  import { FieldPath, type Query, type QuerySnapshot } from '@google-cloud/firestore'
3
3
  import type { DBQuery } from '@naturalcycles/db-lib'
4
- import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js'
4
+ import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js'
5
+ import { _since } from '@naturalcycles/js-lib/datetime/time.util.js'
5
6
  import type { CommonLogger } from '@naturalcycles/js-lib/log'
6
7
  import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js'
7
8
  import type { ObjectWithId } from '@naturalcycles/js-lib/types'
@@ -17,13 +18,16 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
17
18
  private readonly originalLimit: number
18
19
  private rowsRetrieved = 0
19
20
  private endCursor?: string
20
- private running = false
21
+ private queryIsRunning = false
22
+ private paused = false
21
23
  private done = false
22
- private lastQueryDone?: number
23
- private totalWait = 0
24
+ /**
25
+ * Counts how many times _read was called.
26
+ * For debugging.
27
+ */
28
+ countReads = 0
24
29
 
25
- private readonly opt: FirestoreDBStreamOptions & { batchSize: number; rssLimitMB: number }
26
- // private readonly dsOpt: RunQueryOptions
30
+ private readonly opt: FirestoreDBStreamOptions & { batchSize: number; highWaterMark: number }
27
31
 
28
32
  constructor(
29
33
  private q: Query,
@@ -31,64 +35,64 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
31
35
  opt: FirestoreDBStreamOptions,
32
36
  private logger: CommonLogger,
33
37
  ) {
34
- super({ objectMode: true })
38
+ // 10_000 was optimal in benchmarks
39
+ const { batchSize = 10_000 } = opt
40
+ const { highWaterMark = batchSize * 3 } = opt
41
+ // Defaulting highWaterMark to 3x batchSize
42
+ super({ objectMode: true, highWaterMark })
35
43
 
36
44
  this.opt = {
37
- rssLimitMB: 1000,
38
- batchSize: 1000,
39
45
  ...opt,
46
+ batchSize,
47
+ highWaterMark,
40
48
  }
41
49
  // todo: support PITR!
42
- // this.dsOpt = {}
43
- // if (opt.readAt) {
44
- // // Datastore expects UnixTimestamp in milliseconds
45
- // this.dsOpt.readTime = opt.readAt * 1000
46
- // }
47
50
 
48
51
  this.originalLimit = dbQuery._limitValue
49
52
  this.table = dbQuery.table
50
53
 
51
- logger.warn(
52
- `!! using experimentalCursorStream !! ${this.table}, batchSize: ${this.opt.batchSize}`,
53
- )
54
+ logger.warn(`!!! using experimentalCursorStream`, {
55
+ table: this.table,
56
+ batchSize,
57
+ highWaterMark,
58
+ })
54
59
  }
55
60
 
56
- /**
57
- * Counts how many times _read was called.
58
- * For debugging.
59
- */
60
- count = 0
61
-
62
61
  override _read(): void {
63
62
  // this.lastReadTimestamp = Date.now() as UnixTimestampMillis
64
63
 
65
64
  // console.log(`_read called ${++this.count}, wasRunning: ${this.running}`) // debugging
66
- this.count++
65
+ this.countReads++
67
66
 
68
67
  if (this.done) {
69
68
  this.logger.warn(`!!! _read was called, but done==true`)
70
69
  return
71
70
  }
72
71
 
73
- if (!this.running) {
74
- void this.runNextQuery().catch(err => {
75
- console.log('error in runNextQuery', err)
76
- this.emit('error', err)
77
- })
78
- } else {
79
- this.logger.log(`_read ${this.count}, wasRunning: true`)
72
+ if (this.paused) {
73
+ this.logger.log(
74
+ `_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`,
75
+ )
76
+ this.paused = false
77
+ }
78
+
79
+ if (this.queryIsRunning) {
80
+ this.logger.log(`_read #${this.countReads}, queryIsRunning: true, doing nothing`)
81
+ // todo: check if this can cause a "hang", if no more _reads would come later and we get stuck?
82
+ return
80
83
  }
84
+
85
+ void this.runNextQuery().catch(err => {
86
+ console.log('error in runNextQuery', err)
87
+ this.emit('error', err)
88
+ })
81
89
  }
82
90
 
83
91
  private async runNextQuery(): Promise<void> {
84
92
  if (this.done) return
93
+ const { logger, table } = this
85
94
 
86
- if (this.lastQueryDone) {
87
- const now = Date.now()
88
- this.totalWait += now - this.lastQueryDone
89
- }
90
-
91
- this.running = true
95
+ this.queryIsRunning = true
92
96
 
93
97
  let limit = this.opt.batchSize
94
98
 
@@ -103,40 +107,23 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
103
107
  q = q.startAfter(this.endCursor)
104
108
  }
105
109
 
106
- let qs: QuerySnapshot
110
+ // logger.log(`runNextQuery`, {
111
+ // rowsRetrieved: this.rowsRetrieved,
112
+ // paused: this.paused,
113
+ // })
107
114
 
108
- try {
109
- await pRetry(
110
- async () => {
111
- qs = await q.get()
112
- },
113
- {
114
- name: `FirestoreStreamReadable.query(${this.table})`,
115
- maxAttempts: 5,
116
- delay: 5000,
117
- delayMultiplier: 2,
118
- logger: this.logger,
119
- timeout: 120_000, // 2 minutes
120
- },
121
- )
122
- } catch (err) {
123
- console.log(
124
- `FirestoreStreamReadable error!\n`,
125
- {
126
- table: this.table,
127
- rowsRetrieved: this.rowsRetrieved,
128
- },
129
- err,
130
- )
131
- this.emit('error', err)
132
- // clearInterval(this.maxWaitInterval)
115
+ const started = localTime.nowUnixMillis()
116
+ const qs = await this.runQuery(q)
117
+ logger.log(`${table} query took ${_since(started)}`)
118
+ if (!qs) {
119
+ // error already emitted in runQuery
133
120
  return
134
121
  }
135
122
 
136
123
  const rows: T[] = []
137
124
  let lastDocId: string | undefined
138
125
 
139
- for (const doc of qs!.docs) {
126
+ for (const doc of qs.docs) {
140
127
  lastDocId = doc.id
141
128
  rows.push({
142
129
  id: unescapeDocId(doc.id),
@@ -145,39 +132,67 @@ export class FirestoreStreamReadable<T extends ObjectWithId = any>
145
132
  }
146
133
 
147
134
  this.rowsRetrieved += rows.length
148
- this.logger.log(
149
- `${this.table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(
150
- this.totalWait,
151
- )}`,
152
- )
135
+ logger.log(`${table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved`)
153
136
 
154
137
  this.endCursor = lastDocId
155
- this.running = false // ready to take more _reads
156
- this.lastQueryDone = Date.now()
138
+ this.queryIsRunning = false // ready to take more _reads
139
+ let shouldContinue = false
157
140
 
158
141
  for (const row of rows) {
159
- this.push(row)
142
+ shouldContinue = this.push(row)
160
143
  }
161
144
 
162
- if (qs!.empty || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
163
- this.logger.warn(
164
- `!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`,
165
- )
145
+ if (!rows.length || (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
146
+ logger.warn(`${table} DONE! ${this.rowsRetrieved} rowsRetrieved`)
166
147
  this.push(null)
167
148
  this.done = true
168
- } else if (this.opt.singleBatchBuffer) {
169
- // here we don't start next query until we're asked (via next _read call)
170
- // so, let's do nothing
171
- } else {
172
- const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024)
149
+ this.paused = false
150
+ return
151
+ }
173
152
 
174
- if (rssMB <= this.opt.rssLimitMB) {
175
- void this.runNextQuery()
153
+ if (shouldContinue) {
154
+ // Keep the stream flowing
155
+ logger.log(`${table} continuing the stream`)
156
+ void this.runNextQuery()
157
+ } else {
158
+ // Not starting the next query
159
+ if (this.paused) {
160
+ logger.log(`${table} stream is already paused`)
176
161
  } else {
177
- this.logger.warn(
178
- `${this.table} rssLimitMB reached ${rssMB} > ${this.opt.rssLimitMB}, pausing stream`,
179
- )
162
+ logger.warn(`${table} pausing the stream`)
163
+ this.paused = true
180
164
  }
181
165
  }
182
166
  }
167
+
168
+ private async runQuery(q: Query): Promise<QuerySnapshot | undefined> { // Runs q.get() through pRetry; resolves to undefined (after emitting 'error' on the stream) if pRetry rejects.
169
+ const { table, logger } = this
170
+
171
+ try {
172
+ return await pRetry(
173
+ async () => {
174
+ return await q.get()
175
+ },
176
+ {
177
+ name: `FirestoreStreamReadable.query(${table})`,
178
+ maxAttempts: 5,
179
+ delay: 5000, // NOTE(review): presumably milliseconds - confirm against pRetry options
180
+ delayMultiplier: 2,
181
+ logger,
182
+ timeout: 120_000, // 2 minutes
183
+ },
184
+ )
185
+ } catch (err) {
186
+ console.log( // NOTE(review): written to console.log, not the injected `logger` - confirm this is intentional
187
+ `FirestoreStreamReadable error!\n`,
188
+ {
189
+ table,
190
+ rowsRetrieved: this.rowsRetrieved,
191
+ },
192
+ err,
193
+ )
194
+ this.emit('error', err) // the failure is reported as a stream 'error' event rather than re-thrown
195
+ return // resolves to undefined on failure; runNextQuery returns early when it receives undefined
196
+ }
197
+ }
183
198
  }
package/src/query.util.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { Query, WhereFilterOp } from '@google-cloud/firestore'
1
+ import { FieldPath, type Query, type WhereFilterOp } from '@google-cloud/firestore'
2
2
  import type { DBQuery, DBQueryFilterOperator } from '@naturalcycles/db-lib'
3
3
  import type { ObjectWithId } from '@naturalcycles/js-lib/types'
4
4
 
@@ -17,14 +17,12 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
17
17
 
18
18
  // filter
19
19
  for (const f of dbQuery._filters) {
20
- q = q.where(f.name as string, OP_MAP[f.op] || (f.op as WhereFilterOp), f.val)
20
+ q = q.where(mapName(f.name), OP_MAP[f.op] || (f.op as WhereFilterOp), f.val)
21
21
  }
22
22
 
23
23
  // order
24
24
  for (const ord of dbQuery._orders) {
25
- // todo: support ordering by id like this:
26
- // .orderBy(FieldPath.documentId())
27
- q = q.orderBy(ord.name as string, ord.descending ? 'desc' : 'asc')
25
+ q = q.orderBy(mapName(ord.name), ord.descending ? 'desc' : 'asc')
28
26
  }
29
27
 
30
28
  // limit
@@ -32,13 +30,18 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
32
30
 
33
31
  // selectedFields
34
32
  if (dbQuery._selectedFieldNames) {
35
- // todo: check if at least id / __key__ is required to be set
36
- q = q.select(...(dbQuery._selectedFieldNames as string[]))
33
+ // id is filtered out, because in Firestore it's not a "property",
34
+ // and doc.id is always returned, even if we request empty set of fields
35
+ q = q.select(...(dbQuery._selectedFieldNames as string[]).filter(n => n !== 'id'))
37
36
  }
38
37
 
39
38
  // cursor
40
39
  if (dbQuery._startCursor) {
41
- q = q.startAt(dbQuery._startCursor)
40
+ // Using `startAfter`, not `startAt` here
41
+ // Why?
42
+ // Because in Firestore, you can only retrieve "last document id" to be used as Cursor.
43
+ // That document was already retrieved, so it makes sense to start AFTER it.
44
+ q = q.startAfter(dbQuery._startCursor)
42
45
  }
43
46
 
44
47
  if (dbQuery._endCursor) {
@@ -47,3 +50,8 @@ export function dbQueryToFirestoreQuery<ROW extends ObjectWithId>(
47
50
 
48
51
  return q
49
52
  }
53
+
54
+ function mapName<ROW extends ObjectWithId>(name: keyof ROW): string | FieldPath { // Translates a DBQuery field name into the value passed to Firestore's where()/orderBy().
55
+ if (name === 'id') return FieldPath.documentId() // 'id' is mapped to the document-id FieldPath instead of a field name
56
+ return name as string // every other name is passed through unchanged
57
+ }