@powersync/service-module-postgres-storage 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. package/CHANGELOG.md +17 -0
  2. package/LICENSE +67 -0
  3. package/README.md +67 -0
  4. package/dist/.tsbuildinfo +1 -0
  5. package/dist/@types/index.d.ts +7 -0
  6. package/dist/@types/migrations/PostgresMigrationAgent.d.ts +12 -0
  7. package/dist/@types/migrations/PostgresMigrationStore.d.ts +14 -0
  8. package/dist/@types/migrations/migration-utils.d.ts +3 -0
  9. package/dist/@types/migrations/scripts/1684951997326-init.d.ts +3 -0
  10. package/dist/@types/module/PostgresStorageModule.d.ts +6 -0
  11. package/dist/@types/storage/PostgresBucketStorageFactory.d.ts +42 -0
  12. package/dist/@types/storage/PostgresCompactor.d.ts +40 -0
  13. package/dist/@types/storage/PostgresStorageProvider.d.ts +5 -0
  14. package/dist/@types/storage/PostgresSyncRulesStorage.d.ts +46 -0
  15. package/dist/@types/storage/PostgresTestStorageFactoryGenerator.d.ts +13 -0
  16. package/dist/@types/storage/batch/OperationBatch.d.ts +47 -0
  17. package/dist/@types/storage/batch/PostgresBucketBatch.d.ts +90 -0
  18. package/dist/@types/storage/batch/PostgresPersistedBatch.d.ts +64 -0
  19. package/dist/@types/storage/checkpoints/PostgresWriteCheckpointAPI.d.ts +20 -0
  20. package/dist/@types/storage/storage-index.d.ts +5 -0
  21. package/dist/@types/storage/sync-rules/PostgresPersistedSyncRulesContent.d.ts +17 -0
  22. package/dist/@types/types/codecs.d.ts +61 -0
  23. package/dist/@types/types/models/ActiveCheckpoint.d.ts +12 -0
  24. package/dist/@types/types/models/ActiveCheckpointNotification.d.ts +19 -0
  25. package/dist/@types/types/models/BucketData.d.ts +22 -0
  26. package/dist/@types/types/models/BucketParameters.d.ts +11 -0
  27. package/dist/@types/types/models/CurrentData.d.ts +22 -0
  28. package/dist/@types/types/models/Instance.d.ts +6 -0
  29. package/dist/@types/types/models/Migration.d.ts +12 -0
  30. package/dist/@types/types/models/SourceTable.d.ts +31 -0
  31. package/dist/@types/types/models/SyncRules.d.ts +47 -0
  32. package/dist/@types/types/models/WriteCheckpoint.d.ts +15 -0
  33. package/dist/@types/types/models/models-index.d.ts +10 -0
  34. package/dist/@types/types/types.d.ts +94 -0
  35. package/dist/@types/utils/bson.d.ts +6 -0
  36. package/dist/@types/utils/bucket-data.d.ts +18 -0
  37. package/dist/@types/utils/db.d.ts +8 -0
  38. package/dist/@types/utils/ts-codec.d.ts +5 -0
  39. package/dist/@types/utils/utils-index.d.ts +4 -0
  40. package/dist/index.js +8 -0
  41. package/dist/index.js.map +1 -0
  42. package/dist/migrations/PostgresMigrationAgent.js +36 -0
  43. package/dist/migrations/PostgresMigrationAgent.js.map +1 -0
  44. package/dist/migrations/PostgresMigrationStore.js +60 -0
  45. package/dist/migrations/PostgresMigrationStore.js.map +1 -0
  46. package/dist/migrations/migration-utils.js +13 -0
  47. package/dist/migrations/migration-utils.js.map +1 -0
  48. package/dist/migrations/scripts/1684951997326-init.js +196 -0
  49. package/dist/migrations/scripts/1684951997326-init.js.map +1 -0
  50. package/dist/module/PostgresStorageModule.js +23 -0
  51. package/dist/module/PostgresStorageModule.js.map +1 -0
  52. package/dist/storage/PostgresBucketStorageFactory.js +433 -0
  53. package/dist/storage/PostgresBucketStorageFactory.js.map +1 -0
  54. package/dist/storage/PostgresCompactor.js +298 -0
  55. package/dist/storage/PostgresCompactor.js.map +1 -0
  56. package/dist/storage/PostgresStorageProvider.js +35 -0
  57. package/dist/storage/PostgresStorageProvider.js.map +1 -0
  58. package/dist/storage/PostgresSyncRulesStorage.js +619 -0
  59. package/dist/storage/PostgresSyncRulesStorage.js.map +1 -0
  60. package/dist/storage/PostgresTestStorageFactoryGenerator.js +110 -0
  61. package/dist/storage/PostgresTestStorageFactoryGenerator.js.map +1 -0
  62. package/dist/storage/batch/OperationBatch.js +93 -0
  63. package/dist/storage/batch/OperationBatch.js.map +1 -0
  64. package/dist/storage/batch/PostgresBucketBatch.js +732 -0
  65. package/dist/storage/batch/PostgresBucketBatch.js.map +1 -0
  66. package/dist/storage/batch/PostgresPersistedBatch.js +367 -0
  67. package/dist/storage/batch/PostgresPersistedBatch.js.map +1 -0
  68. package/dist/storage/checkpoints/PostgresWriteCheckpointAPI.js +148 -0
  69. package/dist/storage/checkpoints/PostgresWriteCheckpointAPI.js.map +1 -0
  70. package/dist/storage/storage-index.js +6 -0
  71. package/dist/storage/storage-index.js.map +1 -0
  72. package/dist/storage/sync-rules/PostgresPersistedSyncRulesContent.js +58 -0
  73. package/dist/storage/sync-rules/PostgresPersistedSyncRulesContent.js.map +1 -0
  74. package/dist/types/codecs.js +97 -0
  75. package/dist/types/codecs.js.map +1 -0
  76. package/dist/types/models/ActiveCheckpoint.js +12 -0
  77. package/dist/types/models/ActiveCheckpoint.js.map +1 -0
  78. package/dist/types/models/ActiveCheckpointNotification.js +8 -0
  79. package/dist/types/models/ActiveCheckpointNotification.js.map +1 -0
  80. package/dist/types/models/BucketData.js +23 -0
  81. package/dist/types/models/BucketData.js.map +1 -0
  82. package/dist/types/models/BucketParameters.js +11 -0
  83. package/dist/types/models/BucketParameters.js.map +1 -0
  84. package/dist/types/models/CurrentData.js +16 -0
  85. package/dist/types/models/CurrentData.js.map +1 -0
  86. package/dist/types/models/Instance.js +5 -0
  87. package/dist/types/models/Instance.js.map +1 -0
  88. package/dist/types/models/Migration.js +12 -0
  89. package/dist/types/models/Migration.js.map +1 -0
  90. package/dist/types/models/SourceTable.js +24 -0
  91. package/dist/types/models/SourceTable.js.map +1 -0
  92. package/dist/types/models/SyncRules.js +47 -0
  93. package/dist/types/models/SyncRules.js.map +1 -0
  94. package/dist/types/models/WriteCheckpoint.js +13 -0
  95. package/dist/types/models/WriteCheckpoint.js.map +1 -0
  96. package/dist/types/models/models-index.js +11 -0
  97. package/dist/types/models/models-index.js.map +1 -0
  98. package/dist/types/types.js +46 -0
  99. package/dist/types/types.js.map +1 -0
  100. package/dist/utils/bson.js +16 -0
  101. package/dist/utils/bson.js.map +1 -0
  102. package/dist/utils/bucket-data.js +25 -0
  103. package/dist/utils/bucket-data.js.map +1 -0
  104. package/dist/utils/db.js +24 -0
  105. package/dist/utils/db.js.map +1 -0
  106. package/dist/utils/ts-codec.js +11 -0
  107. package/dist/utils/ts-codec.js.map +1 -0
  108. package/dist/utils/utils-index.js +5 -0
  109. package/dist/utils/utils-index.js.map +1 -0
  110. package/package.json +50 -0
  111. package/src/index.ts +10 -0
  112. package/src/migrations/PostgresMigrationAgent.ts +46 -0
  113. package/src/migrations/PostgresMigrationStore.ts +70 -0
  114. package/src/migrations/migration-utils.ts +14 -0
  115. package/src/migrations/scripts/1684951997326-init.ts +141 -0
  116. package/src/module/PostgresStorageModule.ts +30 -0
  117. package/src/storage/PostgresBucketStorageFactory.ts +496 -0
  118. package/src/storage/PostgresCompactor.ts +366 -0
  119. package/src/storage/PostgresStorageProvider.ts +42 -0
  120. package/src/storage/PostgresSyncRulesStorage.ts +666 -0
  121. package/src/storage/PostgresTestStorageFactoryGenerator.ts +61 -0
  122. package/src/storage/batch/OperationBatch.ts +101 -0
  123. package/src/storage/batch/PostgresBucketBatch.ts +885 -0
  124. package/src/storage/batch/PostgresPersistedBatch.ts +441 -0
  125. package/src/storage/checkpoints/PostgresWriteCheckpointAPI.ts +176 -0
  126. package/src/storage/storage-index.ts +5 -0
  127. package/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts +67 -0
  128. package/src/types/codecs.ts +136 -0
  129. package/src/types/models/ActiveCheckpoint.ts +15 -0
  130. package/src/types/models/ActiveCheckpointNotification.ts +14 -0
  131. package/src/types/models/BucketData.ts +26 -0
  132. package/src/types/models/BucketParameters.ts +14 -0
  133. package/src/types/models/CurrentData.ts +23 -0
  134. package/src/types/models/Instance.ts +8 -0
  135. package/src/types/models/Migration.ts +19 -0
  136. package/src/types/models/SourceTable.ts +32 -0
  137. package/src/types/models/SyncRules.ts +50 -0
  138. package/src/types/models/WriteCheckpoint.ts +20 -0
  139. package/src/types/models/models-index.ts +10 -0
  140. package/src/types/types.ts +73 -0
  141. package/src/utils/bson.ts +17 -0
  142. package/src/utils/bucket-data.ts +25 -0
  143. package/src/utils/db.ts +27 -0
  144. package/src/utils/ts-codec.ts +14 -0
  145. package/src/utils/utils-index.ts +4 -0
  146. package/test/src/__snapshots__/storage.test.ts.snap +9 -0
  147. package/test/src/__snapshots__/storage_sync.test.ts.snap +332 -0
  148. package/test/src/env.ts +6 -0
  149. package/test/src/migrations.test.ts +34 -0
  150. package/test/src/setup.ts +16 -0
  151. package/test/src/storage.test.ts +131 -0
  152. package/test/src/storage_compacting.test.ts +5 -0
  153. package/test/src/storage_sync.test.ts +12 -0
  154. package/test/src/util.ts +34 -0
  155. package/test/tsconfig.json +20 -0
  156. package/tsconfig.json +36 -0
  157. package/vitest.config.ts +13 -0
@@ -0,0 +1,666 @@
+ import * as lib_postgres from '@powersync/lib-service-postgres';
+ import { DisposableObserver } from '@powersync/lib-services-framework';
+ import { storage, utils } from '@powersync/service-core';
+ import { JSONBig } from '@powersync/service-jsonbig';
+ import * as sync_rules from '@powersync/service-sync-rules';
+ import * as uuid from 'uuid';
+ import { BIGINT_MAX } from '../types/codecs.js';
+ import { models, RequiredOperationBatchLimits } from '../types/types.js';
+ import { replicaIdToSubkey } from '../utils/bson.js';
+ import { mapOpEntry } from '../utils/bucket-data.js';
+
+ import { StatementParam } from '@powersync/service-jpgwire';
+ import { StoredRelationId } from '../types/models/SourceTable.js';
+ import { pick } from '../utils/ts-codec.js';
+ import { PostgresBucketBatch } from './batch/PostgresBucketBatch.js';
+ import { PostgresWriteCheckpointAPI } from './checkpoints/PostgresWriteCheckpointAPI.js';
+ import { PostgresBucketStorageFactory } from './PostgresBucketStorageFactory.js';
+ import { PostgresCompactor } from './PostgresCompactor.js';
+
+ export type PostgresSyncRulesStorageOptions = {
+   factory: PostgresBucketStorageFactory;
+   db: lib_postgres.DatabaseClient;
+   sync_rules: storage.PersistedSyncRulesContent;
+   write_checkpoint_mode?: storage.WriteCheckpointMode;
+   batchLimits: RequiredOperationBatchLimits;
+ };
+
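For orientation, here is a rough sketch of how these options get wired up. This is illustrative only and not part of the package file; in practice `PostgresBucketStorageFactory` performs this construction for a given sync rules row.

```ts
// Illustrative sketch — the bindings below are assumptions, not code from this package.
declare const factory: PostgresBucketStorageFactory;
declare const db: lib_postgres.DatabaseClient;
declare const persistedSyncRules: storage.PersistedSyncRulesContent;
declare const batchLimits: RequiredOperationBatchLimits;

const syncRulesStorage = new PostgresSyncRulesStorage({
  factory,
  db,
  sync_rules: persistedSyncRules,
  // Optional: defaults to storage.WriteCheckpointMode.MANAGED in the constructor below.
  write_checkpoint_mode: storage.WriteCheckpointMode.MANAGED,
  batchLimits
});
```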
+ export class PostgresSyncRulesStorage
+   extends DisposableObserver<storage.SyncRulesBucketStorageListener>
+   implements storage.SyncRulesBucketStorage
+ {
+   public readonly group_id: number;
+   public readonly sync_rules: storage.PersistedSyncRulesContent;
+   public readonly slot_name: string;
+   public readonly factory: PostgresBucketStorageFactory;
+
+   protected db: lib_postgres.DatabaseClient;
+   protected writeCheckpointAPI: PostgresWriteCheckpointAPI;
+
+   // TODO we might be able to share this in an abstract class
+   private parsedSyncRulesCache: { parsed: sync_rules.SqlSyncRules; options: storage.ParseSyncRulesOptions } | undefined;
+   private checksumCache = new storage.ChecksumCache({
+     fetchChecksums: (batch) => {
+       return this.getChecksumsInternal(batch);
+     }
+   });
+
+   constructor(protected options: PostgresSyncRulesStorageOptions) {
+     super();
+     this.group_id = options.sync_rules.id;
+     this.db = options.db;
+     this.sync_rules = options.sync_rules;
+     this.slot_name = options.sync_rules.slot_name;
+     this.factory = options.factory;
+
+     this.writeCheckpointAPI = new PostgresWriteCheckpointAPI({
+       db: this.db,
+       mode: options.write_checkpoint_mode ?? storage.WriteCheckpointMode.MANAGED
+     });
+   }
+
+   get writeCheckpointMode(): storage.WriteCheckpointMode {
+     return this.writeCheckpointAPI.writeCheckpointMode;
+   }
+
+   // TODO we might be able to share this in an abstract class
+   getParsedSyncRules(options: storage.ParseSyncRulesOptions): sync_rules.SqlSyncRules {
+     const { parsed, options: cachedOptions } = this.parsedSyncRulesCache ?? {};
+     /**
+      * Check if the cached sync rules, if present, had the same options.
+      * Parse sync rules if the options are different or if there is no cached value.
+      */
+     if (!parsed || options.defaultSchema != cachedOptions?.defaultSchema) {
+       this.parsedSyncRulesCache = { parsed: this.sync_rules.parsed(options).sync_rules, options };
+     }
+
+     return this.parsedSyncRulesCache!.parsed;
+   }
+
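The cache condition above only compares `defaultSchema`, so repeated calls with the same schema reuse the parsed rules, while a different schema forces a re-parse. A small behavioural sketch (inferred from the condition above, not from package tests):

```ts
// Assumed cache behaviour of getParsedSyncRules (sketch, not part of this file).
const rulesA = syncRulesStorage.getParsedSyncRules({ defaultSchema: 'public' }); // parses and caches
const rulesB = syncRulesStorage.getParsedSyncRules({ defaultSchema: 'public' }); // cache hit: rulesB === rulesA
const rulesC = syncRulesStorage.getParsedSyncRules({ defaultSchema: 'tenant' }); // schema changed: re-parsed
```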
+   async reportError(e: any): Promise<void> {
+     const message = String(e.message ?? 'Replication failure');
+     await this.db.sql`
+       UPDATE sync_rules
+       SET
+         last_fatal_error = ${{ type: 'varchar', value: message }}
+       WHERE
+         id = ${{ type: 'int4', value: this.group_id }};
+     `.execute();
+   }
+
+   compact(options?: storage.CompactOptions): Promise<void> {
+     return new PostgresCompactor(this.db, this.group_id, options).compact();
+   }
+
+   batchCreateCustomWriteCheckpoints(checkpoints: storage.BatchedCustomWriteCheckpointOptions[]): Promise<void> {
+     return this.writeCheckpointAPI.batchCreateCustomWriteCheckpoints(
+       checkpoints.map((c) => ({ ...c, sync_rules_id: this.group_id }))
+     );
+   }
+
+   createCustomWriteCheckpoint(checkpoint: storage.BatchedCustomWriteCheckpointOptions): Promise<bigint> {
+     return this.writeCheckpointAPI.createCustomWriteCheckpoint({
+       ...checkpoint,
+       sync_rules_id: this.group_id
+     });
+   }
+
+   lastWriteCheckpoint(filters: storage.SyncStorageLastWriteCheckpointFilters): Promise<bigint | null> {
+     return this.writeCheckpointAPI.lastWriteCheckpoint({
+       ...filters,
+       sync_rules_id: this.group_id
+     });
+   }
+
+   setWriteCheckpointMode(mode: storage.WriteCheckpointMode): void {
+     return this.writeCheckpointAPI.setWriteCheckpointMode(mode);
+   }
+
+   createManagedWriteCheckpoint(checkpoint: storage.ManagedWriteCheckpointOptions): Promise<bigint> {
+     return this.writeCheckpointAPI.createManagedWriteCheckpoint(checkpoint);
+   }
+
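All of the write checkpoint methods above delegate to `PostgresWriteCheckpointAPI`, stamping this storage's `group_id` in as `sync_rules_id` where needed. A hypothetical call sequence (argument shapes are assumptions based on the `storage.*` types referenced above):

```ts
// Hypothetical usage — option shapes assumed from the types above, not verified here.
declare const managedOptions: storage.ManagedWriteCheckpointOptions;
declare const filters: storage.SyncStorageLastWriteCheckpointFilters;

const createdId: bigint = await syncRulesStorage.createManagedWriteCheckpoint(managedOptions);
const lastId: bigint | null = await syncRulesStorage.lastWriteCheckpoint(filters); // null if none recorded yet
```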
+   async getCheckpoint(): Promise<storage.ReplicationCheckpoint> {
+     const checkpointRow = await this.db.sql`
+       SELECT
+         last_checkpoint,
+         last_checkpoint_lsn
+       FROM
+         sync_rules
+       WHERE
+         id = ${{ type: 'int4', value: this.group_id }}
+     `
+       .decoded(pick(models.SyncRules, ['last_checkpoint', 'last_checkpoint_lsn']))
+       .first();
+
+     return {
+       checkpoint: utils.timestampToOpId(checkpointRow?.last_checkpoint ?? 0n),
+       lsn: checkpointRow?.last_checkpoint_lsn ?? null
+     };
+   }
+
+   async resolveTable(options: storage.ResolveTableOptions): Promise<storage.ResolveTableResult> {
+     const { group_id, connection_id, connection_tag, entity_descriptor } = options;
+
+     const { schema, name: table, objectId, replicationColumns } = entity_descriptor;
+
+     const columns = replicationColumns.map((column) => ({
+       name: column.name,
+       type: column.type,
+       // PGWire returns this as a BigInt. We want to store it as JSONB.
+       type_oid: typeof column.typeId !== 'undefined' ? Number(column.typeId) : column.typeId
+     }));
+     return this.db.transaction(async (db) => {
+       let sourceTableRow = await db.sql`
+         SELECT
+           *
+         FROM
+           source_tables
+         WHERE
+           group_id = ${{ type: 'int4', value: group_id }}
+           AND connection_id = ${{ type: 'int4', value: connection_id }}
+           AND relation_id = ${{ type: 'jsonb', value: { object_id: objectId } satisfies StoredRelationId }}
+           AND schema_name = ${{ type: 'varchar', value: schema }}
+           AND table_name = ${{ type: 'varchar', value: table }}
+           AND replica_id_columns = ${{ type: 'jsonb', value: columns }}
+       `
+         .decoded(models.SourceTable)
+         .first();
+
+       if (sourceTableRow == null) {
+         const row = await db.sql`
+           INSERT INTO
+             source_tables (
+               id,
+               group_id,
+               connection_id,
+               relation_id,
+               schema_name,
+               table_name,
+               replica_id_columns
+             )
+           VALUES
+             (
+               ${{ type: 'varchar', value: uuid.v4() }},
+               ${{ type: 'int4', value: group_id }},
+               ${{ type: 'int4', value: connection_id }},
+               -- The objectId can be string | number; we store it as a jsonb value
+               ${{ type: 'jsonb', value: { object_id: objectId } satisfies StoredRelationId }},
+               ${{ type: 'varchar', value: schema }},
+               ${{ type: 'varchar', value: table }},
+               ${{ type: 'jsonb', value: columns }}
+             )
+           RETURNING
+             *
+         `
+           .decoded(models.SourceTable)
+           .first();
+         sourceTableRow = row;
+       }
+
+       const sourceTable = new storage.SourceTable(
+         sourceTableRow!.id,
+         connection_tag,
+         objectId,
+         schema,
+         table,
+         replicationColumns,
+         sourceTableRow!.snapshot_done ?? true
+       );
+       sourceTable.syncEvent = options.sync_rules.tableTriggersEvent(sourceTable);
+       sourceTable.syncData = options.sync_rules.tableSyncsData(sourceTable);
+       sourceTable.syncParameters = options.sync_rules.tableSyncsParameters(sourceTable);
+
+       const truncatedTables = await db.sql`
+         SELECT
+           *
+         FROM
+           source_tables
+         WHERE
+           group_id = ${{ type: 'int4', value: group_id }}
+           AND connection_id = ${{ type: 'int4', value: connection_id }}
+           AND id != ${{ type: 'varchar', value: sourceTableRow!.id }}
+           AND (
+             relation_id = ${{ type: 'jsonb', value: { object_id: objectId } satisfies StoredRelationId }}
+             OR (
+               schema_name = ${{ type: 'varchar', value: schema }}
+               AND table_name = ${{ type: 'varchar', value: table }}
+             )
+           )
+       `
+         .decoded(models.SourceTable)
+         .rows();
+
+       return {
+         table: sourceTable,
+         dropTables: truncatedTables.map(
+           (doc) =>
+             new storage.SourceTable(
+               doc.id,
+               connection_tag,
+               doc.relation_id?.object_id ?? 0,
+               doc.schema_name,
+               doc.table_name,
+               doc.replica_id_columns?.map((c) => ({
+                 name: c.name,
+                 typeOid: c.typeId,
+                 type: c.type
+               })) ?? [],
+               doc.snapshot_done ?? true
+             )
+         )
+       };
+     });
+   }
+
+   async startBatch(
+     options: storage.StartBatchOptions,
+     callback: (batch: storage.BucketStorageBatch) => Promise<void>
+   ): Promise<storage.FlushedResult | null> {
+     const syncRules = await this.db.sql`
+       SELECT
+         last_checkpoint_lsn,
+         no_checkpoint_before,
+         keepalive_op
+       FROM
+         sync_rules
+       WHERE
+         id = ${{ type: 'int4', value: this.group_id }}
+     `
+       .decoded(pick(models.SyncRules, ['last_checkpoint_lsn', 'no_checkpoint_before', 'keepalive_op']))
+       .first();
+
+     const checkpoint_lsn = syncRules?.last_checkpoint_lsn ?? null;
+
+     await using batch = new PostgresBucketBatch({
+       db: this.db,
+       sync_rules: this.sync_rules.parsed(options).sync_rules,
+       group_id: this.group_id,
+       slot_name: this.slot_name,
+       last_checkpoint_lsn: checkpoint_lsn,
+       keep_alive_op: syncRules?.keepalive_op,
+       no_checkpoint_before_lsn: syncRules?.no_checkpoint_before ?? options.zeroLSN,
+       store_current_data: options.storeCurrentData,
+       skip_existing_rows: options.skipExistingRows ?? false,
+       batch_limits: this.options.batchLimits
+     });
+     this.iterateListeners((cb) => cb.batchStarted?.(batch));
+
+     await callback(batch);
+     await batch.flush();
+     if (batch.last_flushed_op) {
+       return { flushed_op: String(batch.last_flushed_op) };
+     } else {
+       return null;
+     }
+   }
+
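`startBatch` scopes the whole replication batch to the callback: `await using` disposes the `PostgresBucketBatch` once the callback and the final flush complete, and the last flushed op id (if any) is reported back. A caller-side sketch (the option values and the `batch.commit` call are assumptions about `storage.BucketStorageBatch`, not verified against this package):

```ts
// Sketch of driving startBatch from a replicator (values and batch.commit are assumptions).
const result = await syncRulesStorage.startBatch(
  { zeroLSN: '0/0', defaultSchema: 'public', storeCurrentData: true },
  async (batch) => {
    // ... apply replicated rows via the batch here ...
    await batch.commit('0/1A2B3C4D'); // commit up to a source LSN
  }
);
console.log(result?.flushed_op ?? 'nothing flushed');
```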
+   async getParameterSets(
+     checkpoint: utils.OpId,
+     lookups: sync_rules.SqliteJsonValue[][]
+   ): Promise<sync_rules.SqliteJsonRow[]> {
+     const rows = await this.db.sql`
+       SELECT DISTINCT
+         ON (lookup, source_table, source_key) lookup,
+         source_table,
+         source_key,
+         id,
+         bucket_parameters
+       FROM
+         bucket_parameters
+       WHERE
+         group_id = ${{ type: 'int4', value: this.group_id }}
+         AND lookup = ANY (
+           SELECT
+             decode((FILTER ->> 0)::text, 'hex') -- Decode the hex string to bytea
+           FROM
+             jsonb_array_elements(${{
+               type: 'jsonb',
+               value: lookups.map((l) => storage.serializeLookupBuffer(l).toString('hex'))
+             }}) AS FILTER
+         )
+         AND id <= ${{ type: 'int8', value: BigInt(checkpoint) }}
+       ORDER BY
+         lookup,
+         source_table,
+         source_key,
+         id DESC
+     `
+       .decoded(pick(models.BucketParameters, ['bucket_parameters']))
+       .rows();
+
+     const groupedParameters = rows.map((row) => {
+       return JSONBig.parse(row.bucket_parameters) as sync_rules.SqliteJsonRow;
+     });
+     return groupedParameters.flat();
+   }
+
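Note the encoding round trip in the query above: lookups are serialized to binary buffers, hex-encoded so they survive the JSONB parameter, and decoded back to `bytea` inside Postgres for the `lookup = ANY (...)` comparison. A standalone sketch of that round trip:

```ts
// Standalone sketch of the hex round trip (not package code).
const lookup = Buffer.from([0xde, 0xad, 0xbe, 0xef]); // a serialized lookup buffer
const asHex = lookup.toString('hex'); // 'deadbeef' — safe to embed in a JSONB array
// Postgres side: decode('deadbeef', 'hex') reproduces the original bytea,
// which can then be compared against the `lookup` bytea column.
```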
+   async *getBucketDataBatch(
+     checkpoint: utils.OpId,
+     dataBuckets: Map<string, string>,
+     options?: storage.BucketDataBatchOptions
+   ): AsyncIterable<storage.SyncBucketDataBatch> {
+     if (dataBuckets.size == 0) {
+       return;
+     }
+
+     const end = checkpoint ?? BIGINT_MAX;
+     const filters = Array.from(dataBuckets.entries()).map(([name, start]) => ({
+       bucket_name: name,
+       start: start
+     }));
+
+     const rowLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT;
+     const sizeLimit = options?.chunkLimitBytes ?? storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES;
+
+     let batchSize = 0;
+     let currentBatch: utils.SyncBucketData | null = null;
+     let targetOp: bigint | null = null;
+     let rowCount = 0;
+
+     /**
+      * It is possible to perform this query with a JSONB join, e.g.
+      * ```sql
+      * WITH
+      *   filter_data AS (
+      *     SELECT
+      *       FILTER ->> 'bucket_name' AS bucket_name,
+      *       (FILTER ->> 'start')::BIGINT AS start_op_id
+      *     FROM
+      *       jsonb_array_elements($1::jsonb) AS FILTER
+      *   )
+      * SELECT
+      *   b.*,
+      *   octet_length(b.data) AS data_size
+      * FROM
+      *   bucket_data b
+      *   JOIN filter_data f ON b.bucket_name = f.bucket_name
+      *   AND b.op_id > f.start_op_id
+      *   AND b.op_id <= $2
+      * WHERE
+      *   b.group_id = $3
+      * ORDER BY
+      *   b.bucket_name ASC,
+      *   b.op_id ASC
+      * LIMIT
+      *   $4;
+      * ```
+      * This might be better for large volumes of buckets, but in testing the JSON method
+      * was significantly slower than the method below: syncing 2.5 million rows in a single
+      * bucket took 2 minutes and 11 seconds with the method below, while the JSON method
+      * had synced only 1 million rows before a 5 minute timeout.
+      */
+     for await (const rows of this.db.streamRows({
+       statement: `
+         SELECT
+           *
+         FROM
+           bucket_data
+         WHERE
+           group_id = $1
+           and op_id <= $2
+           and (
+             ${filters.map((f, index) => `(bucket_name = $${index * 2 + 4} and op_id > $${index * 2 + 5})`).join(' OR ')}
+           )
+         ORDER BY
+           bucket_name ASC,
+           op_id ASC
+         LIMIT
+           $3;`,
+       params: [
+         { type: 'int4', value: this.group_id },
+         { type: 'int8', value: end },
+         { type: 'int4', value: rowLimit + 1 },
+         ...filters.flatMap((f) => [
+           { type: 'varchar' as const, value: f.bucket_name },
+           { type: 'int8' as const, value: f.start } satisfies StatementParam
+         ])
+       ]
+     })) {
+       const decodedRows = rows.map((r) => models.BucketData.decode(r as any));
+
+       for (const row of decodedRows) {
+         const { bucket_name } = row;
+         const rowSize = row.data ? row.data.length : 0;
+
+         if (
+           currentBatch == null ||
+           currentBatch.bucket != bucket_name ||
+           batchSize >= sizeLimit ||
+           (currentBatch?.data.length && batchSize + rowSize > sizeLimit) ||
+           currentBatch.data.length >= rowLimit
+         ) {
+           let start: string | undefined = undefined;
+           if (currentBatch != null) {
+             if (currentBatch.bucket == bucket_name) {
+               currentBatch.has_more = true;
+             }
+
+             const yieldBatch = currentBatch;
+             start = currentBatch.after;
+             currentBatch = null;
+             batchSize = 0;
+             yield { batch: yieldBatch, targetOp: targetOp };
+             targetOp = null;
+             if (rowCount >= rowLimit) {
+               // We've yielded all the requested rows
+               break;
+             }
+           }
+
+           start ??= dataBuckets.get(bucket_name);
+           if (start == null) {
+             throw new Error(`data for unexpected bucket: ${bucket_name}`);
+           }
+           currentBatch = {
+             bucket: bucket_name,
+             after: start,
+             has_more: false,
+             data: [],
+             next_after: start
+           };
+           targetOp = null;
+         }
+
+         const entry = mapOpEntry(row);
+
+         if (row.source_table && row.source_key) {
+           entry.subkey = replicaIdToSubkey(row.source_table, storage.deserializeReplicaId(row.source_key));
+         }
+
+         if (row.target_op != null) {
+           // MOVE, CLEAR
+           const rowTargetOp = row.target_op;
+           if (targetOp == null || rowTargetOp > targetOp) {
+             targetOp = rowTargetOp;
+           }
+         }
+
+         currentBatch.data.push(entry);
+         currentBatch.next_after = entry.op_id;
+
+         batchSize += rowSize;
+
+         // Manually track the total rows yielded
+         rowCount++;
+       }
+     }
+
+     if (currentBatch != null) {
+       const yieldBatch = currentBatch;
+       currentBatch = null;
+       yield { batch: yieldBatch, targetOp: targetOp };
+       targetOp = null;
+     }
+   }
+
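The generator above yields one `SyncBucketDataBatch` per chunk, starting a new chunk whenever the bucket changes or the row/byte limits are hit, with `has_more` marking a split inside a single bucket. A consumption sketch (bucket name, checkpoint, and op ids are illustrative):

```ts
// Sketch: draining getBucketDataBatch (names and op ids are illustrative).
const buckets = new Map<string, string>([['by_user["u1"]', '0']]); // bucket -> start op id
for await (const { batch } of syncRulesStorage.getBucketDataBatch('100', buckets)) {
  console.log(batch.bucket, batch.data.length, 'ops; has_more =', batch.has_more);
  // batch.next_after is the op id to resume this bucket from on the next call.
}
```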
+   async getChecksums(checkpoint: utils.OpId, buckets: string[]): Promise<utils.ChecksumMap> {
+     return this.checksumCache.getChecksumMap(checkpoint, buckets);
+   }
+
+   async terminate(options?: storage.TerminateOptions) {
+     if (!options || options?.clearStorage) {
+       await this.clear();
+     }
+     await this.db.sql`
+       UPDATE sync_rules
+       SET
+         state = ${{ type: 'varchar', value: storage.SyncRuleState.TERMINATED }},
+         snapshot_done = ${{ type: 'bool', value: false }}
+       WHERE
+         id = ${{ type: 'int4', value: this.group_id }}
+     `.execute();
+   }
+
+   async getStatus(): Promise<storage.SyncRuleStatus> {
+     const syncRulesRow = await this.db.sql`
+       SELECT
+         snapshot_done,
+         last_checkpoint_lsn,
+         state
+       FROM
+         sync_rules
+       WHERE
+         id = ${{ type: 'int4', value: this.group_id }}
+     `
+       .decoded(pick(models.SyncRules, ['snapshot_done', 'last_checkpoint_lsn', 'state']))
+       .first();
+
+     if (syncRulesRow == null) {
+       throw new Error('Cannot find sync rules status');
+     }
+
+     return {
+       snapshot_done: syncRulesRow.snapshot_done,
+       active: syncRulesRow.state == storage.SyncRuleState.ACTIVE,
+       checkpoint_lsn: syncRulesRow.last_checkpoint_lsn ?? null
+     };
+   }
+
+   async clear(): Promise<void> {
+     await this.db.sql`
+       UPDATE sync_rules
+       SET
+         snapshot_done = FALSE,
+         last_checkpoint_lsn = NULL,
+         last_checkpoint = NULL,
+         no_checkpoint_before = NULL
+       WHERE
+         id = ${{ type: 'int4', value: this.group_id }}
+     `.execute();
+
+     await this.db.sql`
+       DELETE FROM bucket_data
+       WHERE
+         group_id = ${{ type: 'int4', value: this.group_id }}
+     `.execute();
+
+     await this.db.sql`
+       DELETE FROM bucket_parameters
+       WHERE
+         group_id = ${{ type: 'int4', value: this.group_id }}
+     `.execute();
+
+     await this.db.sql`
+       DELETE FROM current_data
+       WHERE
+         group_id = ${{ type: 'int4', value: this.group_id }}
+     `.execute();
+
+     await this.db.sql`
+       DELETE FROM source_tables
+       WHERE
+         group_id = ${{ type: 'int4', value: this.group_id }}
+     `.execute();
+   }
+
+   async autoActivate(): Promise<void> {
+     await this.db.transaction(async (db) => {
+       const syncRulesRow = await db.sql`
+         SELECT
+           state
+         FROM
+           sync_rules
+         WHERE
+           id = ${{ type: 'int4', value: this.group_id }}
+       `
+         .decoded(pick(models.SyncRules, ['state']))
+         .first();
+
+       if (syncRulesRow && syncRulesRow.state == storage.SyncRuleState.PROCESSING) {
+         await db.sql`
+           UPDATE sync_rules
+           SET
+             state = ${{ type: 'varchar', value: storage.SyncRuleState.ACTIVE }}
+           WHERE
+             id = ${{ type: 'int4', value: this.group_id }}
+         `.execute();
+       }
+
+       await db.sql`
+         UPDATE sync_rules
+         SET
+           state = ${{ type: 'varchar', value: storage.SyncRuleState.STOP }}
+         WHERE
+           state = ${{ type: 'varchar', value: storage.SyncRuleState.ACTIVE }}
+           AND id != ${{ type: 'int4', value: this.group_id }}
+       `.execute();
+     });
+   }
+
+   private async getChecksumsInternal(batch: storage.FetchPartialBucketChecksum[]): Promise<storage.PartialChecksumMap> {
+     if (batch.length == 0) {
+       return new Map();
+     }
+
+     const rangedBatch = batch.map((b) => ({
+       ...b,
+       start: b.start ?? 0
+     }));
+
+     const results = await this.db.sql`
+       WITH
+         filter_data AS (
+           SELECT
+             FILTER ->> 'bucket' AS bucket_name,
+             (FILTER ->> 'start')::BIGINT AS start_op_id,
+             (FILTER ->> 'end')::BIGINT AS end_op_id
+           FROM
+             jsonb_array_elements(${{ type: 'jsonb', value: rangedBatch }}::jsonb) AS FILTER
+         )
+       SELECT
+         b.bucket_name AS bucket,
+         SUM(b.checksum) AS checksum_total,
+         COUNT(*) AS total,
+         MAX(
+           CASE
+             WHEN b.op = 'CLEAR' THEN 1
+             ELSE 0
+           END
+         ) AS has_clear_op
+       FROM
+         bucket_data b
+         JOIN filter_data f ON b.bucket_name = f.bucket_name
+         AND b.op_id > f.start_op_id
+         AND b.op_id <= f.end_op_id
+       WHERE
+         b.group_id = ${{ type: 'int4', value: this.group_id }}
+       GROUP BY
+         b.bucket_name;
+     `.rows<{ bucket: string; checksum_total: bigint; total: bigint; has_clear_op: number }>();
+
+     return new Map<string, storage.PartialChecksum>(
+       results.map((doc) => {
+         return [
+           doc.bucket,
+           {
+             bucket: doc.bucket,
+             partialCount: Number(doc.total),
+             partialChecksum: Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff,
+             isFullChecksum: doc.has_clear_op == 1
+           } satisfies storage.PartialChecksum
+         ];
+       })
+     );
+   }
+ }
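One detail worth calling out in `getChecksumsInternal`: `SUM(b.checksum)` in Postgres can exceed 32 bits, so the total is folded back down when building `partialChecksum`. A standalone sketch of that arithmetic (sample value is illustrative):

```ts
// Standalone sketch of the 32-bit checksum fold used above (illustrative value).
const checksumTotal = 0x123456789n; // a SUM(checksum) wider than 32 bits
const low32 = Number(checksumTotal & 0xffffffffn); // keep the low 32 bits -> 0x23456789
const partialChecksum = low32 & 0xffffffff; // coerce to a signed 32-bit integer
// Sums >= 2^31 wrap negative (e.g. 0xffffffff -> -1), matching int32 checksum semantics.
```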