@powersync/service-module-postgres 0.0.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
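The main structural change in the diff below is the move away from the `walStreamTest(factory, async (context) => { ... })` wrapper (with a trailing `{ timeout }` argument on each `test(...)`) to an explicitly disposable `WalStreamTestContext` opened with `await using`, plus a suite-level timeout on `describe`. The following is a minimal sketch of that new shape, assuming a vitest setup and the `WalStreamTestContext`/`StorageFactory` helpers imported in the diff; the test name, body, and the direct `defineBatchTests(MONGO_STORAGE_FACTORY)` call are illustrative, not the package's actual tests.

```ts
import { describe, test } from 'vitest';
// Helpers referenced by the diff; paths and types are taken from the imports shown below.
import { MONGO_STORAGE_FACTORY, StorageFactory } from '@core-tests/util.js';
import { WalStreamTestContext } from './wal_stream_utils.js';

// A suite-level timeout replaces the per-test `{ timeout: 120_000 }` argument.
describe('batch replication tests - mongodb', { timeout: 120_000 }, function () {
  // Assumed wiring for illustration; the real file gates this behind env checks.
  defineBatchTests(MONGO_STORAGE_FACTORY);
});

function defineBatchTests(factory: StorageFactory) {
  test('example test shape', async () => {
    // `await using` (TypeScript 5.2+ explicit resource management) disposes the
    // context automatically when the test ends, even on failure or early return.
    await using context = await WalStreamTestContext.open(factory);

    // Illustrative body using methods that appear in the diff.
    await context.updateSyncRules(`bucket_definitions:
  global:
    data:
      - SELECT id, description, other FROM "test_data"`);
    await context.replicateSnapshot();
    context.startStreaming();
  });
}
```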
@@ -1,11 +1,13 @@
-import { MONGO_STORAGE_FACTORY, StorageFactory } from '@core-tests/util.js';
+import { MONGO_STORAGE_FACTORY, StorageFactory, StorageOptions } from '@core-tests/util.js';
 import { describe, expect, test } from 'vitest';
+import { populateData } from '../../dist/utils/populate_test_data.js';
 import { env } from './env.js';
 import { TEST_CONNECTION_OPTIONS } from './util.js';
-import { walStreamTest } from './wal_stream_utils.js';
-import { populateData } from '../../dist/utils/populate_test_data.js';
+import { WalStreamTestContext } from './wal_stream_utils.js';
+import * as timers from 'timers/promises';
+import { Metrics } from '@powersync/service-core';
 
-describe('batch replication tests - mongodb', function () {
+describe('batch replication tests - mongodb', { timeout: 120_000 }, function () {
   // These are slow but consistent tests.
   // Not run on every test run, but we do run on CI, or when manually debugging issues.
   if (env.CI || env.SLOW_TESTS) {
@@ -22,166 +24,284 @@ const BASIC_SYNC_RULES = `bucket_definitions:
       - SELECT id, description, other FROM "test_data"`;
 
 function defineBatchTests(factory: StorageFactory) {
-  test(
-    'update large record',
-    walStreamTest(factory, async (context) => {
-      // This test generates a large transaction in MongoDB, despite the replicated data
-      // not being that large.
-      // If we don't limit transaction size, we could run into this error:
-      // > -31800: transaction is too large and will not fit in the storage engine cache
-      await context.updateSyncRules(BASIC_SYNC_RULES);
-      const { pool } = context;
-
-      await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`);
-
-      await context.replicateSnapshot();
-
-      let operation_count = await populateData({
-        num_transactions: 1,
-        per_transaction: 80,
-        size: 4_000_000,
-        connection: TEST_CONNECTION_OPTIONS
-      });
-
+  test('update large record', async () => {
+    await using context = await WalStreamTestContext.open(factory);
+    // This test generates a large transaction in MongoDB, despite the replicated data
+    // not being that large.
+    // If we don't limit transaction size, we could run into this error:
+    // > -31800: transaction is too large and will not fit in the storage engine cache
+    await context.updateSyncRules(BASIC_SYNC_RULES);
+    const { pool } = context;
+
+    await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`);
+
+    await context.replicateSnapshot();
+
+    let operation_count = await populateData({
+      num_transactions: 1,
+      per_transaction: 80,
+      size: 4_000_000,
+      connection: TEST_CONNECTION_OPTIONS
+    });
+
+    const start = Date.now();
+
+    context.startStreaming();
+
+    const checkpoint = await context.getCheckpoint({ timeout: 100_000 });
+    const duration = Date.now() - start;
+    const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024);
+    const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']);
+    expect(checksum.get('global[]')!.count).toEqual(operation_count);
+    const perSecond = Math.round((operation_count / duration) * 1000);
+    console.log(`${operation_count} ops in ${duration}ms ${perSecond} ops/s. ${used}MB heap`);
+  });
+
+  test('initial replication performance', async () => {
+    await using context = await WalStreamTestContext.open(factory);
+    // Manual test to check initial replication performance and memory usage
+    await context.updateSyncRules(BASIC_SYNC_RULES);
+    const { pool } = context;
+
+    await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`);
+
+    // Some stats (varies a lot):
+    // Old 'postgres' driver, using cursor(2)
+    // 15 ops in 19559ms 1 ops/s. 354MB RSS, 115MB heap, 137MB external
+    // 25 ops in 42984ms 1 ops/s. 377MB RSS, 129MB heap, 137MB external
+    // 35 ops in 41337ms 1 ops/s. 365MB RSS, 115MB heap, 137MB external
+
+    // streaming with pgwire
+    // 15 ops in 26423ms 1 ops/s. 379MB RSS, 128MB heap, 182MB external, 165MB ArrayBuffers
+    // 35 ops in 78897ms 0 ops/s. 539MB RSS, 52MB heap, 87MB external, 83MB ArrayBuffers
+
+    let operation_count = await populateData({
+      num_transactions: 1,
+      per_transaction: 35,
+      size: 14_000_000,
+      connection: TEST_CONNECTION_OPTIONS
+    });
+
+    global.gc?.();
+
+    // Note that we could already have high memory usage at this point
+    printMemoryUsage();
+
+    let interval = setInterval(() => {
+      printMemoryUsage();
+    }, 2000);
+    try {
       const start = Date.now();
 
+      await context.replicateSnapshot();
+      await context.storage!.autoActivate();
       context.startStreaming();
 
       const checkpoint = await context.getCheckpoint({ timeout: 100_000 });
       const duration = Date.now() - start;
-      const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024);
       const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']);
       expect(checksum.get('global[]')!.count).toEqual(operation_count);
       const perSecond = Math.round((operation_count / duration) * 1000);
-      console.log(`${operation_count} ops in ${duration}ms ${perSecond} ops/s. ${used}MB heap`);
-    }),
-    { timeout: 120_000 }
-  );
-
-  test(
-    'initial replication performance',
-    walStreamTest(factory, async (context) => {
-      // Manual test to check initial replication performance and memory usage
-      await context.updateSyncRules(BASIC_SYNC_RULES);
-      const { pool } = context;
-
-      await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`);
-
-      // Some stats (varies a lot):
-      // Old 'postgres' driver, using cursor(2)
-      // 15 ops in 19559ms 1 ops/s. 354MB RSS, 115MB heap, 137MB external
-      // 25 ops in 42984ms 1 ops/s. 377MB RSS, 129MB heap, 137MB external
-      // 35 ops in 41337ms 1 ops/s. 365MB RSS, 115MB heap, 137MB external
-
-      // streaming with pgwire
-      // 15 ops in 26423ms 1 ops/s. 379MB RSS, 128MB heap, 182MB external, 165MB ArrayBuffers
-      // 35 ops in 78897ms 0 ops/s. 539MB RSS, 52MB heap, 87MB external, 83MB ArrayBuffers
-
-      let operation_count = await populateData({
-        num_transactions: 1,
-        per_transaction: 35,
-        size: 14_000_000,
-        connection: TEST_CONNECTION_OPTIONS
-      });
-
-      global.gc?.();
-
-      // Note that we could already have high memory usage at this point
+      console.log(`${operation_count} ops in ${duration}ms ${perSecond} ops/s.`);
       printMemoryUsage();
-
-      let interval = setInterval(() => {
-        printMemoryUsage();
-      }, 2000);
-      try {
-        const start = Date.now();
-
-        await context.replicateSnapshot();
-        await context.storage!.autoActivate();
-        context.startStreaming();
-
-        const checkpoint = await context.getCheckpoint({ timeout: 100_000 });
-        const duration = Date.now() - start;
-        const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']);
-        expect(checksum.get('global[]')!.count).toEqual(operation_count);
-        const perSecond = Math.round((operation_count / duration) * 1000);
-        console.log(`${operation_count} ops in ${duration}ms ${perSecond} ops/s.`);
-        printMemoryUsage();
-      } finally {
-        clearInterval(interval);
+    } finally {
+      clearInterval(interval);
+    }
+  });
+
+  test('large number of operations', async () => {
+    await using context = await WalStreamTestContext.open(factory);
+    // This just tests performance of a large number of operations inside a transaction.
+    await context.updateSyncRules(BASIC_SYNC_RULES);
+    const { pool } = context;
+
+    await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`);
+
+    await context.replicateSnapshot();
+
+    const numTransactions = 20;
+    const perTransaction = 1500;
+    let operationCount = 0;
+
+    const description = 'description';
+
+    for (let i = 0; i < numTransactions; i++) {
+      const prefix = `test${i}K`;
+
+      await pool.query(
+        {
+          statement: `INSERT INTO test_data(id, description, other) SELECT $1 || i, $2 || i, 'foo' FROM generate_series(1, $3) i`,
+          params: [
+            { type: 'varchar', value: prefix },
+            { type: 'varchar', value: description },
+            { type: 'int4', value: perTransaction }
+          ]
+        },
+        {
+          statement: `UPDATE test_data SET other = other || '#' WHERE id LIKE $1 || '%'`,
+          params: [{ type: 'varchar', value: prefix }]
+        }
+      );
+      operationCount += perTransaction * 2;
+    }
+
+    const start = Date.now();
+
+    context.startStreaming();
+
+    const checkpoint = await context.getCheckpoint({ timeout: 50_000 });
+    const duration = Date.now() - start;
+    const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024);
+    const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']);
+    expect(checksum.get('global[]')!.count).toEqual(operationCount);
+    const perSecond = Math.round((operationCount / duration) * 1000);
+    // This number depends on the test machine, so we keep the test significantly
+    // lower than expected numbers.
+    expect(perSecond).toBeGreaterThan(1000);
+    console.log(`${operationCount} ops in ${duration}ms ${perSecond} ops/s. ${used}MB heap`);
+
+    // Truncating is fast (~10k ops/second).
+    // We'd need a really large set of data to actually run into limits when truncating,
+    // but we just test with the data we have here.
+    const truncateStart = Date.now();
+    await pool.query(`TRUNCATE test_data`);
+
+    const checkpoint2 = await context.getCheckpoint({ timeout: 20_000 });
+    const truncateDuration = Date.now() - truncateStart;
+
+    const checksum2 = await context.storage!.getChecksums(checkpoint2, ['global[]']);
+    const truncateCount = checksum2.get('global[]')!.count - checksum.get('global[]')!.count;
+    expect(truncateCount).toEqual(numTransactions * perTransaction);
+    const truncatePerSecond = Math.round((truncateCount / truncateDuration) * 1000);
+    console.log(`Truncated ${truncateCount} ops in ${truncateDuration}ms ${truncatePerSecond} ops/s. ${used}MB heap`);
+  });
+
+  test('resuming initial replication (1)', async () => {
+    // Stop early - likely to not include deleted row in first replication attempt.
+    await testResumingReplication(2000);
+  });
+  test('resuming initial replication (2)', async () => {
+    // Stop late - likely to include deleted row in first replication attempt.
+    await testResumingReplication(8000);
+  });
+
+  async function testResumingReplication(stopAfter: number) {
+    // This tests interrupting and then resuming initial replication.
+    // We interrupt replication after test_data1 has fully replicated, and
+    // test_data2 has partially replicated.
+    // This test relies on interval behavior that is not 100% deterministic:
+    // 1. We attempt to abort initial replication once a certain number of
+    //    rows have been replicated, but this is not exact. Our only requirement
+    //    is that we have not fully replicated test_data2 yet.
+    // 2. Order of replication is not deterministic, so which specific rows
+    //    have been / have not been replicated at that point is not deterministic.
+    // We do allow for some variation in the test results to account for this.
+
+    await using context = await WalStreamTestContext.open(factory);
+
+    await context.updateSyncRules(`bucket_definitions:
+      global:
+        data:
+          - SELECT * FROM test_data1
+          - SELECT * FROM test_data2`);
+    const { pool } = context;
+
+    await pool.query(`CREATE TABLE test_data1(id serial primary key, description text)`);
+    await pool.query(`CREATE TABLE test_data2(id serial primary key, description text)`);
+
+    await pool.query(
+      {
+        statement: `INSERT INTO test_data1(description) SELECT 'foo' FROM generate_series(1, 1000) i`
+      },
+      {
+        statement: `INSERT INTO test_data2( description) SELECT 'foo' FROM generate_series(1, 10000) i`
       }
-    }),
-    { timeout: 120_000 }
-  );
+    );
 
-  test(
-    'large number of operations',
-    walStreamTest(factory, async (context) => {
-      // This just tests performance of a large number of operations inside a transaction.
-      await context.updateSyncRules(BASIC_SYNC_RULES);
-      const { pool } = context;
+    const p = context.replicateSnapshot();
 
-      await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`);
+    let done = false;
 
-      await context.replicateSnapshot();
+    const startRowCount = (await Metrics.getInstance().getMetricValueForTests('powersync_rows_replicated_total')) ?? 0;
+    try {
+      (async () => {
+        while (!done) {
+          const count =
+            ((await Metrics.getInstance().getMetricValueForTests('powersync_rows_replicated_total')) ?? 0) -
+            startRowCount;
 
-      const numTransactions = 20;
-      const perTransaction = 1500;
-      let operationCount = 0;
-
-      const description = 'description';
-
-      for (let i = 0; i < numTransactions; i++) {
-        const prefix = `test${i}K`;
-
-        await pool.query(
-          {
-            statement: `INSERT INTO test_data(id, description, other) SELECT $1 || i, $2 || i, 'foo' FROM generate_series(1, $3) i`,
-            params: [
-              { type: 'varchar', value: prefix },
-              { type: 'varchar', value: description },
-              { type: 'int4', value: perTransaction }
-            ]
-          },
-          {
-            statement: `UPDATE test_data SET other = other || '#' WHERE id LIKE $1 || '%'`,
-            params: [{ type: 'varchar', value: prefix }]
+          if (count >= stopAfter) {
+            break;
           }
-        );
-        operationCount += perTransaction * 2;
-      }
-
-      const start = Date.now();
-
-      context.startStreaming();
-
-      const checkpoint = await context.getCheckpoint({ timeout: 50_000 });
-      const duration = Date.now() - start;
-      const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024);
-      const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']);
-      expect(checksum.get('global[]')!.count).toEqual(operationCount);
-      const perSecond = Math.round((operationCount / duration) * 1000);
-      // This number depends on the test machine, so we keep the test significantly
-      // lower than expected numbers.
-      expect(perSecond).toBeGreaterThan(1000);
-      console.log(`${operationCount} ops in ${duration}ms ${perSecond} ops/s. ${used}MB heap`);
-
-      // Truncating is fast (~10k ops/second).
-      // We'd need a really large set of data to actually run into limits when truncating,
-      // but we just test with the data we have here.
-      const truncateStart = Date.now();
-      await pool.query(`TRUNCATE test_data`);
-
-      const checkpoint2 = await context.getCheckpoint({ timeout: 20_000 });
-      const truncateDuration = Date.now() - truncateStart;
-
-      const checksum2 = await context.storage!.getChecksums(checkpoint2, ['global[]']);
-      const truncateCount = checksum2.get('global[]')!.count - checksum.get('global[]')!.count;
-      expect(truncateCount).toEqual(numTransactions * perTransaction);
-      const truncatePerSecond = Math.round((truncateCount / truncateDuration) * 1000);
-      console.log(`Truncated ${truncateCount} ops in ${truncateDuration}ms ${truncatePerSecond} ops/s. ${used}MB heap`);
-    }),
-    { timeout: 90_000 }
-  );
+          await timers.setTimeout(1);
+        }
+        // This interrupts initial replication
+        await context.dispose();
+      })();
+      // This confirms that initial replication was interrupted
+      await expect(p).rejects.toThrowError();
+      done = true;
+    } finally {
+      done = true;
+    }
+
+    // Bypass the usual "clear db on factory open" step.
+    await using context2 = await WalStreamTestContext.open(factory, { doNotClear: true });
+
+    // This delete should be using one of the ids already replicated
+    const {
+      rows: [[id1]]
+    } = await context2.pool.query(`DELETE FROM test_data2 WHERE id = (SELECT id FROM test_data2 LIMIT 1) RETURNING id`);
+    // This update should also be using one of the ids already replicated
+    const {
+      rows: [[id2]]
+    } = await context2.pool.query(
+      `UPDATE test_data2 SET description = 'update1' WHERE id = (SELECT id FROM test_data2 LIMIT 1) RETURNING id`
+    );
+    const {
+      rows: [[id3]]
+    } = await context2.pool.query(`INSERT INTO test_data2(description) SELECT 'insert1' RETURNING id`);
+
+    await context2.loadNextSyncRules();
+    await context2.replicateSnapshot();
+
+    context2.startStreaming();
+    const data = await context2.getBucketData('global[]', undefined, {});
+
+    const deletedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id1));
+    const updatedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id2));
+    const insertedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id3));
+
+    if (deletedRowOps.length != 0) {
+      // The deleted row was part of the first replication batch,
+      // so it is removed by streaming replication.
+      expect(deletedRowOps.length).toEqual(2);
+      expect(deletedRowOps[1].op).toEqual('REMOVE');
+    } else {
+      // The deleted row was not part of the first replication batch,
+      // so it's not in the resulting ops at all.
+    }
+
+    expect(updatedRowOps.length).toEqual(2);
+    // description for the first op could be 'foo' or 'update1'.
+    // We only test the final version.
+    expect(JSON.parse(updatedRowOps[1].data as string).description).toEqual('update1');
+
+    expect(insertedRowOps.length).toEqual(2);
+    expect(JSON.parse(insertedRowOps[0].data as string).description).toEqual('insert1');
+    expect(JSON.parse(insertedRowOps[1].data as string).description).toEqual('insert1');
+
+    // 1000 of test_data1 during first replication attempt.
+    // N >= 1000 of test_data2 during first replication attempt.
+    // 10000 - N - 1 + 1 of test_data2 during second replication attempt.
+    // An additional update during streaming replication (2x total for this row).
+    // An additional insert during streaming replication (2x total for this row).
+    // If the deleted row was part of the first replication batch, it's removed by streaming replication.
+    // This adds 2 ops.
+    // We expect this to be 11002 for stopAfter: 2000, and 11004 for stopAfter: 8000.
+    // However, this is not deterministic.
+    expect(data.length).toEqual(11002 + deletedRowOps.length);
+  }
 
   function printMemoryUsage() {
     const memoryUsage = process.memoryUsage();