@powersync/service-module-mongodb 0.15.3 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/CHANGELOG.md +52 -0
  2. package/dist/api/MongoRouteAPIAdapter.js +2 -2
  3. package/dist/api/MongoRouteAPIAdapter.js.map +1 -1
  4. package/dist/replication/ChangeStream.d.ts +6 -6
  5. package/dist/replication/ChangeStream.js +300 -322
  6. package/dist/replication/ChangeStream.js.map +1 -1
  7. package/dist/replication/ChangeStreamReplicationJob.js +2 -2
  8. package/dist/replication/ChangeStreamReplicationJob.js.map +1 -1
  9. package/dist/replication/ChangeStreamReplicator.d.ts +1 -1
  10. package/dist/replication/ChangeStreamReplicator.js +1 -1
  11. package/dist/replication/ChangeStreamReplicator.js.map +1 -1
  12. package/dist/replication/JsonBufferWriter.d.ts +80 -0
  13. package/dist/replication/JsonBufferWriter.js +342 -0
  14. package/dist/replication/JsonBufferWriter.js.map +1 -0
  15. package/dist/replication/MongoManager.d.ts +1 -1
  16. package/dist/replication/MongoManager.js +1 -1
  17. package/dist/replication/MongoManager.js.map +1 -1
  18. package/dist/replication/MongoRelation.js +4 -0
  19. package/dist/replication/MongoRelation.js.map +1 -1
  20. package/dist/replication/MongoSnapshotQuery.d.ts +1 -1
  21. package/dist/replication/MongoSnapshotQuery.js +6 -3
  22. package/dist/replication/MongoSnapshotQuery.js.map +1 -1
  23. package/dist/replication/RawChangeStream.d.ts +55 -0
  24. package/dist/replication/RawChangeStream.js +322 -0
  25. package/dist/replication/RawChangeStream.js.map +1 -0
  26. package/dist/replication/SourceRowConverter.d.ts +46 -0
  27. package/dist/replication/SourceRowConverter.js +42 -0
  28. package/dist/replication/SourceRowConverter.js.map +1 -0
  29. package/dist/replication/bufferToSqlite.d.ts +43 -0
  30. package/dist/replication/bufferToSqlite.js +740 -0
  31. package/dist/replication/bufferToSqlite.js.map +1 -0
  32. package/dist/replication/internal-mongodb-utils.d.ts +0 -12
  33. package/dist/replication/internal-mongodb-utils.js +0 -54
  34. package/dist/replication/internal-mongodb-utils.js.map +1 -1
  35. package/dist/replication/replication-index.d.ts +4 -2
  36. package/dist/replication/replication-index.js +4 -2
  37. package/dist/replication/replication-index.js.map +1 -1
  38. package/dist/replication/replication-utils.d.ts +1 -1
  39. package/dist/types/types.js.map +1 -1
  40. package/package.json +11 -11
  41. package/scripts/benchmark-change-document-json.mts +358 -0
  42. package/scripts/benchmark-change-document.mts +370 -0
  43. package/src/api/MongoRouteAPIAdapter.ts +2 -2
  44. package/src/replication/ChangeStream.ts +348 -371
  45. package/src/replication/ChangeStreamReplicationJob.ts +2 -2
  46. package/src/replication/ChangeStreamReplicator.ts +2 -5
  47. package/src/replication/JsonBufferWriter.ts +390 -0
  48. package/src/replication/MongoManager.ts +2 -2
  49. package/src/replication/MongoRelation.ts +5 -2
  50. package/src/replication/MongoSnapshotQuery.ts +8 -5
  51. package/src/replication/RawChangeStream.ts +460 -0
  52. package/src/replication/SourceRowConverter.ts +65 -0
  53. package/src/replication/bufferToSqlite.ts +944 -0
  54. package/src/replication/internal-mongodb-utils.ts +0 -66
  55. package/src/replication/replication-index.ts +4 -2
  56. package/src/replication/replication-utils.ts +2 -2
  57. package/src/types/types.ts +1 -1
  58. package/test/src/buffer_to_sqlite.test.ts +1146 -0
  59. package/test/src/change_stream.test.ts +49 -3
  60. package/test/src/change_stream_utils.ts +4 -10
  61. package/test/src/mongo_test.test.ts +66 -64
  62. package/test/src/parse_document_id.test.ts +54 -0
  63. package/test/src/raw_change_stream.test.ts +547 -0
  64. package/test/src/resume.test.ts +12 -2
  65. package/test/src/util.ts +56 -3
  66. package/test/tsconfig.json +0 -1
  67. package/tsconfig.scripts.json +13 -0
  68. package/tsconfig.tsbuildinfo +1 -1
  69. package/test/src/internal_mongodb_utils.test.ts +0 -103
@@ -0,0 +1,944 @@
1
+ import { mongo } from '@powersync/lib-service-mongodb';
2
+ import {
3
+ CompatibilityContext,
4
+ CompatibilityOption,
5
+ DateTimeSourceOptions,
6
+ DateTimeValue,
7
+ SqliteRow,
8
+ TimeValuePrecision
9
+ } from '@powersync/service-sync-rules';
10
+ import {
11
+ BYTE_COLON,
12
+ BYTE_COMMA,
13
+ BYTE_LBRACE,
14
+ BYTE_LBRACKET,
15
+ BYTE_ONE,
16
+ BYTE_RBRACE,
17
+ BYTE_RBRACKET,
18
+ BYTE_SPACE,
19
+ BYTE_T,
20
+ BYTE_ZERO,
21
+ JsonBufferWriter
22
+ } from './JsonBufferWriter.js';
23
+
24
+ const NESTED_DEPTH_LIMIT = 20;
25
+ const SHARED_UTC_DATE = new Date(0);
26
+ const BSON_TYPE_DOUBLE = 0x01;
27
+ const BSON_TYPE_STRING = 0x02;
28
+ const BSON_TYPE_DOCUMENT = 0x03;
29
+ const BSON_TYPE_ARRAY = 0x04;
30
+ const BSON_TYPE_BINARY = 0x05;
31
+ const BSON_TYPE_UNDEFINED = 0x06;
32
+ const BSON_TYPE_OBJECT_ID = 0x07;
33
+ const BSON_TYPE_BOOLEAN = 0x08;
34
+ const BSON_TYPE_UTC_DATETIME = 0x09;
35
+ const BSON_TYPE_NULL = 0x0a;
36
+ const BSON_TYPE_REGEX = 0x0b;
37
+ const BSON_TYPE_DB_POINTER = 0x0c;
38
+ const BSON_TYPE_CODE = 0x0d;
39
+ const BSON_TYPE_SYMBOL = 0x0e;
40
+ const BSON_TYPE_CODE_WITH_SCOPE = 0x0f;
41
+ const BSON_TYPE_INT32 = 0x10;
42
+ const BSON_TYPE_TIMESTAMP = 0x11;
43
+ const BSON_TYPE_INT64 = 0x12;
44
+ const BSON_TYPE_DECIMAL128 = 0x13;
45
+ const BSON_TYPE_MIN_KEY = 0xff;
46
+ const BSON_TYPE_MAX_KEY = 0x7f;
47
+ const BSON_BINARY_SUBTYPE_BYTE_ARRAY = 2;
48
+ const BSON_BINARY_SUBTYPE_UUID = 4;
49
+
50
/**
 * Selects how BSON UTC datetime values are rendered as text.
 *
 * Derive the value from a CompatibilityContext with getDateRenderMode().
 */
export const enum DateRenderMode {
  /** Legacy format: a space is used as the date/time separator. */
  LEGACY_MILLISECONDS = 0,
  /** ISO format: `T` is used as the date/time separator. */
  ISO_MILLISECONDS = 1,
  /**
   * ISO format with `T` separator, selected when the configured maximum
   * precision is seconds.
   */
  ISO_SECONDS = 2
}
55
+
56
+ // We use a single shared writer, to avoid repeatedly re-allocating buffers.
57
+ // Since this is only used in a synchronous call, this is safe.
58
+ // This never releases memory once a large buffer has been allocated, but that is fine
59
+ // for replication use.
60
+ const SHARED_WRITER = new JsonBufferWriter();
61
+
62
/**
 * Convert a buffer containing BSON bytes to a SqliteRow.
 *
 * This is using a custom BSON parser and JSON serializer for performance reasons. By bypassing bson.deserialize,
 * we avoid many small allocations, and can significantly increase throughput.
 *
 * This attempts to match the behavior of `bson.deserialize -> constructAfterRecord -> applyRowContext` for the most part,
 * with some intentional differences:
 * 1. Regular expression pattern options are preserved as-is, while the above normalizes to JS RegExp values.
 * 2. Full UTF-8 validation is not performed - we attempt to continue using replacement characters, as long as the resulting output remains valid.
 * 3. bson.deserialize has special-case handler for converting documents containing {$ref} -> DBRef. We don't do that here.
 *
 * General principles followed:
 * 1. Correctness: Never produce invalid JSON.
 * 2. Performance: Optimize to be as performant as possible for common cases.
 * 3. Full BSON support: Support all valid BSON documents as input, including deprecated types, but without specifically optimizing for performance here.
 * 4. The source database is responsible for producing valid BSON - we don't test for all edge cases of invalid BSON.
 * 5. We do a best-effort attempt to support "degenerate" BSON cases as documented at https://specifications.readthedocs.io/en/latest/bson-corpus/bson-corpus/, since MongoDB can produce many of these cases.
 *
 * A top-level field named `__proto__` is stored as an ordinary own property of the returned row
 * (the same way bson.deserialize handles it), rather than going through the `Object.prototype.__proto__`
 * setter, which would either drop the value or replace the row's prototype.
 *
 * @param bytes the source BSON bytes
 * @param dateRenderMode derive using getDateRenderMode(compatibilityContext)
 *
 * @returns a SqliteRow
 * @throws Error when the document length/terminator, a string, a binary or a cstring is structurally invalid
 */
export function bufferToSqlite(bytes: Buffer, dateRenderMode: DateRenderMode): SqliteRow {
  const row: SqliteRow = {};
  const jsonWriter = SHARED_WRITER;
  // BSON documents are length-prefixed and null-terminated. We parse directly
  // from raw bytes, so structural validation happens here rather than in the
  // upstream BSON decoder.
  const bodyEnd = readDocumentLength(bytes, 0) - 1;
  let offset = 4;

  while (offset < bodyEnd) {
    const previousOffset = offset;
    const type = bytes[offset++];
    const { value: key, nextOffset: afterKey } = readCString(bytes, offset);
    offset = afterKey;

    // The converted value for this element; assigned to the row once, below.
    let columnValue: SqliteRow[string];

    switch (type) {
      case BSON_TYPE_OBJECT_ID: {
        columnValue = hexLower(bytes, offset, 12);
        offset += 12;
        break;
      }
      case BSON_TYPE_STRING: {
        const { value, nextOffset } = readBsonString(bytes, offset);
        columnValue = value;
        offset = nextOffset;
        break;
      }
      case BSON_TYPE_ARRAY: {
        jsonWriter.reset();
        const result = serializeNestedArrayToJson(bytes, offset, 0, jsonWriter, dateRenderMode);
        columnValue = jsonWriter.toString();
        offset = result.nextOffset;
        break;
      }
      case BSON_TYPE_DOCUMENT: {
        jsonWriter.reset();
        const result = serializeNestedObjectToJson(bytes, offset, 0, jsonWriter, dateRenderMode);
        columnValue = jsonWriter.toString();
        offset = result.nextOffset;
        break;
      }
      case BSON_TYPE_BOOLEAN: {
        columnValue = bytes[offset++] ? 1n : 0n;
        break;
      }
      case BSON_TYPE_UTC_DATETIME: {
        // Even though this is not JSON, we use the same JSON writer for this.
        jsonWriter.reset();
        appendDateTimeToWriter(jsonWriter, Number(bytes.readBigInt64LE(offset)), false, dateRenderMode);
        columnValue = jsonWriter.toString();
        offset += 8;
        break;
      }
      case BSON_TYPE_INT32: {
        columnValue = BigInt(readInt32LE(bytes, offset));
        offset += 4;
        break;
      }
      case BSON_TYPE_TIMESTAMP: {
        columnValue = timestampToBigInt(bytes, offset);
        offset += 8;
        break;
      }
      case BSON_TYPE_INT64: {
        columnValue = bytes.readBigInt64LE(offset);
        offset += 8;
        break;
      }
      case BSON_TYPE_DECIMAL128: {
        columnValue = decimal128ToString(bytes, offset);
        offset += 16;
        break;
      }
      case BSON_TYPE_BINARY: {
        const { value, nextOffset } = parseTopLevelBinary(bytes, offset);
        columnValue = value;
        offset = nextOffset;
        break;
      }
      case BSON_TYPE_REGEX: {
        const { pattern, options, nextOffset } = parseRegex(bytes, offset);
        jsonWriter.reset();
        writeRegexJson(jsonWriter, pattern, options);
        columnValue = jsonWriter.toString();
        offset = nextOffset;
        break;
      }
      case BSON_TYPE_DB_POINTER: {
        // DBPointer
        jsonWriter.reset();
        const nextOffset = writeDbPointerJson(bytes, offset, jsonWriter);
        columnValue = jsonWriter.toString();
        offset = nextOffset;
        break;
      }
      case BSON_TYPE_CODE: {
        // JavaScript code
        jsonWriter.reset();
        const nextOffset = writeCodeJson(bytes, offset, 0, jsonWriter);
        columnValue = jsonWriter.toString();
        offset = nextOffset;
        break;
      }
      case BSON_TYPE_SYMBOL: {
        // Symbol
        const { value, nextOffset } = readBsonString(bytes, offset);
        columnValue = value;
        offset = nextOffset;
        break;
      }
      case BSON_TYPE_CODE_WITH_SCOPE: {
        jsonWriter.reset();
        const nextOffset = writeCodeWithScopeJson(bytes, offset, 0, jsonWriter, dateRenderMode);
        columnValue = jsonWriter.toString();
        offset = nextOffset;
        break;
      }
      case BSON_TYPE_UNDEFINED:
      case BSON_TYPE_NULL:
      case BSON_TYPE_MIN_KEY:
      case BSON_TYPE_MAX_KEY:
        // These types carry no payload bytes.
        columnValue = null;
        break;
      case BSON_TYPE_DOUBLE: {
        const value = bytes.readDoubleLE(offset);
        offset += 8;
        // Match the default path: integral doubles are widened to bigint.
        columnValue = Number.isInteger(value) ? BigInt(value) : value;
        break;
      }
      default: {
        // Fallback for type bytes not handled above: the value is null and
        // skipBsonValue is asked to advance past the payload.
        // NOTE(review): skipBsonValue throws for every type byte it does not
        // recognise, and it recognises the same set as this switch, so in
        // practice this branch raises instead of yielding null - confirm
        // that this is the intended behavior.
        columnValue = null;
        offset = skipBsonValue(bytes, offset, type);
        break;
      }
    }

    if (key === '__proto__') {
      // Plain assignment would invoke the inherited `__proto__` setter instead
      // of creating a column. Define it as a regular own data property.
      Object.defineProperty(row, key, {
        value: columnValue,
        writable: true,
        enumerable: true,
        configurable: true
      });
    } else {
      row[key] = columnValue;
    }

    assertAdvanced(previousOffset, offset);
  }

  return row;
}
230
+
231
/**
 * Reads a signed, little-endian 32-bit integer from `bytes` at `offset`.
 *
 * Thin wrapper around Buffer.readInt32LE, which throws a RangeError when the
 * four bytes are not fully inside the buffer.
 */
function readInt32LE(bytes: Buffer, offset: number): number {
  const value = bytes.readInt32LE(offset);
  return value;
}
234
+
235
/**
 * Reads and validates the int32 length prefix of the BSON document that
 * starts at `offset`.
 *
 * @returns the total document length in bytes, including the 4-byte prefix
 *   and the trailing zero byte
 * @throws Error if the length is below 5, extends past the end of `bytes`, or
 *   the last byte of the document is not zero
 */
function readDocumentLength(bytes: Buffer, offset: number): number {
  const length = bytes.readInt32LE(offset);
  const documentEnd = offset + length;

  const lengthIsValid = length >= 5 && documentEnd <= bytes.length;
  if (!lengthIsValid) {
    throw new Error('Invalid BSON document length');
  }

  const terminator = bytes[documentEnd - 1];
  if (terminator !== 0) {
    throw new Error('Invalid BSON document terminator');
  }

  return length;
}
245
+
246
/**
 * Decodes the length-prefixed, zero-terminated BSON string at `offset`.
 *
 * The int32 prefix counts the string bytes plus the trailing zero byte.
 *
 * @returns the decoded text (without the terminator) and the offset of the
 *   first byte after the string
 * @throws Error if the length is below 1, extends past the end of `bytes`, or
 *   the final byte is not zero
 */
function readBsonString(bytes: Buffer, offset: number): { value: string; nextOffset: number } {
  const length = bytes.readInt32LE(offset);
  const textStart = offset + 4;
  const textEnd = textStart + length;

  const lengthIsValid = length >= 1 && textEnd <= bytes.length;
  if (!lengthIsValid) {
    throw new Error('Invalid BSON string length');
  }

  const terminatorIndex = textEnd - 1;
  if (bytes[terminatorIndex] !== 0) {
    throw new Error('Invalid BSON string terminator');
  }

  const value = bytes.toString('utf8', textStart, terminatorIndex);
  return { value, nextOffset: textEnd };
}
261
+
262
/**
 * Validates the length-prefixed BSON string at `offset` without decoding it.
 *
 * @returns the offset of the first byte after the string
 * @throws Error if the length is below 1, extends past the end of `bytes`, or
 *   the final byte is not zero
 */
function readBsonStringEnd(bytes: Buffer, offset: number): number {
  const length = bytes.readInt32LE(offset);
  const textEnd = offset + 4 + length;

  const lengthIsValid = length >= 1 && textEnd <= bytes.length;
  if (!lengthIsValid) {
    throw new Error('Invalid BSON string length');
  }

  const terminator = bytes[textEnd - 1];
  if (terminator !== 0) {
    throw new Error('Invalid BSON string terminator');
  }

  return textEnd;
}
274
+
275
/**
 * Decodes the zero-terminated string (cstring) that starts at `offset`.
 *
 * @returns the decoded text and the offset just past the zero byte
 * @throws Error if no zero byte exists at or after `offset`
 */
function readCString(bytes: Buffer, offset: number): { value: string; nextOffset: number } {
  const terminatorIndex = bytes.indexOf(0, offset);
  if (terminatorIndex < 0) {
    throw new Error('Invalid BSON: missing cstring terminator');
  }

  const value = bytes.toString('utf8', offset, terminatorIndex);
  const nextOffset = terminatorIndex + 1;
  return { value, nextOffset };
}
285
+
286
/**
 * Finds the end of the zero-terminated string (cstring) that starts at
 * `offset`, without decoding it.
 *
 * @returns the offset just past the zero byte
 * @throws Error if no zero byte exists at or after `offset`
 */
function skipCString(bytes: Buffer, offset: number): number {
  const terminatorIndex = bytes.indexOf(0, offset);
  if (terminatorIndex >= 0) {
    return terminatorIndex + 1;
  }
  throw new Error('Invalid BSON: missing cstring terminator');
}
293
+
294
/**
 * Parses a BSON regular expression value: two consecutive zero-terminated
 * strings, the pattern followed by the options.
 *
 * @returns the pattern, the options, and the offset just past the second
 *   zero byte
 * @throws Error if either zero terminator is missing
 */
function parseRegex(bytes: Buffer, offset: number): { pattern: string; options: string; nextOffset: number } {
  const patternTerminator = bytes.indexOf(0, offset);
  if (patternTerminator < 0) {
    throw new Error('Invalid BSON regex');
  }

  const optionsStart = patternTerminator + 1;
  const optionsTerminator = bytes.indexOf(0, optionsStart);
  if (optionsTerminator < 0) {
    throw new Error('Invalid BSON regex');
  }

  // The option string is returned exactly as encoded in the BSON bytes. The
  // default conversion path normalizes flags through JS RegExp semantics; this
  // custom path deliberately does not.
  const pattern = bytes.toString('utf8', offset, patternTerminator);
  const options = bytes.toString('utf8', optionsStart, optionsTerminator);
  return { pattern, options, nextOffset: optionsTerminator + 1 };
}
310
+
311
/**
 * Renders the 16-byte Decimal128 value at `offset` as a string.
 *
 * Decoding is delegated to the upstream `mongo.Decimal128` implementation
 * rather than re-implemented here.
 */
function decimal128ToString(bytes: Buffer, offset: number): string {
  const raw = bytes.subarray(offset, offset + 16);
  const decimal = new mongo.Decimal128(raw);
  return decimal.toString();
}
315
+
316
/**
 * Combines the 8 bytes at `offset` into one unsigned 64-bit bigint.
 *
 * The four bytes at `offset + 4` form the high 32 bits and the four bytes at
 * `offset` form the low 32 bits; both halves are read as unsigned
 * little-endian integers.
 */
function timestampToBigInt(bytes: Buffer, offset: number): bigint {
  const high = BigInt(bytes.readUInt32LE(offset + 4));
  const low = BigInt(bytes.readUInt32LE(offset));
  return (high << 32n) | low;
}
319
+
320
/**
 * Formats UUID bytes in the hyphenated 8-4-4-4-12 form.
 *
 * @param bytes must be exactly 16 bytes in length - check before calling this.
 * @returns lower-case hex form of the UUID
 */
function uuidToString(bytes: Buffer): string {
  const hex = bytes.toString('hex');
  const groups = [hex.slice(0, 8), hex.slice(8, 12), hex.slice(12, 16), hex.slice(16, 20), hex.slice(20)];
  return groups.join('-');
}
328
+
329
/**
 * Parses a top-level BSON binary value: int32 length, one subtype byte, then
 * the payload.
 *
 * @returns the value and the offset just past the payload. A subtype 4 (UUID)
 *   payload of exactly 16 bytes becomes a hyphenated lower-case string; every
 *   other payload is returned as a copied Uint8Array.
 * @throws Error if the length is negative or the payload extends past the end
 *   of `bytes`; binaryDataSlice may also throw for an invalid legacy subtype 2
 *   payload
 */
function parseTopLevelBinary(bytes: Buffer, offset: number): { value: Uint8Array | string; nextOffset: number } {
  const payloadLength = readInt32LE(bytes, offset);
  if (payloadLength < 0) {
    throw new Error('Invalid BSON binary length');
  }

  const subtype = bytes[offset + 4];
  const payloadStart = offset + 5;
  const payloadEnd = payloadStart + payloadLength;
  if (payloadEnd > bytes.length) {
    throw new Error('Invalid BSON binary length');
  }

  const payload = binaryDataSlice(bytes, payloadStart, payloadEnd, subtype);

  // Only subtype 4 UUIDs of the expected size are surfaced as strings; all
  // other binary values stay as raw bytes at the top level.
  const isUuid = subtype === BSON_BINARY_SUBTYPE_UUID && payload.length === 16;
  const value = isUuid ? uuidToString(payload) : bufferToUint8Array(payload);
  return { value, nextOffset: payloadEnd };
}
350
+
351
/**
 * Copies the bytes visible through `bytes` into a new, standalone Uint8Array.
 *
 * The result is backed by its own buffer, so later writes to the source do
 * not show up in it.
 */
function bufferToUint8Array(bytes: Buffer): Uint8Array {
  const start = bytes.byteOffset;
  const end = start + bytes.byteLength;
  const copied = bytes.buffer.slice(start, end);
  return new Uint8Array(copied);
}
354
+
355
/**
 * Returns a view (no copy) of the payload bytes of a BSON binary value.
 *
 * For the legacy subtype 2, the payload carries its own int32 length prefix;
 * the view then covers only the bytes after that inner prefix.
 *
 * @param dataStart offset of the first payload byte
 * @param dataEnd offset just past the last payload byte
 * @throws Error if the legacy inner length is negative or extends past
 *   `dataEnd`
 */
function binaryDataSlice(bytes: Buffer, dataStart: number, dataEnd: number, subtype: number): Buffer {
  if (subtype === BSON_BINARY_SUBTYPE_BYTE_ARRAY) {
    // Legacy subtype 2: a second length prefix precedes the actual bytes.
    const innerLength = readInt32LE(bytes, dataStart);
    const innerStart = dataStart + 4;
    const innerEnd = innerStart + innerLength;
    if (innerLength < 0 || innerEnd > dataEnd) {
      throw new Error('Invalid BSON legacy binary length');
    }
    return bytes.subarray(innerStart, innerEnd);
  }

  return bytes.subarray(dataStart, dataEnd);
}
371
+
372
/**
 * Computes the offset just past a BSON value of the given `type` that starts
 * at `offset`, without decoding the value.
 *
 * Length prefixes of strings, documents, arrays, binaries and code-with-scope
 * values are used as-is here; only the DBPointer, code and symbol paths go
 * through readBsonStringEnd and its validation.
 *
 * @throws Error for a regex with a missing terminator, for an invalid string
 *   in the DBPointer/code/symbol cases, and for any type byte not listed
 */
function skipBsonValue(bytes: Buffer, offset: number, type: number) {
  switch (type) {
    // Types without any payload bytes.
    case BSON_TYPE_UNDEFINED:
    case BSON_TYPE_NULL:
    case BSON_TYPE_MIN_KEY:
    case BSON_TYPE_MAX_KEY:
      return offset;

    // Fixed-width payloads.
    case BSON_TYPE_BOOLEAN:
      return offset + 1;
    case BSON_TYPE_INT32:
      return offset + 4;
    case BSON_TYPE_DOUBLE:
    case BSON_TYPE_UTC_DATETIME:
    case BSON_TYPE_TIMESTAMP:
    case BSON_TYPE_INT64:
      return offset + 8;
    case BSON_TYPE_OBJECT_ID:
      return offset + 12;
    case BSON_TYPE_DECIMAL128:
      return offset + 16;

    // The int32 prefix is the total size of the value, prefix included.
    case BSON_TYPE_DOCUMENT:
    case BSON_TYPE_ARRAY:
    case BSON_TYPE_CODE_WITH_SCOPE: {
      const totalLength = readInt32LE(bytes, offset);
      return offset + totalLength;
    }

    // The int32 prefix counts only the bytes that follow it.
    case BSON_TYPE_STRING: {
      const stringLength = readInt32LE(bytes, offset);
      return offset + 4 + stringLength;
    }
    case BSON_TYPE_BINARY: {
      // Prefix, then one subtype byte, then the payload.
      const payloadLength = readInt32LE(bytes, offset);
      return offset + 4 + 1 + payloadLength;
    }

    // Validated length-prefixed strings.
    case BSON_TYPE_CODE:
    case BSON_TYPE_SYMBOL:
      return readBsonStringEnd(bytes, offset);
    case BSON_TYPE_DB_POINTER: {
      // A string followed by 12 more bytes.
      const afterString = readBsonStringEnd(bytes, offset);
      return afterString + 12;
    }

    case BSON_TYPE_REGEX: {
      // Two consecutive zero-terminated strings: pattern, then options.
      const patternTerminator = bytes.indexOf(0, offset);
      const optionsTerminator = bytes.indexOf(0, patternTerminator + 1);
      if (patternTerminator < 0 || optionsTerminator < 0) {
        throw new Error('Invalid BSON regex');
      }
      return optionsTerminator + 1;
    }

    default:
      throw new Error(`Unsupported BSON type for skip: 0x${type.toString(16)}`);
  }
}
439
+
440
/**
 * Writes the BSON document starting at `offset` to `writer` as a JSON object.
 *
 * Members whose value is reported as not defined by
 * serializeNestedElementValue are omitted: everything written for that member
 * (comma, key and colon) is rolled back by truncating the writer.
 *
 * @param depth nesting level of this document; 0 for a top-level field value
 * @returns the offset just past the document
 * @throws Error if `depth` exceeds NESTED_DEPTH_LIMIT, the document framing is
 *   invalid, or a key has no zero terminator
 */
function serializeNestedObjectToJson(
  bytes: Buffer,
  offset: number,
  depth: number,
  writer: JsonBufferWriter,
  dateRenderMode: DateRenderMode
): { nextOffset: number } {
  if (depth > NESTED_DEPTH_LIMIT) {
    throw new Error(`json nested object depth exceeds the limit of ${NESTED_DEPTH_LIMIT}`);
  }

  const documentLength = readDocumentLength(bytes, offset);
  const documentEnd = offset + documentLength;
  // The last byte of the document is its zero terminator.
  const terminatorIndex = documentEnd - 1;

  writer.writeByte(BYTE_LBRACE);

  let wroteMember = false;
  let position = offset + 4;
  while (position < terminatorIndex) {
    const elementStart = position;
    const elementType = bytes[position];
    const nameStart = position + 1;
    const nameEnd = bytes.indexOf(0, nameStart);
    if (nameEnd < 0) {
      throw new Error('Invalid BSON: missing cstring terminator');
    }

    // Remember where this member begins in the output so it can be removed
    // again if its value turns out to be undefined.
    const rollbackLength = writer.getLength();
    if (wroteMember) {
      writer.writeByte(BYTE_COMMA);
    }
    writer.writeQuotedUtf8Slice(bytes, nameStart, nameEnd);
    writer.writeByte(BYTE_COLON);

    const element = serializeNestedElementValue(bytes, nameEnd + 1, elementType, depth, writer, dateRenderMode);
    position = element.nextOffset;
    // Fail fast on malformed input rather than looping on the same element.
    assertAdvanced(elementStart, position);

    if (element.defined) {
      wroteMember = true;
    } else {
      writer.truncate(rollbackLength);
    }
  }

  writer.writeByte(BYTE_RBRACE);
  return { nextOffset: documentEnd };
}
496
+
497
/**
 * Writes the BSON array starting at `offset` to `writer` as a JSON array.
 *
 * Element keys are skipped rather than decoded. An element whose value is
 * reported as not defined is written as `null`, so later elements keep their
 * position.
 *
 * @param depth nesting level of this array; 0 for a top-level field value
 * @returns the offset just past the array
 * @throws Error if `depth` exceeds NESTED_DEPTH_LIMIT, the document framing is
 *   invalid, or an element key has no zero terminator
 */
function serializeNestedArrayToJson(
  bytes: Buffer,
  offset: number,
  depth: number,
  writer: JsonBufferWriter,
  dateRenderMode: DateRenderMode
): { nextOffset: number } {
  if (depth > NESTED_DEPTH_LIMIT) {
    throw new Error(`json nested object depth exceeds the limit of ${NESTED_DEPTH_LIMIT}`);
  }

  const arrayLength = readDocumentLength(bytes, offset);
  const arrayEnd = offset + arrayLength;
  // The last byte of the array document is its zero terminator.
  const terminatorIndex = arrayEnd - 1;

  writer.writeByte(BYTE_LBRACKET);

  let needsComma = false;
  let position = offset + 4;
  while (position < terminatorIndex) {
    const elementStart = position;
    const elementType = bytes[position];
    const valueStart = skipCString(bytes, position + 1);

    if (needsComma) {
      writer.writeByte(BYTE_COMMA);
    }
    needsComma = true;

    const element = serializeNestedElementValue(bytes, valueStart, elementType, depth, writer, dateRenderMode);
    position = element.nextOffset;
    // Fail fast on malformed input rather than looping on the same element.
    assertAdvanced(elementStart, position);

    if (!element.defined) {
      writer.writeAscii('null');
    }
  }

  writer.writeByte(BYTE_RBRACKET);
  return { nextOffset: arrayEnd };
}
543
+
544
/**
 * Writes the JSON form of one nested BSON value of the given `type`, starting
 * at `offset`, to `writer`.
 *
 * @param depth nesting level of the container holding this value
 * @returns the offset just past the value, and `defined: false` only for the
 *   BSON undefined type (in which case nothing is written)
 * @throws Error for a type byte that is not handled here
 */
function serializeNestedElementValue(
  bytes: Buffer,
  offset: number,
  type: number,
  depth: number,
  writer: JsonBufferWriter,
  dateRenderMode: DateRenderMode
): { nextOffset: number; defined: boolean } {
  switch (type) {
    // --- Values without payload bytes ---
    case BSON_TYPE_UNDEFINED: {
      // The caller decides how to represent an undefined value.
      return { nextOffset: offset, defined: false };
    }
    case BSON_TYPE_NULL:
    case BSON_TYPE_MIN_KEY:
    case BSON_TYPE_MAX_KEY: {
      writer.writeAscii('null');
      return { nextOffset: offset, defined: true };
    }

    // --- Fixed-width values written inline ---
    case BSON_TYPE_BOOLEAN: {
      // Booleans are written as the digits 1 and 0.
      const digit = bytes[offset] ? BYTE_ONE : BYTE_ZERO;
      writer.writeByte(digit);
      return { nextOffset: offset + 1, defined: true };
    }
    case BSON_TYPE_INT32: {
      const int32Text = String(readInt32LE(bytes, offset));
      writer.writeAscii(int32Text);
      return { nextOffset: offset + 4, defined: true };
    }
    case BSON_TYPE_INT64: {
      const int64Text = bytes.readBigInt64LE(offset).toString();
      writer.writeAscii(int64Text);
      return { nextOffset: offset + 8, defined: true };
    }
    case BSON_TYPE_TIMESTAMP: {
      const timestampText = timestampToBigInt(bytes, offset).toString();
      writer.writeAscii(timestampText);
      return { nextOffset: offset + 8, defined: true };
    }
    case BSON_TYPE_OBJECT_ID: {
      writer.writeQuotedHexLower(bytes, offset, 12);
      return { nextOffset: offset + 12, defined: true };
    }
    case BSON_TYPE_DECIMAL128: {
      const decimalText = decimal128ToString(bytes, offset);
      writer.writeQuotedJsonString(decimalText);
      return { nextOffset: offset + 16, defined: true };
    }

    // --- Values handled by dedicated helpers ---
    case BSON_TYPE_DOUBLE:
      return serializeNestedDoubleElement(bytes, offset, writer);
    case BSON_TYPE_STRING:
      return serializeNestedStringElement(bytes, offset, writer);
    case BSON_TYPE_SYMBOL:
      return serializeNestedSymbolElement(bytes, offset, writer);
    case BSON_TYPE_BINARY:
      return serializeNestedBinaryElement(bytes, offset, writer);
    case BSON_TYPE_REGEX:
      return serializeNestedRegexElement(bytes, offset, writer);
    case BSON_TYPE_DB_POINTER:
      return serializeNestedDbPointerElement(bytes, offset, writer);
    case BSON_TYPE_UTC_DATETIME:
      return serializeNestedDateTimeElement(bytes, offset, writer, dateRenderMode);
    case BSON_TYPE_CODE:
      return serializeNestedCodeElement(bytes, offset, depth, writer);
    case BSON_TYPE_CODE_WITH_SCOPE:
      return serializeNestedCodeWithScopeElement(bytes, offset, depth, writer, dateRenderMode);
    case BSON_TYPE_DOCUMENT:
      return serializeNestedObjectElement(bytes, offset, depth, writer, dateRenderMode);
    case BSON_TYPE_ARRAY:
      return serializeNestedArrayElement(bytes, offset, depth, writer, dateRenderMode);

    default:
      throw new Error(`Unsupported BSON nested type: 0x${type.toString(16)}`);
  }
}
612
+
613
/**
 * Writes the little-endian double at `offset` to `writer`.
 *
 * NaN and the infinities have no JSON number form and are written as `null`;
 * every other value is written using Number.prototype.toString.
 *
 * @returns the offset just past the 8-byte value; `defined` is always true
 */
function serializeNestedDoubleElement(
  bytes: Buffer,
  offset: number,
  writer: JsonBufferWriter
): { nextOffset: number; defined: boolean } {
  const value = bytes.readDoubleLE(offset);
  const text = Number.isFinite(value) ? value.toString() : 'null';
  writer.writeAscii(text);
  return { nextOffset: offset + 8, defined: true };
}
626
+
627
/**
 * Writes the length-prefixed BSON string at `offset` to `writer` as a quoted
 * string, passing the raw byte range instead of decoding it first.
 *
 * @returns the offset just past the string; `defined` is always true
 * @throws Error if readBsonStringEnd rejects the string framing
 */
function serializeNestedStringElement(
  bytes: Buffer,
  offset: number,
  writer: JsonBufferWriter
): { nextOffset: number; defined: boolean } {
  const nextOffset = readBsonStringEnd(bytes, offset);
  // Text bytes sit between the 4-byte length prefix and the zero terminator.
  const textStart = offset + 4;
  const textEnd = nextOffset - 1;
  writer.writeQuotedUtf8Slice(bytes, textStart, textEnd);
  return { nextOffset, defined: true };
}
638
+
639
/**
 * Serializes an embedded document one nesting level deeper than its
 * container.
 *
 * @returns the offset just past the document; `defined` is always true
 */
function serializeNestedObjectElement(
  bytes: Buffer,
  offset: number,
  depth: number,
  writer: JsonBufferWriter,
  dateRenderMode: DateRenderMode
): { nextOffset: number; defined: boolean } {
  const childDepth = depth + 1;
  const { nextOffset } = serializeNestedObjectToJson(bytes, offset, childDepth, writer, dateRenderMode);
  return { nextOffset, defined: true };
}
649
+
650
/**
 * Serializes an embedded array one nesting level deeper than its container.
 *
 * @returns the offset just past the array; `defined` is always true
 */
function serializeNestedArrayElement(
  bytes: Buffer,
  offset: number,
  depth: number,
  writer: JsonBufferWriter,
  dateRenderMode: DateRenderMode
): { nextOffset: number; defined: boolean } {
  const childDepth = depth + 1;
  const { nextOffset } = serializeNestedArrayToJson(bytes, offset, childDepth, writer, dateRenderMode);
  return { nextOffset, defined: true };
}
660
+
661
/**
 * Writes the UTC datetime stored at `offset` (a signed 64-bit little-endian
 * integer, passed on as milliseconds) to `writer` as a quoted string.
 *
 * @returns the offset just past the 8-byte value; `defined` is always true
 */
function serializeNestedDateTimeElement(
  bytes: Buffer,
  offset: number,
  writer: JsonBufferWriter,
  dateRenderMode: DateRenderMode
): { nextOffset: number; defined: boolean } {
  const millis = Number(bytes.readBigInt64LE(offset));
  appendDateTimeToWriter(writer, millis, true, dateRenderMode);
  return { nextOffset: offset + 8, defined: true };
}
670
+
671
/**
 * Parses the BSON regular expression at `offset` and writes it to `writer` as
 * a JSON object with `pattern` and `options` members.
 *
 * @returns the offset just past the regex; `defined` is always true
 * @throws Error if parseRegex rejects the value
 */
function serializeNestedRegexElement(
  bytes: Buffer,
  offset: number,
  writer: JsonBufferWriter
): { nextOffset: number; defined: boolean } {
  const regex = parseRegex(bytes, offset);
  writeRegexJson(writer, regex.pattern, regex.options);
  return { nextOffset: regex.nextOffset, defined: true };
}
680
+
681
+ /**
682
+ * KLUDGE: The DateTimeValue API needs a CompatibilityContext, but we don't want to pass that
683
+ * around through the entire stack when the DateRenderMode encapsulates it.
684
+ *
685
+ * This translates back from DateRenderMode to CompatibilityContext.
686
+ */
687
+ const DATETIME_COMPATIBILITY_OPTIONS: Record<DateRenderMode, CompatibilityContext> = {
688
+ [DateRenderMode.LEGACY_MILLISECONDS]: CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY,
689
+ [DateRenderMode.ISO_MILLISECONDS]: new CompatibilityContext({
690
+ edition: 2,
691
+ maxTimeValuePrecision: TimeValuePrecision.milliseconds
692
+ }),
693
+ [DateRenderMode.ISO_SECONDS]: new CompatibilityContext({
694
+ edition: 2,
695
+ maxTimeValuePrecision: TimeValuePrecision.seconds
696
+ })
697
+ };
698
+
699
+ const MONGO_TIME_OPTIONS: DateTimeSourceOptions = {
700
+ subSecondPrecision: TimeValuePrecision.milliseconds,
701
+ defaultSubSecondPrecision: TimeValuePrecision.milliseconds
702
+ };
703
+
704
/**
 * Fallback date serialization, used for dates outside the fast path.
 *
 * Slower than the fast path: it goes through an ISO string and the
 * DateTimeValue API, using the CompatibilityContext registered for the given
 * render mode.
 *
 * @param millis milliseconds since the Unix epoch
 */
function extendedDateTimeString(millis: number, dateRenderMode: DateRenderMode): string {
  const iso = new Date(millis).toISOString();
  const context = DATETIME_COMPATIBILITY_OPTIONS[dateRenderMode];
  const dateTime = new DateTimeValue(iso, undefined, MONGO_TIME_OPTIONS);
  return dateTime.toSqliteValue(context) as string;
}
714
+
715
/**
 * Maps a CompatibilityContext to the DateRenderMode used by bufferToSqlite.
 *
 * - `timestampsIso8601` not enabled: LEGACY_MILLISECONDS.
 * - enabled with a maximum precision of seconds: ISO_SECONDS.
 * - enabled otherwise (including no configured maximum): ISO_MILLISECONDS.
 */
export function getDateRenderMode(compatibilityContext: CompatibilityContext): DateRenderMode {
  const isoEnabled = compatibilityContext.isEnabled(CompatibilityOption.timestampsIso8601);
  if (!isoEnabled) {
    return DateRenderMode.LEGACY_MILLISECONDS;
  }

  const precision = compatibilityContext.maxTimeValuePrecision ?? TimeValuePrecision.milliseconds;

  // MongoDB only supports millisecond precision, so the millisecond mode also
  // covers configured values of microseconds and nanoseconds.
  return precision === TimeValuePrecision.seconds ? DateRenderMode.ISO_SECONDS : DateRenderMode.ISO_MILLISECONDS;
}
729
+
730
/**
 * Appends the textual form of a timestamp to `writer`.
 *
 * Years 0 through 9999 are written field by field via `writer.writeDateTime`;
 * anything outside that range goes through extendedDateTimeString.
 *
 * @param millis milliseconds since the Unix epoch
 * @param quoted whether the output is wrapped as a JSON string
 * @throws RangeError if `millis` is not representable as a Date
 */
function appendDateTimeToWriter(
  writer: JsonBufferWriter,
  millis: number,
  quoted: boolean,
  dateRenderMode: DateRenderMode
) {
  // A single module-level Date instance is reused to avoid allocating one per call.
  const sharedDate = SHARED_UTC_DATE;
  sharedDate.setTime(millis);
  if (Number.isNaN(sharedDate.getTime())) {
    throw new RangeError('Invalid time value');
  }

  const year = sharedDate.getUTCFullYear();
  const inFastPathRange = year >= 0 && year <= 9999;
  if (!inFastPathRange) {
    // Abnormal date ranges. We support these, but don't optimize for performance.
    const text = extendedDateTimeString(millis, dateRenderMode);
    if (quoted) {
      writer.writeQuotedJsonString(text);
    } else {
      writer.writeUtf8(text);
    }
    return;
  }

  // Legacy mode separates date and time with a space; ISO modes use 'T'.
  const separator = dateRenderMode === DateRenderMode.LEGACY_MILLISECONDS ? BYTE_SPACE : BYTE_T;
  // NOTE(review): presumably toggles sub-second output in writeDateTime - confirm.
  const notSecondsMode = dateRenderMode !== DateRenderMode.ISO_SECONDS;

  writer.writeDateTime(
    year,
    sharedDate.getUTCMonth() + 1,
    sharedDate.getUTCDate(),
    sharedDate.getUTCHours(),
    sharedDate.getUTCMinutes(),
    sharedDate.getUTCSeconds(),
    sharedDate.getUTCMilliseconds(),
    quoted,
    separator,
    notSecondsMode
  );
}
768
+
769
/**
 * Returns the lower-case hex encoding of `length` bytes of `bytes`, starting
 * at `offset`.
 */
function hexLower(bytes: Buffer, offset: number, length: number): string {
  const end = offset + length;
  return bytes.toString('hex', offset, end);
}
772
+
773
/**
 * Writes a regular expression to `writer` as the JSON object
 * `{"pattern":<pattern>,"options":<options>}`.
 */
function writeRegexJson(writer: JsonBufferWriter, pattern: string, options: string) {
  writer.writeAscii('{"pattern":');
  writer.writeQuotedJsonString(pattern);

  writer.writeAscii(',"options":');
  writer.writeQuotedJsonString(options);

  writer.writeByte(BYTE_RBRACE);
}
780
+
781
+ function writeCodeJson(bytes: Buffer, offset: number, depth: number, writer: JsonBufferWriter) {
782
+ const { value: code, nextOffset } = readBsonString(bytes, offset);
783
+ writer.writeAscii('{"code":');
784
+ writer.writeQuotedJsonString(code);
785
+ writer.writeAscii(',"scope":null}');
786
+ return nextOffset;
787
+ }
788
+
789
+ function writeCodeWithScopeJson(
790
+ bytes: Buffer,
791
+ offset: number,
792
+ depth: number,
793
+ writer: JsonBufferWriter,
794
+ dateRenderMode: DateRenderMode
795
+ ) {
796
+ const totalLength = readInt32LE(bytes, offset);
797
+ const { value: code, nextOffset: afterCode } = readBsonString(bytes, offset + 4);
798
+ writer.writeAscii('{"code":');
799
+ writer.writeQuotedJsonString(code);
800
+ writer.writeAscii(',"scope":');
801
+ serializeNestedObjectToJson(bytes, afterCode, depth + 1, writer, dateRenderMode);
802
+ writer.writeByte(BYTE_RBRACE);
803
+ // code_w_scope carries its own total byte length, so we trust that wrapper
804
+ // rather than reconstructing the end position from the nested scope.
805
+ return offset + totalLength;
806
+ }
807
+
808
+ function writeDbPointerJson(bytes: Buffer, offset: number, writer: JsonBufferWriter) {
809
+ const { value: collection, nextOffset } = readBsonString(bytes, offset);
810
+ const separator = collection.indexOf('.');
811
+ const db = separator >= 0 ? collection.slice(0, separator) : null;
812
+ const collectionName = separator >= 0 ? collection.slice(separator + 1) : collection;
813
+ writer.writeAscii('{"collection":');
814
+ writer.writeQuotedJsonString(collectionName);
815
+ writer.writeAscii(',"oid":');
816
+ writer.writeQuotedHexLower(bytes, nextOffset, 12);
817
+ if (db != null) {
818
+ writer.writeAscii(',"db":');
819
+ writer.writeQuotedJsonString(db);
820
+ }
821
+ writer.writeAscii(',"fields":{}}');
822
+ return nextOffset + 12;
823
+ }
824
+
825
+ function serializeNestedBinaryElement(
826
+ bytes: Buffer,
827
+ offset: number,
828
+ writer: JsonBufferWriter
829
+ ): { nextOffset: number; defined: boolean } {
830
+ const length = readInt32LE(bytes, offset);
831
+ if (length < 0) {
832
+ throw new Error('Invalid BSON binary length');
833
+ }
834
+ const subtype = bytes[offset + 4];
835
+ const dataStart = offset + 5;
836
+ const dataEnd = dataStart + length;
837
+ if (dataEnd > bytes.length) {
838
+ throw new Error('Invalid BSON binary length');
839
+ }
840
+
841
+ const slice = binaryDataSlice(bytes, dataStart, dataEnd, subtype);
842
+ // Nested binary values are omitted from JSON unless they are subtype 4 UUIDs,
843
+ // which are represented as strings for parity with the default path.
844
+ if (subtype === BSON_BINARY_SUBTYPE_UUID && slice.length === 16) {
845
+ writer.writeQuotedUuid(slice, 0);
846
+ return { nextOffset: dataEnd, defined: true };
847
+ }
848
+
849
+ return { nextOffset: dataEnd, defined: false };
850
+ }
851
+
852
+ function assertAdvanced(previousOffset: number, nextOffset: number) {
853
+ if (nextOffset <= previousOffset) {
854
+ throw new Error('Invalid BSON parser state: non-advancing offset');
855
+ }
856
+ }
857
+
858
+ function serializeNestedDbPointerElement(
859
+ bytes: Buffer,
860
+ offset: number,
861
+ writer: JsonBufferWriter
862
+ ): { nextOffset: number; defined: boolean } {
863
+ return { nextOffset: writeDbPointerJson(bytes, offset, writer), defined: true };
864
+ }
865
+
866
+ function serializeNestedCodeElement(
867
+ bytes: Buffer,
868
+ offset: number,
869
+ depth: number,
870
+ writer: JsonBufferWriter
871
+ ): { nextOffset: number; defined: boolean } {
872
+ return { nextOffset: writeCodeJson(bytes, offset, depth, writer), defined: true };
873
+ }
874
+
875
+ function serializeNestedSymbolElement(
876
+ bytes: Buffer,
877
+ offset: number,
878
+ writer: JsonBufferWriter
879
+ ): { nextOffset: number; defined: boolean } {
880
+ return serializeNestedStringElement(bytes, offset, writer);
881
+ }
882
+
883
+ function serializeNestedCodeWithScopeElement(
884
+ bytes: Buffer,
885
+ offset: number,
886
+ depth: number,
887
+ writer: JsonBufferWriter,
888
+ dateRenderMode: DateRenderMode
889
+ ): { nextOffset: number; defined: boolean } {
890
+ return {
891
+ nextOffset: writeCodeWithScopeJson(bytes, offset, depth, writer, dateRenderMode),
892
+ defined: true
893
+ };
894
+ }
895
+
896
+ const idKey = Buffer.from('_id');
897
+
898
+ /**
899
+ * Parse an _id from a buffer, without parsing the entire document.
900
+ *
901
+ * The _id is parsed using standard bson.deserialize - different from bufferToSqlite.
902
+ *
903
+ * @returns the parsed id, as well as a serialized document including only _id.
904
+ */
905
+ export function parseDocumentId(bytes: Buffer): { id: any; idBuffer: Buffer } {
906
+ const bodyEnd = readDocumentLength(bytes, 0) - 1;
907
+ let offset = 4;
908
+
909
+ while (offset < bodyEnd) {
910
+ const baseOffset = offset;
911
+ const type = bytes[baseOffset];
912
+ // In most cases the first key should be _id, but we also handle cases where
913
+ // it occurs later.
914
+ const keyStart = baseOffset + 1;
915
+ const afterKey = skipCString(bytes, keyStart);
916
+ const keyEnd = afterKey - 1; // without null terminator
917
+ const nextOffset = skipBsonValue(bytes, afterKey, type);
918
+ offset = nextOffset;
919
+ if (keyEnd - keyStart != 3) {
920
+ continue;
921
+ }
922
+
923
+ if (!idKey.equals(bytes.subarray(keyStart, keyEnd))) {
924
+ // Not _id - check the next key
925
+ continue;
926
+ }
927
+
928
+ // We create a new "document" containing only the _id, by directly manipulating buffers.
929
+ // https://bsonspec.org/spec.html
930
+ // document ::= int32 e_list unsigned_byte(0)
931
+ // e_list ::= element e_list
932
+ // element ::= signed_byte e_name ...
933
+ const baseLength = nextOffset - baseOffset;
934
+
935
+ // Our buffer wraps the _id element: 4 bytes before for the size, 1 null byte at the end.
936
+ const genBuffer = Buffer.allocUnsafe(baseLength + 5);
937
+ genBuffer.writeInt32LE(baseLength + 5, 0);
938
+ bytes.copy(genBuffer, 4, baseOffset, baseOffset + baseLength);
939
+ genBuffer[genBuffer.length - 1] = 0;
940
+ return { idBuffer: genBuffer, id: mongo.BSON.deserialize(genBuffer, { useBigInt64: true })._id };
941
+ }
942
+
943
+ throw new Error(`Attempt to parse document without _id`);
944
+ }