bson 7.2.0 → 7.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import { Binary, UUID } from '../binary';
2
2
  import type { Document } from '../bson';
3
3
  import { Code } from '../code';
4
4
  import * as constants from '../constants';
5
- import { DBRef, type DBRefLike, isDBRefLike } from '../db_ref';
5
+ import { DBRef, isDBRefLike } from '../db_ref';
6
6
  import { Decimal128 } from '../decimal128';
7
7
  import { Double } from '../double';
8
8
  import { BSONError } from '../error';
@@ -122,14 +122,77 @@ export function internalDeserialize(
122
122
  return deserializeObject(buffer, index, options, isArray);
123
123
  }
124
124
 
125
+ interface NestedParsingFrame {
126
+ // One of 3 supported types:
127
+ // - constants.BSON_DATA_OBJECT
128
+ // - constants.BSON_DATA_ARRAY
129
+ // - constants.BSON_DATA_CODE_W_SCOPE
130
+ elementType:
131
+ | typeof constants.BSON_DATA_OBJECT
132
+ | typeof constants.BSON_DATA_ARRAY
133
+ | typeof constants.BSON_DATA_CODE_W_SCOPE;
134
+ // Document that we will fill out as we parse the nested object
135
+ holdingDocument: Document;
136
+ // The key under which the finished nested value is assigned on the parent document once this frame completes: a decoded string key when the parent is an object, or a numeric index when the parent is an array
137
+ propertyName: string | number;
138
+ // The index in the buffer where the current object ends, used to know when we are done parsing the current object and can pop the stack
139
+ lastIndex: number;
140
+ // Whether the current frame is parsing an array, used to know whether to interpret keys as strings or array indices
141
+ isArray: boolean;
142
+ // The next array index to use if this frame is parsing an array, used to assign numeric keys to array elements without having to utf-8 decode the key from the buffer
143
+ arrayIndex: number;
144
+ // When true, all objects in this frame will be returned as raw bson buffers without parsing.
145
+ // This is used when the fieldsAsRaw option is used on a parent object, and is inherited by nested frames.
146
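+ // Array frames set this to true when their key is listed in fieldsAsRaw or when the enclosing frame is already raw; object and Code-with-Scope frames are always created with raw = false. The global raw option is not copied into frames; it is checked separately at the point where an embedded object is read.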
147
+ raw: boolean;
148
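+ // Tri-state DBRef tracker. Object and Code-with-Scope frames start as null, and array frames start as false since arrays cannot be DBRefs. While the value is not false, every key starting with '$' sets it to whether that key is one of $ref/$id/$db, so a single disallowed '$' key makes it permanently false. Only a true value triggers DBRef promotion when an object frame completes.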
149
+ isPossibleDBRef: boolean | null;
150
+ // The utf-8 validation setting for this frame, used to determine whether to utf-8 validate keys in this frame. This is determined based on the global utf-8 validation setting and the specific keys specified in the validation option.
151
+ validationSetting: boolean;
152
+ functionString: string | null; // only used for Code with Scope
153
+ // The enclosing frame, or null at the top level. The parsing stack is a linked list threaded
154
+ // through this field rather than a separate array, which avoids array push/pop churn per frame.
155
+ prev: NestedParsingFrame | null;
156
+ }
157
+
125
158
  const allowedDBRefKeys = /^\$ref$|^\$id$|^\$db$/;
126
159
 
160
+ // Assigns a parsed value into the destination document, guarding the __proto__ key to avoid
161
+ // prototype pollution.
162
+ function assignValue(dest: Document, name: string | number, value: unknown): void {
163
+ if (name === '__proto__') {
164
+ Object.defineProperty(dest, name, {
165
+ value,
166
+ writable: true,
167
+ enumerable: true,
168
+ configurable: true
169
+ });
170
+ } else {
171
+ dest[name] = value;
172
+ }
173
+ }
174
+
175
+ // Promotes a plain document to a DBRef instance when it has the DBRef shape ($ref/$id[/$db]).
176
+ function toPotentialDbRef(doc: Document): DBRef | Document {
177
+ if (isDBRefLike(doc)) {
178
+ const { $ref, $id, $db, ...fields } = doc;
179
+ return new DBRef($ref, $id, $db, fields);
180
+ }
181
+ return doc;
182
+ }
183
+
127
184
  function deserializeObject(
128
185
  buffer: Uint8Array,
129
186
  index: number,
130
187
  options: DeserializeOptions,
131
188
  isArray = false
132
189
  ) {
190
+ // Settings configured from options parameter
191
+
192
+ // Strips prototype chain so inherited properties don't affect option reads.
193
+ options = { ...options };
194
+
195
+ // Used to track fields that should be returned as raw bson buffers without parsing, this is set based on the fieldsAsRaw option and is inherited by nested frames when parsing nested objects
133
196
  const fieldsAsRaw = options['fieldsAsRaw'] == null ? null : options['fieldsAsRaw'];
134
197
 
135
198
  // Return raw bson buffer instead of parsing it
@@ -144,10 +207,10 @@ function deserializeObject(
144
207
  const promoteValues = options.promoteValues ?? true;
145
208
  const useBigInt64 = options.useBigInt64 ?? false;
146
209
 
210
+ // Validate bigint and long promotion settings
147
211
  if (useBigInt64 && !promoteValues) {
148
212
  throw new BSONError('Must either request bigint or Long for int64 deserialization');
149
213
  }
150
-
151
214
  if (useBigInt64 && !promoteLongs) {
152
215
  throw new BSONError('Must either request bigint or Long for int64 deserialization');
153
216
  }
@@ -193,6 +256,8 @@ function deserializeObject(
193
256
  }
194
257
  }
195
258
 
259
+ // Begin parsing the document
260
+
196
261
  // Set the start index
197
262
  const startIndex = index;
198
263
 
@@ -201,26 +266,83 @@ function deserializeObject(
201
266
 
202
267
  // Read the document size
203
268
  const size = NumberUtils.getInt32LE(buffer, index);
269
+ // Skip past the size field
204
270
  index += 4;
205
271
 
206
272
  // Ensure buffer is valid size
207
273
  if (size < 5 || size > buffer.length) throw new BSONError('corrupt bson message');
208
274
 
209
275
  // Create holding object
210
- const object: Document = isArray ? [] : {};
276
+ const rootObject: Document = isArray ? [] : {};
211
277
  // Used for arrays to skip having to perform utf8 decoding
212
278
  let arrayIndex = 0;
213
- const done = false;
214
279
 
215
280
  let isPossibleDBRef = isArray ? false : null;
216
281
 
282
+ // Top of the parsing stack (a linked list via each frame's `prev`), or null at the top level.
283
+ let currentFrame: NestedParsingFrame | null = null;
284
+ // Destination object for the current frame (the parent's holdingDocument, or rootObject at the
285
+ // top level). Maintained alongside currentFrame so per-field assignment never recomputes it.
286
+ let currentDest: Document = rootObject;
287
+ // Whether the current frame is an array. Maintained alongside currentFrame so the per-field key
288
+ // logic does not branch on currentFrame every iteration.
289
+ let currentIsArray = isArray;
290
+
217
291
  // While there is more data left, keep parsing
218
- while (!done) {
292
+ while (true) {
219
293
  // Read the type
220
294
  const elementType = buffer[index++];
221
295
 
222
296
  // If we get a zero it's the last byte, exit
223
- if (elementType === 0) break;
297
+ if (elementType === 0) {
298
+ // 0 byte marks end of document.
299
+ if (currentFrame) {
300
+ // If we're in a frame, that means the end of the current nested document
301
+ if (index === currentFrame.lastIndex) {
302
+ // Snapshot the completed frame before updating currentFrame to the parent.
303
+ const completedFrame: NestedParsingFrame = currentFrame;
304
+ currentFrame = completedFrame.prev;
305
+ if (currentFrame === null) {
306
+ currentDest = rootObject;
307
+ currentIsArray = isArray;
308
+ } else {
309
+ currentDest = currentFrame.holdingDocument;
310
+ currentIsArray = currentFrame.isArray;
311
+ }
312
+ // finish the frame
313
+ let result: Document = completedFrame.holdingDocument;
314
+ switch (completedFrame.elementType) {
315
+ case constants.BSON_DATA_OBJECT:
316
+ // if this is a DBRef, we need to construct a DBRef object instead of a plain object
317
+ if (completedFrame.isPossibleDBRef) {
318
+ result = toPotentialDbRef(result);
319
+ }
320
+ break;
321
+ case constants.BSON_DATA_ARRAY:
322
+ // nothing to do, the holding document is already an array and the keys were set as numeric indices
323
+ break;
324
+ case constants.BSON_DATA_CODE_W_SCOPE:
325
+ // the holding document is the scope, we need to construct a Code object with the function string and scope
326
+ result = new Code(completedFrame.functionString!, completedFrame.holdingDocument);
327
+ break;
328
+ default:
329
+ throw new BSONError('Unexpected element type in frame stack');
330
+ }
331
+ // set the value in the parent document (currentDest now points to the parent's document)
332
+ assignValue(currentDest, completedFrame.propertyName, result);
333
+ continue;
334
+ } else {
335
+ // Current index does not match the last index of the frame, the document is malformed
336
+ if (currentFrame.elementType === constants.BSON_DATA_ARRAY) {
337
+ throw new BSONError('corrupted array bson');
338
+ }
339
+ throw new BSONError('Bad BSON Document: object not properly terminated');
340
+ }
341
+ } else {
342
+ // If we're not in a frame, that means the end of the root document, so we break out of the loop and return the object
343
+ break;
344
+ }
345
+ }
224
346
 
225
347
  // Get the start search index
226
348
  let i = index;
@@ -233,20 +355,34 @@ function deserializeObject(
233
355
  if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');
234
356
 
235
357
  // Represents the key
236
- const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i, false);
237
-
238
- // shouldValidateKey is true if the key should be validated, false otherwise
239
- let shouldValidateKey = true;
240
- if (globalUTFValidation || utf8KeysSet?.has(name)) {
358
+ const name = currentIsArray
359
+ ? currentFrame !== null
360
+ ? currentFrame.arrayIndex++
361
+ : arrayIndex++
362
+ : ByteUtils.toUTF8(buffer, index, i, false);
363
+
364
+ // shouldValidateKey is true if the key should be validated, false otherwise.
365
+ // Within a nested frame the original code passed a collapsed boolean validation option,
366
+ // so all keys in the frame are validated uniformly using the frame's setting.
367
+ let shouldValidateKey: boolean;
368
+ if (currentFrame !== null) {
369
+ shouldValidateKey = currentFrame.validationSetting;
370
+ } else if (globalUTFValidation || utf8KeysSet?.has(name)) {
241
371
  shouldValidateKey = validationSetting;
242
372
  } else {
243
373
  shouldValidateKey = !validationSetting;
244
374
  }
245
375
 
246
- if (isPossibleDBRef !== false && (name as string)[0] === '$') {
376
+ // Route DBRef key tracking to the current frame; the root variable handles the root doc.
377
+ if (currentFrame !== null) {
378
+ if (currentFrame.isPossibleDBRef !== false && typeof name === 'string' && name[0] === '$') {
379
+ currentFrame.isPossibleDBRef = allowedDBRefKeys.test(name);
380
+ }
381
+ } else if (isPossibleDBRef !== false && (name as string)[0] === '$') {
247
382
  isPossibleDBRef = allowedDBRefKeys.test(name as string);
248
383
  }
249
384
  let value;
385
+ let isDeferredValue = false;
250
386
 
251
387
  index = i + 1;
252
388
 
@@ -288,45 +424,65 @@ function deserializeObject(
288
424
  throw new BSONError('illegal boolean type value');
289
425
  value = buffer[index++] === 1;
290
426
  } else if (elementType === constants.BSON_DATA_OBJECT) {
291
- const _index = index;
292
427
  const objectSize = NumberUtils.getInt32LE(buffer, index);
293
428
 
294
- if (objectSize <= 0 || objectSize > buffer.length - index)
429
+ if (objectSize < 5 || objectSize > buffer.length - index)
295
430
  throw new BSONError('bad embedded document length in bson');
296
431
 
297
- // We have a raw value
298
- if (raw) {
432
+ // We have a raw value: either the global raw option, or the parent frame requested raw elements.
433
+ if (raw || (currentFrame?.raw ?? false)) {
299
434
  value = buffer.subarray(index, index + objectSize);
435
+ index = index + objectSize;
300
436
  } else {
301
- let objectOptions = options;
302
- if (!globalUTFValidation) {
303
- objectOptions = { ...options, validation: { utf8: shouldValidateKey } };
304
- }
305
- value = deserializeObject(buffer, _index, objectOptions, false);
437
+ isDeferredValue = true;
438
+ const objectFrame: NestedParsingFrame = {
439
+ holdingDocument: {},
440
+ elementType: constants.BSON_DATA_OBJECT,
441
+ propertyName: name,
442
+ functionString: null,
443
+ lastIndex: index + objectSize,
444
+ isArray: false,
445
+ arrayIndex: 0,
446
+ raw: false,
447
+ isPossibleDBRef: null, // we don't know if this is a DBRef until we parse the keys, so we start with null and set to false if we encounter a key that is not valid for a DBRef
448
+ validationSetting: shouldValidateKey,
449
+ prev: currentFrame
450
+ };
451
+ currentFrame = objectFrame;
452
+ currentDest = objectFrame.holdingDocument;
453
+ currentIsArray = false;
454
+ index = index + 4;
306
455
  }
307
-
308
- index = index + objectSize;
309
456
  } else if (elementType === constants.BSON_DATA_ARRAY) {
310
- const _index = index;
311
457
  const objectSize = NumberUtils.getInt32LE(buffer, index);
312
- let arrayOptions: DeserializeOptions = options;
458
+
459
+ if (objectSize < 5 || objectSize > buffer.length - index)
460
+ throw new BSONError('bad embedded array length in bson');
313
461
 
314
462
  // Stop index
315
463
  const stopIndex = index + objectSize;
316
464
 
317
- // All elements of array to be returned as raw bson
318
- if (fieldsAsRaw && fieldsAsRaw[name]) {
319
- arrayOptions = { ...options, raw: true };
320
- }
321
-
322
- if (!globalUTFValidation) {
323
- arrayOptions = { ...arrayOptions, validation: { utf8: shouldValidateKey } };
324
- }
325
- value = deserializeObject(buffer, _index, arrayOptions, true);
326
- index = index + objectSize;
327
-
328
- if (buffer[index - 1] !== 0) throw new BSONError('invalid array terminator byte');
329
- if (index !== stopIndex) throw new BSONError('corrupted array bson');
465
+ // fieldsAsRaw match: push with raw=true so embedded objects inside come back as raw bytes.
466
+ // Also propagate raw from the parent frame (nested arrays inside a raw array stay raw).
467
+ const arrayRaw = !!(fieldsAsRaw && fieldsAsRaw[name]) || (currentFrame?.raw ?? false);
468
+ isDeferredValue = true;
469
+ const arrayFrame: NestedParsingFrame = {
470
+ holdingDocument: [],
471
+ elementType: constants.BSON_DATA_ARRAY,
472
+ propertyName: name,
473
+ functionString: null,
474
+ lastIndex: stopIndex,
475
+ isArray: true,
476
+ arrayIndex: 0,
477
+ raw: arrayRaw,
478
+ isPossibleDBRef: false,
479
+ validationSetting: shouldValidateKey,
480
+ prev: currentFrame
481
+ };
482
+ currentFrame = arrayFrame;
483
+ currentDest = arrayFrame.holdingDocument;
484
+ currentIsArray = true;
485
+ index = index + 4;
330
486
  } else if (elementType === constants.BSON_DATA_UNDEFINED) {
331
487
  value = undefined;
332
488
  } else if (elementType === constants.BSON_DATA_NULL) {
@@ -547,10 +703,9 @@ function deserializeObject(
547
703
  const _index = index;
548
704
  // Decode the size of the object document
549
705
  const objectSize = NumberUtils.getInt32LE(buffer, index);
550
- // Decode the scope object
551
- const scopeObject = deserializeObject(buffer, _index, options, false);
552
- // Adjust the index
553
- index = index + objectSize;
706
+
707
+ if (objectSize < 5 || objectSize > buffer.length - index)
708
+ throw new BSONError('bad scope document size in code_w_scope');
554
709
 
555
710
  // Check if field length is too short
556
711
  if (totalSize < 4 + 4 + objectSize + stringSize) {
@@ -562,7 +717,24 @@ function deserializeObject(
562
717
  throw new BSONError('code_w_scope total size is too long, clips outer document');
563
718
  }
564
719
 
565
- value = new Code(functionString, scopeObject);
720
+ isDeferredValue = true;
721
+ const scopeFrame: NestedParsingFrame = {
722
+ holdingDocument: {},
723
+ elementType: constants.BSON_DATA_CODE_W_SCOPE,
724
+ propertyName: name,
725
+ functionString: functionString,
726
+ lastIndex: _index + objectSize,
727
+ isArray: false,
728
+ arrayIndex: 0,
729
+ raw: false,
730
+ isPossibleDBRef: null,
731
+ validationSetting: shouldValidateKey,
732
+ prev: currentFrame
733
+ };
734
+ currentFrame = scopeFrame;
735
+ currentDest = scopeFrame.holdingDocument;
736
+ currentIsArray = false;
737
+ index = index + 4; // move index past the size of the object, the rest of the object will be parsed in subsequent iterations of this loop
566
738
  } else if (elementType === constants.BSON_DATA_DBPOINTER) {
567
739
  // Get the code string size
568
740
  const stringSize = NumberUtils.getInt32LE(buffer, index);
@@ -594,18 +766,19 @@ function deserializeObject(
594
766
  `Detected unknown BSON type ${elementType.toString(16)} for fieldname "${name}"`
595
767
  );
596
768
  }
597
- if (name === '__proto__') {
598
- Object.defineProperty(object, name, {
599
- value,
600
- writable: true,
601
- enumerable: true,
602
- configurable: true
603
- });
604
- } else {
605
- object[name] = value;
769
+
770
+ // If we have the value, set it on the target object
771
+ if (!isDeferredValue) {
772
+ assignValue(currentDest, name, value);
606
773
  }
607
774
  }
608
775
 
776
+ // Check if we have any frames left on the stack, if we do then we had a malformed document
777
+ if (currentFrame !== null) {
778
+ throw new BSONError('corrupted bson, more objects expected based on the current document size');
779
+ }
780
+ const object = rootObject;
781
+
609
782
  // Check if the deserialization was against a valid array/object
610
783
  if (size !== index - startIndex) {
611
784
  if (isArray) throw new BSONError('corrupt array bson');
@@ -615,13 +788,6 @@ function deserializeObject(
615
788
  // if we did not find "$ref", "$id", "$db", or found an extraneous $key, don't make a DBRef
616
789
  if (!isPossibleDBRef) return object;
617
790
 
618
- if (isDBRefLike(object)) {
619
- const copy = Object.assign({}, object) as Partial<DBRefLike>;
620
- delete copy.$ref;
621
- delete copy.$id;
622
- delete copy.$db;
623
- return new DBRef(object.$ref, object.$id, object.$db, copy);
624
- }
625
-
626
- return object;
791
+ // If the object is DBRef-like, create a new DBRef instance
792
+ return toPotentialDbRef(object);
627
793
  }