bson 6.5.0 → 6.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,16 @@
1
1
  import { nodeJsByteUtils } from './node_byte_utils';
2
2
  import { webByteUtils } from './web_byte_utils';
3
3
 
4
- /** @internal */
4
+ /**
5
+ * @public
6
+ * @experimental
7
+ *
8
+ * A collection of functions that help work with data in a Uint8Array.
9
+ * ByteUtils is configured at load time to use Node.js or Web based APIs for the internal implementations.
10
+ */
5
11
  export type ByteUtils = {
6
12
  /** Transforms the input to an instance of Buffer if running on node, otherwise Uint8Array */
7
- toLocalBufferType(buffer: Uint8Array | ArrayBufferView | ArrayBuffer): Uint8Array;
13
+ toLocalBufferType: (buffer: Uint8Array | ArrayBufferView | ArrayBuffer) => Uint8Array;
8
14
  /** Create empty space of size */
9
15
  allocate: (size: number) => Uint8Array;
10
16
  /** Create empty space of size, use pooled memory when available */
@@ -30,9 +36,9 @@ export type ByteUtils = {
30
36
  /** Get the utf8 code unit count from a string if it were to be transformed to utf8 */
31
37
  utf8ByteLength: (input: string) => number;
32
38
  /** Encode UTF8 bytes generated from `source` string into `destination` at byteOffset. Returns the number of bytes encoded. */
33
- encodeUTF8Into(destination: Uint8Array, source: string, byteOffset: number): number;
39
+ encodeUTF8Into: (destination: Uint8Array, source: string, byteOffset: number) => number;
34
40
  /** Generate a Uint8Array filled with random bytes with byteLength */
35
- randomBytes(byteLength: number): Uint8Array;
41
+ randomBytes: (byteLength: number) => Uint8Array;
36
42
  };
37
43
 
38
44
  declare const Buffer: { new (): unknown; prototype?: { _isBuffer?: boolean } } | undefined;
@@ -1,5 +1,5 @@
1
1
  import { BSONError } from '../error';
2
- import { validateUtf8 } from '../validate_utf8';
2
+ import { parseUtf8 } from '../parse_utf8';
3
3
  import { tryReadBasicLatin, tryWriteBasicLatin } from './latin';
4
4
 
5
5
  type NodeJsEncoding = 'base64' | 'hex' | 'utf8' | 'binary';
@@ -136,12 +136,9 @@ export const nodeJsByteUtils = {
136
136
 
137
137
  const string = nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
138
138
  if (fatal) {
139
- // TODO(NODE-4930): Insufficiently strict BSON UTF8 validation
140
139
  for (let i = 0; i < string.length; i++) {
141
140
  if (string.charCodeAt(i) === 0xfffd) {
142
- if (!validateUtf8(buffer, start, end)) {
143
- throw new BSONError('Invalid UTF-8 string in BSON document');
144
- }
141
+ parseUtf8(buffer, start, end, true);
145
142
  break;
146
143
  }
147
144
  }
@@ -6,12 +6,47 @@ FLOAT[0] = -1;
6
6
  // Big endian [191, 240, 0, 0, 0, 0, 0, 0]
7
7
  const isBigEndian = FLOAT_BYTES[7] === 0;
8
8
 
9
+ /**
10
+ * @experimental
11
+ * @public
12
+ *
13
+ * A collection of functions that get or set various numeric types and bit widths from a Uint8Array.
14
+ */
15
+ export type NumberUtils = {
16
+ /**
17
+ * Parses a signed int32 at offset. Throws a `RangeError` if value is negative.
18
+ */
19
+ getNonnegativeInt32LE: (source: Uint8Array, offset: number) => number;
20
+ getInt32LE: (source: Uint8Array, offset: number) => number;
21
+ getUint32LE: (source: Uint8Array, offset: number) => number;
22
+ getUint32BE: (source: Uint8Array, offset: number) => number;
23
+ getBigInt64LE: (source: Uint8Array, offset: number) => bigint;
24
+ getFloat64LE: (source: Uint8Array, offset: number) => number;
25
+ setInt32BE: (destination: Uint8Array, offset: number, value: number) => 4;
26
+ setInt32LE: (destination: Uint8Array, offset: number, value: number) => 4;
27
+ setBigInt64LE: (destination: Uint8Array, offset: number, value: bigint) => 8;
28
+ setFloat64LE: (destination: Uint8Array, offset: number, value: number) => 8;
29
+ };
30
+
9
31
  /**
10
32
  * Number parsing and serializing utilities.
11
33
  *
12
- * @internal
34
+ * @experimental
35
+ * @public
13
36
  */
14
- export const NumberUtils = {
37
+ export const NumberUtils: NumberUtils = {
38
+ getNonnegativeInt32LE(source: Uint8Array, offset: number): number {
39
+ if (source[offset + 3] > 127) {
40
+ throw new RangeError(`Size cannot be negative at offset: ${offset}`);
41
+ }
42
+ return (
43
+ source[offset] |
44
+ (source[offset + 1] << 8) |
45
+ (source[offset + 2] << 16) |
46
+ (source[offset + 3] << 24)
47
+ );
48
+ },
49
+
15
50
  /** Reads a little-endian 32-bit integer from source */
16
51
  getInt32LE(source: Uint8Array, offset: number): number {
17
52
  return (
@@ -0,0 +1,44 @@
1
/**
 * @internal
 * Normalizes the textual form of a number: an explicit leading `+` is dropped and any run of
 * leading zeros is stripped. A leading `-` is always kept, even when the digits collapse to
 * zero (`-000` becomes `-0`).
 *
 * @param str - The textual representation of a number
 * @returns The normalized text; a digit run consisting only of zeros collapses to a single `0`
 */
export function removeLeadingZerosAndExplicitPlus(str: string): string {
  if (str === '') {
    return str;
  }

  const signCharacter = str[0];
  const hasMinus = signCharacter === '-';
  const hasPlus = signCharacter === '+';

  // Digits begin right after the sign character, when there is one.
  const digitsStart = hasMinus || hasPlus ? 1 : 0;

  // Advance past every leading zero.
  let cursor = digitsStart;
  while (cursor < str.length && str[cursor] === '0') {
    cursor += 1;
  }

  if (cursor === digitsStart) {
    // No leading zeros were present: only an explicit plus needs removing.
    return hasPlus ? str.slice(1) : str;
  }

  // Everything after the sign was a zero when the cursor reached the end of the input.
  const digits = cursor === str.length ? '0' : str.slice(cursor);
  return hasMinus ? `-${digits}` : digits;
}
31
+
32
/**
 * Compiled "contains a disallowed character" patterns, keyed by the digit characters they allow.
 * Keys are always prefixes of the 36-character digit alphabet, so the map stays small.
 */
const invalidCharacterPatterns = new Map<string, RegExp>();

/**
 * @internal
 * Returns false for a string that contains invalid characters for its radix, else returns the original string.
 *
 * Only the character set is checked: `-` and `+` are accepted at any position, not just as a
 * leading sign. An empty `str` is returned unchanged, and since `''` is falsy, callers should
 * compare the result against `false` rather than rely on truthiness.
 *
 * @param str - The textual representation of the Long
 * @param radix - The radix in which the text is written (2-36), defaults to 10
 * @returns `false` when `str` contains a character outside the radix's digits and the sign characters, otherwise `str`
 */
export function validateStringCharacters(str: string, radix?: number): false | string {
  const validCharacters = '0123456789abcdefghijklmnopqrstuvwxyz'.slice(0, radix ?? 10);

  // The pattern depends only on the allowed digits, so compile it once per digit set.
  // It has no global/sticky flag, which keeps `test` stateless and the instance safe to share.
  let invalidCharacterPattern = invalidCharacterPatterns.get(validCharacters);
  if (invalidCharacterPattern === undefined) {
    // regex is case insensitive and matches any character that is not one of the validCharacters
    invalidCharacterPattern = new RegExp(`[^-+${validCharacters}]`, 'i');
    invalidCharacterPatterns.set(validCharacters, invalidCharacterPattern);
  }

  return invalidCharacterPattern.test(str) ? false : str;
}
@@ -1,5 +1,6 @@
1
1
  import { BSONError } from '../error';
2
2
  import { tryReadBasicLatin } from './latin';
3
+ import { parseUtf8 } from '../parse_utf8';
3
4
 
4
5
  type TextDecoder = {
5
6
  readonly encoding: string;
@@ -179,14 +180,7 @@ export const webByteUtils = {
179
180
  return basicLatin;
180
181
  }
181
182
 
182
- if (fatal) {
183
- try {
184
- return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
185
- } catch (cause) {
186
- throw new BSONError('Invalid UTF-8 string in BSON document', { cause });
187
- }
188
- }
189
- return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
183
+ return parseUtf8(uint8array, start, end, fatal);
190
184
  },
191
185
 
192
186
  utf8ByteLength(input: string): number {
@@ -1,47 +0,0 @@
1
const FIRST_BIT = 0x80;
const FIRST_TWO_BITS = 0xc0;
const CONTINUING_CHAR = 0x80;

/**
 * Determines if the passed in bytes are well-formed utf8.
 *
 * Beyond checking the lead/continuation bit patterns, this rejects sequences that have the
 * right shape but are not valid UTF-8: overlong encodings (lead bytes 0xC0/0xC1, 0xE0 followed
 * by a byte below 0xA0, 0xF0 followed by a byte below 0x90), UTF-16 surrogates (0xED followed by
 * a byte above 0x9F) and code points above U+10FFFF (0xF4 followed by a byte above 0x8F, or
 * lead bytes 0xF5 and up).
 *
 * @param bytes - An array of 8-bit bytes. Must be indexable and have length property
 * @param start - The index to start validating
 * @param end - The index to end validating (exclusive)
 * @returns true when every sequence in `[start, end)` is well-formed and complete, otherwise false
 */
export function validateUtf8(
  bytes: { [index: number]: number },
  start: number,
  end: number
): boolean {
  let i = start;

  while (i < end) {
    const lead = bytes[i];

    // High bit clear: a single-byte sequence.
    if (!(lead & FIRST_BIT)) {
      i += 1;
      continue;
    }

    // Number of continuation bytes, plus the allowed range for the first of them. The range is
    // narrower than 0x80..0xBF for the lead bytes that could otherwise start an overlong,
    // surrogate or out-of-range encoding.
    let trailing: number;
    let secondMin = 0x80;
    let secondMax = 0xbf;

    if (lead >= 0xc2 && lead <= 0xdf) {
      trailing = 1;
    } else if (lead >= 0xe0 && lead <= 0xef) {
      trailing = 2;
      if (lead === 0xe0) {
        secondMin = 0xa0;
      } else if (lead === 0xed) {
        secondMax = 0x9f;
      }
    } else if (lead >= 0xf0 && lead <= 0xf4) {
      trailing = 3;
      if (lead === 0xf0) {
        secondMin = 0x90;
      } else if (lead === 0xf4) {
        secondMax = 0x8f;
      }
    } else {
      // Stray continuation byte, overlong two-byte lead (0xC0/0xC1) or lead above 0xF4.
      return false;
    }

    // The sequence must be complete before `end`.
    if (i + trailing >= end) {
      return false;
    }

    // Written as a negated range test so a missing (undefined) byte is rejected too.
    const second = bytes[i + 1];
    if (!(second >= secondMin && second <= secondMax)) {
      return false;
    }

    for (let k = 2; k <= trailing; k += 1) {
      if ((bytes[i + k] & FIRST_TWO_BITS) !== CONTINUING_CHAR) {
        return false;
      }
    }

    i += trailing + 1;
  }

  return true;
}