@exodus/bytes 1.0.0-rc.5 → 1.0.0-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/fallback/hex.js CHANGED
@@ -1,37 +1,105 @@
1
1
  import { assertUint8 } from '../assert.js'
2
- import { nativeEncoder } from './_utils.js'
2
+ import { nativeDecoder, nativeEncoder } from './_utils.js'
3
+ import { encodeAscii, decodeAscii } from './latin1.js'
3
4
 
4
- let hexArray
5
+ let hexArray // array of 256 bytes converted to two-char hex strings
6
+ let hexCodes // hexArray converted to u16 code pairs
5
7
  let dehexArray
8
+ const _00 = 0x30_30 // '00' string in hex, the only allowed char pair to generate 0 byte
9
+ const _ff = 0x66_66 // 'ff' string in hex, max allowed char pair (larger than 'FF' string)
10
+ const allowed = '0123456789ABCDEFabcdef'
6
11
 
7
12
  export const E_HEX = 'Input is not a hex string'
8
13
 
9
- function toHexPart(arr, start, end) {
14
+ function toHexPartAddition(a, start, end) {
10
15
  let o = ''
11
16
  let i = start
12
- const last3 = end - 3
13
- // Unrolled loop is faster
14
- while (i < last3) {
15
- const a = arr[i++]
16
- const b = arr[i++]
17
- const c = arr[i++]
18
- const d = arr[i++]
19
- o += hexArray[a]
20
- o += hexArray[b]
21
- o += hexArray[c]
22
- o += hexArray[d]
17
+ const h = hexArray
18
+ for (const last3 = end - 3; i < last3; i += 4) {
19
+ const x0 = a[i]
20
+ const x1 = a[i + 1]
21
+ const x2 = a[i + 2]
22
+ const x3 = a[i + 3]
23
+ o += h[x0]
24
+ o += h[x1]
25
+ o += h[x2]
26
+ o += h[x3]
23
27
  }
24
28
 
25
- while (i < end) o += hexArray[arr[i++]]
29
+ while (i < end) o += h[a[i++]]
26
30
  return o
27
31
  }
28
32
 
33
+ // Optimization for Hermes which is the main user of fallback
34
+ function toHexPartTemplates(a, start, end) {
35
+ let o = ''
36
+ let i = start
37
+ const h = hexArray
38
+ for (const last15 = end - 15; i < last15; i += 16) {
39
+ const x0 = a[i]
40
+ const x1 = a[i + 1]
41
+ const x2 = a[i + 2]
42
+ const x3 = a[i + 3]
43
+ const x4 = a[i + 4]
44
+ const x5 = a[i + 5]
45
+ const x6 = a[i + 6]
46
+ const x7 = a[i + 7]
47
+ const x8 = a[i + 8]
48
+ const x9 = a[i + 9]
49
+ const x10 = a[i + 10]
50
+ const x11 = a[i + 11]
51
+ const x12 = a[i + 12]
52
+ const x13 = a[i + 13]
53
+ const x14 = a[i + 14]
54
+ const x15 = a[i + 15]
55
+ o += `${h[x0]}${h[x1]}${h[x2]}${h[x3]}${h[x4]}${h[x5]}${h[x6]}${h[x7]}${h[x8]}${h[x9]}${h[x10]}${h[x11]}${h[x12]}${h[x13]}${h[x14]}${h[x15]}`
56
+ }
57
+
58
+ while (i < end) o += h[a[i++]]
59
+ return o
60
+ }
61
+
62
+ // Using templates is significantly faster in Hermes and JSC
63
+ // It's harder to detect JSC and not important anyway as it has native impl, so we detect only Hermes
64
+ const toHexPart = globalThis.HermesInternal ? toHexPartTemplates : toHexPartAddition
65
+
29
66
  export function toHex(arr) {
30
67
  assertUint8(arr)
31
68
 
32
69
  if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
33
70
  const length = arr.length // this helps Hermes
34
71
 
72
+ // Only old browsers use this, barebone engines don't have TextDecoder
73
+ // But Hermes can use this when it (hopefully) implements TextDecoder
74
+ if (nativeDecoder) {
75
+ if (!hexCodes) {
76
+ hexCodes = new Uint16Array(256)
77
+ const u8 = new Uint8Array(hexCodes.buffer, hexCodes.byteOffset, hexCodes.byteLength)
78
+ for (let i = 0; i < 256; i++) {
79
+ const pair = hexArray[i]
80
+ u8[2 * i] = pair.charCodeAt(0)
81
+ u8[2 * i + 1] = pair.charCodeAt(1)
82
+ }
83
+ }
84
+
85
+ const oa = new Uint16Array(length)
86
+ let i = 0
87
+ for (const last3 = arr.length - 3; ; i += 4) {
88
+ if (i >= last3) break // loop is fast enough for moving this here to be useful on JSC
89
+ const x0 = arr[i]
90
+ const x1 = arr[i + 1]
91
+ const x2 = arr[i + 2]
92
+ const x3 = arr[i + 3]
93
+ oa[i] = hexCodes[x0]
94
+ oa[i + 1] = hexCodes[x1]
95
+ oa[i + 2] = hexCodes[x2]
96
+ oa[i + 3] = hexCodes[x3]
97
+ }
98
+
99
+ for (; i < length; i++) oa[i] = hexCodes[arr[i]]
100
+ return decodeAscii(oa)
101
+ }
102
+
35
103
  if (length > 30_000) {
36
104
  // Limit concatenation to avoid excessive GC
37
105
  // Thresholds checked on Hermes
@@ -51,51 +119,74 @@ export function toHex(arr) {
51
119
  return toHexPart(arr, 0, length)
52
120
  }
53
121
 
54
- // TODO: can this be optimized? This only affects non-Hermes barebone engines though
55
- const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
56
-
57
122
  export function fromHex(str) {
58
123
  if (typeof str !== 'string') throw new TypeError('Input is not a string')
59
124
  if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
60
125
 
61
- if (!dehexArray) {
62
- dehexArray = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
63
- for (let i = 0; i < 16; i++) {
64
- const s = i.toString(16)
65
- dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
66
- }
67
- }
68
-
69
126
  const length = str.length / 2 // this helps Hermes in loops
70
127
  const arr = new Uint8Array(length)
71
- let j = 0
128
+
129
+ // Native encoder path is beneficial even for small arrays in Hermes
72
130
  if (nativeEncoder) {
73
- // Native encoder path is beneficial even for small arrays in Hermes
74
- const codes = nativeEncoder.encode(str)
75
- if (codes.length !== str.length) throw new SyntaxError(E_HEX) // non-ascii
76
- const last3 = length - 3 // Unroll nativeEncoder path as this is what modern Hermes takes and a small perf improvement is nice there
131
+ if (!dehexArray) {
132
+ dehexArray = new Uint8Array(_ff + 1) // 26 KiB cache, >2x perf improvement on Hermes
133
+ const u8 = new Uint8Array(2)
134
+ const u16 = new Uint16Array(u8.buffer, u8.byteOffset, 1) // for endianness-agnostic transform
135
+ const map = [...allowed].map((c) => [c.charCodeAt(0), parseInt(c, 16)])
136
+ for (const [ch, vh] of map) {
137
+ u8[0] = ch // first we read high hex char
138
+ for (const [cl, vl] of map) {
139
+ u8[1] = cl // then we read low hex char
140
+ dehexArray[u16[0]] = (vh << 4) | vl
141
+ }
142
+ }
143
+ }
144
+
145
+ const codes = encodeAscii(str, E_HEX)
146
+ const codes16 = new Uint16Array(codes.buffer, codes.byteOffset, codes.byteLength / 2)
77
147
  let i = 0
78
- while (i < last3) {
79
- const a = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
80
- const b = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
81
- const c = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
82
- const d = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
83
- if (a < 0 || b < 0 || c < 0 || d < 0) throw new SyntaxError(E_HEX)
84
- arr[i++] = a
85
- arr[i++] = b
86
- arr[i++] = c
87
- arr[i++] = d
148
+ for (const last3 = length - 3; i < last3; i += 4) {
149
+ const ai = codes16[i]
150
+ const bi = codes16[i + 1]
151
+ const ci = codes16[i + 2]
152
+ const di = codes16[i + 3]
153
+ const a = dehexArray[ai]
154
+ const b = dehexArray[bi]
155
+ const c = dehexArray[ci]
156
+ const d = dehexArray[di]
157
+ if ((!a && ai !== _00) || (!b && bi !== _00) || (!c && ci !== _00) || (!d && di !== _00)) {
158
+ throw new SyntaxError(E_HEX)
159
+ }
160
+
161
+ arr[i] = a
162
+ arr[i + 1] = b
163
+ arr[i + 2] = c
164
+ arr[i + 3] = d
88
165
  }
89
166
 
90
167
  while (i < length) {
91
- const res = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
92
- if (res < 0) throw new SyntaxError(E_HEX)
93
- arr[i++] = res
168
+ const ai = codes16[i]
169
+ const a = dehexArray[ai]
170
+ if (!a && ai !== _00) throw new SyntaxError(E_HEX)
171
+ arr[i++] = a
94
172
  }
95
173
  } else {
174
+ if (!dehexArray) {
175
+ // no regex input validation here, so we map all other bytes to -1 and recheck sign
176
+ // non-ASCII chars throw already though, so we should process only 0-127
177
+ dehexArray = new Int8Array(128).fill(-1)
178
+ for (let i = 0; i < 16; i++) {
179
+ const s = i.toString(16)
180
+ dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
181
+ }
182
+ }
183
+
184
+ let j = 0
96
185
  for (let i = 0; i < length; i++) {
97
- const res = (dehexArray[str.charCodeAt(j++)] << 4) | dehexArray[str.charCodeAt(j++)]
98
- if (res < 0) throw new SyntaxError(E_HEX)
186
+ const a = str.charCodeAt(j++)
187
+ const b = str.charCodeAt(j++)
188
+ const res = (dehexArray[a] << 4) | dehexArray[b]
189
+ if (res < 0 || (0x7f | a | b) !== 0x7f) throw new SyntaxError(E_HEX) // 0-127
99
190
  arr[i] = res
100
191
  }
101
192
  }
@@ -0,0 +1,113 @@
1
+ import { nativeEncoder, nativeDecoder, nativeDecoderLatin1, nativeBuffer } from './_utils.js'
2
+
3
+ // See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
4
+ // On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
5
+ const maxFunctionArgs = 0x20_00
6
+
7
+ export function asciiPrefix(arr) {
8
+ let p = 0 // verified ascii bytes
9
+ const length = arr.length
10
+ // Threshold tested on Hermes (worse on <=48, better on >=52)
11
+ // Also on v8 arrs of size <=64 might be on heap and using Uint32Array on them is unoptimal
12
+ if (length > 64) {
13
+ // Speedup with u32
14
+ const u32start = (4 - (arr.byteOffset & 3)) % 4 // offset start by this many bytes for alignment
15
+ for (; p < u32start; p++) if (arr[p] >= 0x80) return p
16
+ const u32length = ((arr.byteLength - u32start) / 4) | 0
17
+ const u32 = new Uint32Array(arr.buffer, arr.byteOffset + u32start, u32length)
18
+ let i = 0
19
+ for (const last3 = u32length - 3; ; p += 16, i += 4) {
20
+ if (i >= last3) break // loop is fast enough for moving this here to be _very_ useful, likely due to array access checks
21
+ const a = u32[i]
22
+ const b = u32[i + 1]
23
+ const c = u32[i + 2]
24
+ const d = u32[i + 3]
25
+ if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break
26
+ }
27
+
28
+ for (; i < u32length; p += 4, i++) if (u32[i] & 0x80_80_80_80) break
29
+ }
30
+
31
+ for (; p < length; p++) if (arr[p] >= 0x80) return p
32
+ return length
33
+ }
34
+
35
+ // Capable of decoding Uint16Array to UTF-16 as well as Uint8Array to Latin-1
36
+ export function decodeLatin1(arr, start = 0, stop = arr.length) {
37
+ start |= 0
38
+ stop |= 0
39
+ const total = stop - start
40
+ if (total === 0) return ''
41
+ if (total > maxFunctionArgs) {
42
+ let prefix = ''
43
+ for (let i = start; i < stop; ) {
44
+ const i1 = Math.min(stop, i + maxFunctionArgs)
45
+ prefix += String.fromCharCode.apply(String, arr.subarray(i, i1))
46
+ i = i1
47
+ }
48
+
49
+ return prefix
50
+ }
51
+
52
+ const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
53
+ return String.fromCharCode.apply(String, sliced)
54
+ }
55
+
56
+ // Does not check input, uses best available method
57
+ // Building an array for this is only faster than proper string concatenation when TextDecoder or native Buffer are available
58
+ export const decodeAscii = nativeBuffer
59
+ ? (a) =>
60
+ // Buffer is faster on Node.js (but only for long enough data), if we know that output is ascii
61
+ a.byteLength >= 0x3_00
62
+ ? nativeBuffer.from(a.buffer, a.byteOffset, a.byteLength).latin1Slice(0, a.byteLength) // .latin1Slice is faster than .asciiSlice
63
+ : nativeDecoder.decode(a) // On Node.js, utf8 decoder is faster than latin1
64
+ : nativeDecoderLatin1
65
+ ? (a) => nativeDecoderLatin1.decode(a) // On browsers (specifically WebKit), latin1 decoder is faster than utf8
66
+ : (a) => decodeLatin1(new Uint8Array(a.buffer, a.byteOffset, a.byteLength)) // Fallback. We shouldn't get here, constructing with strings directly is faster
67
+
68
+ /* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
69
+
70
+ export const encodeCharcodes = globalThis.HermesInternal
71
+ ? (str, arr) => {
72
+ const length = str.length
73
+ if (length > 64) {
74
+ const at = str.charCodeAt.bind(str) // faster on strings from ~64 chars on Hermes, but can be 10x slower on e.g. JSC
75
+ for (let i = 0; i < length; i++) arr[i] = at(i)
76
+ } else {
77
+ for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
78
+ }
79
+
80
+ return arr
81
+ }
82
+ : (str, arr) => {
83
+ const length = str.length
84
+ // Can be optimized with unrolling, but this is not used on non-Hermes atm
85
+ for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
86
+ return arr
87
+ }
88
+
89
+ /* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
90
+
91
+ export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
92
+
93
+ // Expects nativeEncoder to be present
94
+ export const encodeAscii = globalThis.HermesInternal
95
+ ? (str, ERR) => {
96
+ // Much faster in Hermes
97
+ const codes = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
98
+ const info = nativeEncoder.encodeInto(str, codes)
99
+ if (info.read !== str.length || info.written !== str.length) throw new SyntaxError(ERR) // non-ascii
100
+ return codes.subarray(0, str.length)
101
+ }
102
+ : nativeBuffer
103
+ ? (str, ERR) => {
104
+ // TextEncoder is slow on Node.js 24 / 25 (was ok on 22)
105
+ const codes = nativeBuffer.from(str, 'utf8') // ascii/latin1 coerces, we need to check
106
+ if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
107
+ return new Uint8Array(codes.buffer, codes.byteOffset, codes.byteLength)
108
+ }
109
+ : (str, ERR) => {
110
+ const codes = nativeEncoder.encode(str)
111
+ if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
112
+ return codes
113
+ }
package/fallback/utf8.js CHANGED
@@ -5,72 +5,53 @@ const replacementPoint = 0xff_fd
5
5
 
6
6
  // https://encoding.spec.whatwg.org/#utf-8-decoder
7
7
  // We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
8
- export function decode(arr, loose) {
9
- const start = 0
8
+ export function decode(arr, loose, start = 0) {
9
+ start |= 0
10
10
  const end = arr.length
11
11
  let out = ''
12
- const tmp = []
12
+ const chunkSize = 0x2_00 // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
13
+ const tmpSize = Math.min(end - start, chunkSize + 1) // need 1 extra slot for last codepoint, which can be 2 charcodes
14
+ const tmp = new Array(tmpSize).fill(0)
15
+ let ti = 0
13
16
 
14
17
  for (let i = start; i < end; i++) {
15
- if (tmp.length > 0x2_00) {
16
- // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
17
- // length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
18
+ if (ti >= chunkSize) {
19
+ tmp.length = ti // can be larger by 1 if last codepoint is two charcodes
18
20
  out += String.fromCharCode.apply(String, tmp)
19
- tmp.length = 0
21
+ if (tmp.length <= chunkSize) tmp.push(0) // restore 1 extra slot for last codepoint
22
+ ti = 0
20
23
  }
21
24
 
22
25
  const byte = arr[i]
23
26
  if (byte < 0x80) {
24
- // Fast path ascii
25
- tmp.push(byte)
26
- // Unroll the loop a bit for faster ops, overshoot by 20 chars
27
- for (let j = 0; j < 5; j++) {
28
- if (i + 1 >= end) break
29
- const byte1 = arr[i + 1]
30
- if (byte1 >= 0x80) break
31
- tmp.push(byte1)
32
- i++
33
- if (i + 1 >= end) break
34
- const byte2 = arr[i + 1]
35
- if (byte2 >= 0x80) break
36
- tmp.push(byte2)
37
- i++
38
- if (i + 1 >= end) break
39
- const byte3 = arr[i + 1]
40
- if (byte3 >= 0x80) break
41
- tmp.push(byte3)
42
- i++
43
- if (i + 1 >= end) break
44
- const byte4 = arr[i + 1]
45
- if (byte4 >= 0x80) break
46
- tmp.push(byte4)
47
- i++
48
- }
27
+ tmp[ti++] = byte
28
+ // ascii fast path is in ../utf8.js, this is called only on non-ascii input
29
+ // so we don't unroll this anymore
49
30
  } else if (byte < 0xc2) {
50
31
  if (!loose) throw new TypeError(E_STRICT)
51
- tmp.push(replacementPoint)
32
+ tmp[ti++] = replacementPoint
52
33
  } else if (byte < 0xe0) {
53
34
  // need 1 more
54
35
  if (i + 1 >= end) {
55
36
  if (!loose) throw new TypeError(E_STRICT)
56
- tmp.push(replacementPoint)
37
+ tmp[ti++] = replacementPoint
57
38
  break
58
39
  }
59
40
 
60
41
  const byte1 = arr[i + 1]
61
42
  if (byte1 < 0x80 || byte1 > 0xbf) {
62
43
  if (!loose) throw new TypeError(E_STRICT)
63
- tmp.push(replacementPoint)
44
+ tmp[ti++] = replacementPoint
64
45
  continue
65
46
  }
66
47
 
67
48
  i++
68
- tmp.push(((byte & 0x1f) << 6) | (byte1 & 0x3f))
49
+ tmp[ti++] = ((byte & 0x1f) << 6) | (byte1 & 0x3f)
69
50
  } else if (byte < 0xf0) {
70
51
  // need 2 more
71
52
  if (i + 1 >= end) {
72
53
  if (!loose) throw new TypeError(E_STRICT)
73
- tmp.push(replacementPoint)
54
+ tmp[ti++] = replacementPoint
74
55
  break
75
56
  }
76
57
 
@@ -79,31 +60,31 @@ export function decode(arr, loose) {
79
60
  const byte1 = arr[i + 1]
80
61
  if (byte1 < lower || byte1 > upper) {
81
62
  if (!loose) throw new TypeError(E_STRICT)
82
- tmp.push(replacementPoint)
63
+ tmp[ti++] = replacementPoint
83
64
  continue
84
65
  }
85
66
 
86
67
  i++
87
68
  if (i + 1 >= end) {
88
69
  if (!loose) throw new TypeError(E_STRICT)
89
- tmp.push(replacementPoint)
70
+ tmp[ti++] = replacementPoint
90
71
  break
91
72
  }
92
73
 
93
74
  const byte2 = arr[i + 1]
94
75
  if (byte2 < 0x80 || byte2 > 0xbf) {
95
76
  if (!loose) throw new TypeError(E_STRICT)
96
- tmp.push(replacementPoint)
77
+ tmp[ti++] = replacementPoint
97
78
  continue
98
79
  }
99
80
 
100
81
  i++
101
- tmp.push(((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f))
82
+ tmp[ti++] = ((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f)
102
83
  } else if (byte <= 0xf4) {
103
84
  // need 3 more
104
85
  if (i + 1 >= end) {
105
86
  if (!loose) throw new TypeError(E_STRICT)
106
- tmp.push(replacementPoint)
87
+ tmp[ti++] = replacementPoint
107
88
  break
108
89
  }
109
90
 
@@ -112,35 +93,35 @@ export function decode(arr, loose) {
112
93
  const byte1 = arr[i + 1]
113
94
  if (byte1 < lower || byte1 > upper) {
114
95
  if (!loose) throw new TypeError(E_STRICT)
115
- tmp.push(replacementPoint)
96
+ tmp[ti++] = replacementPoint
116
97
  continue
117
98
  }
118
99
 
119
100
  i++
120
101
  if (i + 1 >= end) {
121
102
  if (!loose) throw new TypeError(E_STRICT)
122
- tmp.push(replacementPoint)
103
+ tmp[ti++] = replacementPoint
123
104
  break
124
105
  }
125
106
 
126
107
  const byte2 = arr[i + 1]
127
108
  if (byte2 < 0x80 || byte2 > 0xbf) {
128
109
  if (!loose) throw new TypeError(E_STRICT)
129
- tmp.push(replacementPoint)
110
+ tmp[ti++] = replacementPoint
130
111
  continue
131
112
  }
132
113
 
133
114
  i++
134
115
  if (i + 1 >= end) {
135
116
  if (!loose) throw new TypeError(E_STRICT)
136
- tmp.push(replacementPoint)
117
+ tmp[ti++] = replacementPoint
137
118
  break
138
119
  }
139
120
 
140
121
  const byte3 = arr[i + 1]
141
122
  if (byte3 < 0x80 || byte3 > 0xbf) {
142
123
  if (!loose) throw new TypeError(E_STRICT)
143
- tmp.push(replacementPoint)
124
+ tmp[ti++] = replacementPoint
144
125
  continue
145
126
  }
146
127
 
@@ -150,71 +131,65 @@ export function decode(arr, loose) {
150
131
  if (codePoint > 0xff_ff) {
151
132
  // split into char codes as String.fromCharCode is faster than String.fromCodePoint
152
133
  const u = codePoint - 0x1_00_00
153
- tmp.push(0xd8_00 + ((u >> 10) & 0x3_ff), 0xdc_00 + (u & 0x3_ff))
134
+ tmp[ti++] = 0xd8_00 + ((u >> 10) & 0x3_ff)
135
+ tmp[ti++] = 0xdc_00 + (u & 0x3_ff)
154
136
  } else {
155
- tmp.push(codePoint)
137
+ tmp[ti++] = codePoint
156
138
  }
157
139
  // eslint-disable-next-line sonarjs/no-duplicated-branches
158
140
  } else {
159
141
  if (!loose) throw new TypeError(E_STRICT)
160
- tmp.push(replacementPoint)
142
+ tmp[ti++] = replacementPoint
161
143
  }
162
144
  }
163
145
 
164
- if (tmp.length > 0) out += String.fromCharCode.apply(String, tmp)
165
- return out
146
+ if (ti === 0) return out
147
+ tmp.length = ti
148
+ return out + String.fromCharCode.apply(String, tmp)
166
149
  }
167
150
 
168
151
  export function encode(string, loose) {
169
152
  const length = string.length
170
- let lead = null
171
153
  let small = true
172
154
  let bytes = new Uint8Array(length) // assume ascii
173
155
  let p = 0
174
156
 
175
157
  for (let i = 0; i < length; i++) {
176
- const code = string.charCodeAt(i)
158
+ let code = string.charCodeAt(i)
177
159
  if (code < 0x80) {
178
- // Fast path for ascii
179
- if (lead) {
180
- if (!loose) throw new TypeError(E_STRICT_UNICODE)
181
- bytes[p++] = 0xef
182
- bytes[p++] = 0xbf
183
- bytes[p++] = 0xbd
184
- lead = null
185
- }
186
-
187
160
  bytes[p++] = code
188
161
  // Unroll the loop a bit for faster ops
189
- for (let j = 0; j < 5; j++) {
190
- if (i + 1 >= length) break
191
- const c1 = string.charCodeAt(i + 1)
192
- if (c1 >= 0x80) break
193
- bytes[p++] = c1
162
+ while (true) {
194
163
  i++
195
- if (i + 1 >= length) break
196
- const c2 = string.charCodeAt(i + 1)
197
- if (c2 >= 0x80) break
198
- bytes[p++] = c2
164
+ if (i >= length) break
165
+ code = string.charCodeAt(i)
166
+ if (code >= 0x80) break
167
+ bytes[p++] = code
199
168
  i++
200
- if (i + 1 >= length) break
201
- const c3 = string.charCodeAt(i + 1)
202
- if (c3 >= 0x80) break
203
- bytes[p++] = c3
169
+ if (i >= length) break
170
+ code = string.charCodeAt(i)
171
+ if (code >= 0x80) break
172
+ bytes[p++] = code
204
173
  i++
205
- if (i + 1 >= length) break
206
- const c4 = string.charCodeAt(i + 1)
207
- if (c4 >= 0x80) break
208
- bytes[p++] = c4
174
+ if (i >= length) break
175
+ code = string.charCodeAt(i)
176
+ if (code >= 0x80) break
177
+ bytes[p++] = code
209
178
  i++
179
+ if (i >= length) break
180
+ code = string.charCodeAt(i)
181
+ if (code >= 0x80) break
182
+ bytes[p++] = code
210
183
  }
211
184
 
212
- continue
185
+ if (i >= length) break
186
+ // now, code is present and >= 0x80
213
187
  }
214
188
 
215
189
  if (small) {
216
190
  // TODO: use resizable array buffers? will have to return a non-resizeable one
217
- const bytesNew = new Uint8Array(length * 3) // maximium can be 3x of the string length in charcodes
191
+ if (p !== i) throw new Error('Unreachable') // Here, p === i (only when small is still true)
192
+ const bytesNew = new Uint8Array(p + (length - i) * 3) // maximum can be 3x of the string length in charcodes
218
193
  bytesNew.set(bytes)
219
194
  bytes = bytesNew
220
195
  small = false
@@ -224,45 +199,35 @@ export function encode(string, loose) {
224
199
  // lead: d800 - dbff
225
200
  // trail: dc00 - dfff
226
201
  if (code >= 0xd8_00 && code < 0xe0_00) {
227
- if (lead && code < 0xdc_00) {
228
- // a second lead, meaning the previous one was unpaired
202
+ // Can't be a valid trail as we already processed that below
203
+
204
+ if (code > 0xdb_ff || i + 1 >= length) {
205
+ // An unexpected trail or a lead at the very end of input
229
206
  if (!loose) throw new TypeError(E_STRICT_UNICODE)
230
207
  bytes[p++] = 0xef
231
208
  bytes[p++] = 0xbf
232
209
  bytes[p++] = 0xbd
233
- lead = null
234
- // code is still processed as a new lead
210
+ continue
235
211
  }
236
212
 
237
- if (!lead) {
238
- if (code > 0xdb_ff || i + 1 >= length) {
239
- // lead out of range || unpaired
240
- if (!loose) throw new TypeError(E_STRICT_UNICODE)
241
- bytes[p++] = 0xef
242
- bytes[p++] = 0xbf
243
- bytes[p++] = 0xbd
244
- continue
245
- }
246
-
247
- lead = code
248
- continue
213
+ const next = string.charCodeAt(i + 1) // Process valid pairs immediately
214
+ if (next >= 0xdc_00 && next < 0xe0_00) {
215
+ // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
216
+ const codePoint = (((code - 0xd8_00) << 10) | (next - 0xdc_00)) + 0x1_00_00
217
+ bytes[p++] = (codePoint >> 18) | 0xf0
218
+ bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
219
+ bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
220
+ bytes[p++] = (codePoint & 0x3f) | 0x80
221
+ i++ // consume next
222
+ } else {
223
+ // Next is not a trail, leave next unconsumed but process unmatched lead error
224
+ if (!loose) throw new TypeError(E_STRICT_UNICODE)
225
+ bytes[p++] = 0xef
226
+ bytes[p++] = 0xbf
227
+ bytes[p++] = 0xbd
249
228
  }
250
229
 
251
- // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
252
- const codePoint = (((lead - 0xd8_00) << 10) | (code - 0xdc_00)) + 0x1_00_00
253
- bytes[p++] = (codePoint >> 18) | 0xf0
254
- bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
255
- bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
256
- bytes[p++] = (codePoint & 0x3f) | 0x80
257
- lead = null
258
230
  continue
259
- } else if (lead) {
260
- if (!loose) throw new TypeError(E_STRICT_UNICODE)
261
- bytes[p++] = 0xef
262
- bytes[p++] = 0xbf
263
- bytes[p++] = 0xbd
264
- lead = null
265
- // code is still processed
266
231
  }
267
232
 
268
233
  // We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes