@exodus/bytes 1.0.0-rc.4 → 1.0.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  import { assertUint8 } from '../assert.js'
2
2
  import { nativeEncoder, nativeDecoder } from './_utils.js'
3
+ import { encodeAscii } from './latin1.js'
3
4
 
4
5
  // See https://datatracker.ietf.org/doc/html/rfc4648
5
6
 
@@ -13,13 +14,10 @@ export const E_PADDING = 'Invalid base64 padding'
13
14
  export const E_LENGTH = 'Invalid base64 length'
14
15
  export const E_LAST = 'Invalid last chunk'
15
16
 
16
- // Alternatively, we could have mapped 0-255 bytes to charcodes and just used btoa(ascii),
17
- // but that approach is _slower_ than our toBase64js function, even on Hermes
18
-
19
17
  // We construct output by concatenating chars, this seems to be fine enough on modern JS engines
20
18
  export function toBase64(arr, isURL, padding) {
21
19
  assertUint8(arr)
22
- const fullChunks = Math.floor(arr.length / 3)
20
+ const fullChunks = (arr.length / 3) | 0
23
21
  const fullChunksBytes = fullChunks * 3
24
22
  let o = ''
25
23
  let i = 0
@@ -51,21 +49,49 @@ export function toBase64(arr, isURL, padding) {
51
49
  // This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
52
50
  if (nativeDecoder) {
53
51
  const oa = new Uint16Array(fullChunks * 2)
54
- for (let j = 0; i < fullChunksBytes; i += 3) {
52
+ let j = 0
53
+ for (const last = arr.length - 11; i < last; i += 12, j += 8) {
54
+ const x0 = arr[i]
55
+ const x1 = arr[i + 1]
56
+ const x2 = arr[i + 2]
57
+ const x3 = arr[i + 3]
58
+ const x4 = arr[i + 4]
59
+ const x5 = arr[i + 5]
60
+ const x6 = arr[i + 6]
61
+ const x7 = arr[i + 7]
62
+ const x8 = arr[i + 8]
63
+ const x9 = arr[i + 9]
64
+ const x10 = arr[i + 10]
65
+ const x11 = arr[i + 11]
66
+ oa[j] = codepairs[(x0 << 4) | (x1 >> 4)]
67
+ oa[j + 1] = codepairs[((x1 & 0x0f) << 8) | x2]
68
+ oa[j + 2] = codepairs[(x3 << 4) | (x4 >> 4)]
69
+ oa[j + 3] = codepairs[((x4 & 0x0f) << 8) | x5]
70
+ oa[j + 4] = codepairs[(x6 << 4) | (x7 >> 4)]
71
+ oa[j + 5] = codepairs[((x7 & 0x0f) << 8) | x8]
72
+ oa[j + 6] = codepairs[(x9 << 4) | (x10 >> 4)]
73
+ oa[j + 7] = codepairs[((x10 & 0x0f) << 8) | x11]
74
+ }
75
+
76
+ // i < last here is equivalent to i < fullChunksBytes
77
+ for (const last = arr.length - 2; i < last; i += 3, j += 2) {
55
78
  const a = arr[i]
56
79
  const b = arr[i + 1]
57
80
  const c = arr[i + 2]
58
- oa[j++] = codepairs[(a << 4) | (b >> 4)]
59
- oa[j++] = codepairs[((b & 0x0f) << 8) | c]
81
+ oa[j] = codepairs[(a << 4) | (b >> 4)]
82
+ oa[j + 1] = codepairs[((b & 0x0f) << 8) | c]
60
83
  }
61
84
 
62
85
  o = nativeDecoder.decode(oa)
63
86
  } else {
87
+ // This can be optimized by ~25% with templates on Hermes, but this codepath is not called on Hermes, it uses btoa
88
+ // Check git history for templates version
64
89
  for (; i < fullChunksBytes; i += 3) {
65
90
  const a = arr[i]
66
91
  const b = arr[i + 1]
67
92
  const c = arr[i + 2]
68
- o += pairs[(a << 4) | (b >> 4)] + pairs[((b & 0x0f) << 8) | c]
93
+ o += pairs[(a << 4) | (b >> 4)]
94
+ o += pairs[((b & 0x0f) << 8) | c]
69
95
  }
70
96
  }
71
97
 
@@ -92,9 +118,8 @@ export function toBase64(arr, isURL, padding) {
92
118
  }
93
119
 
94
120
  // TODO: can this be optimized? This only affects non-Hermes barebone engines though
95
- const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
121
+ const mapSize = nativeEncoder ? 128 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
96
122
 
97
- // Last chunk is rechecked at API
98
123
  export function fromBase64(str, isURL) {
99
124
  let inputLength = str.length
100
125
  while (str[inputLength - 1] === '=') inputLength--
@@ -121,26 +146,31 @@ export function fromBase64(str, isURL) {
121
146
  let i = 0
122
147
 
123
148
  if (nativeEncoder) {
124
- const codes = nativeEncoder.encode(str)
125
- if (codes.length !== str.length) throw new SyntaxError(E_CHAR) // non-ascii
126
- while (i < mainLength) {
127
- const a = (m[codes[i++]] << 18) | (m[codes[i++]] << 12) | (m[codes[i++]] << 6) | m[codes[i++]]
149
+ const codes = encodeAscii(str, E_CHAR)
150
+ for (; i < mainLength; i += 4) {
151
+ const c0 = codes[i]
152
+ const c1 = codes[i + 1]
153
+ const c2 = codes[i + 2]
154
+ const c3 = codes[i + 3]
155
+ const a = (m[c0] << 18) | (m[c1] << 12) | (m[c2] << 6) | m[c3]
128
156
  if (a < 0) throw new SyntaxError(E_CHAR)
129
- arr[at++] = a >> 16
130
- arr[at++] = (a >> 8) & 0xff
131
- arr[at++] = a & 0xff
157
+ arr[at] = a >> 16
158
+ arr[at + 1] = (a >> 8) & 0xff
159
+ arr[at + 2] = a & 0xff
160
+ at += 3
132
161
  }
133
162
  } else {
134
- while (i < mainLength) {
135
- const a =
136
- (m[str.charCodeAt(i++)] << 18) |
137
- (m[str.charCodeAt(i++)] << 12) |
138
- (m[str.charCodeAt(i++)] << 6) |
139
- m[str.charCodeAt(i++)]
163
+ for (; i < mainLength; i += 4) {
164
+ const c0 = str.charCodeAt(i)
165
+ const c1 = str.charCodeAt(i + 1)
166
+ const c2 = str.charCodeAt(i + 2)
167
+ const c3 = str.charCodeAt(i + 3)
168
+ const a = (m[c0] << 18) | (m[c1] << 12) | (m[c2] << 6) | m[c3]
140
169
  if (a < 0) throw new SyntaxError(E_CHAR)
141
- arr[at++] = a >> 16
142
- arr[at++] = (a >> 8) & 0xff
143
- arr[at++] = a & 0xff
170
+ arr[at] = a >> 16
171
+ arr[at + 1] = (a >> 8) & 0xff
172
+ arr[at + 2] = a & 0xff
173
+ at += 3
144
174
  }
145
175
  }
146
176
 
package/fallback/hex.js CHANGED
@@ -1,37 +1,105 @@
1
1
  import { assertUint8 } from '../assert.js'
2
- import { nativeEncoder } from './_utils.js'
2
+ import { nativeDecoder, nativeEncoder } from './_utils.js'
3
+ import { encodeAscii } from './latin1.js'
3
4
 
4
- let hexArray
5
+ let hexArray // array of 256 bytes converted to two-char hex strings
6
+ let hexCodes // hexArray converted to u16 code pairs
5
7
  let dehexArray
8
+ const _00 = 0x30_30 // '00' string in hex, the only allowed char pair to generate 0 byte
9
+ const _ff = 0x66_66 // 'ff' string in hex, max allowed char pair (larger than 'FF' string)
10
+ const allowed = '0123456789ABCDEFabcdef'
6
11
 
7
12
  export const E_HEX = 'Input is not a hex string'
8
13
 
9
- function toHexPart(arr, start, end) {
14
+ function toHexPartAddition(a, start, end) {
10
15
  let o = ''
11
16
  let i = start
12
- const last3 = end - 3
13
- // Unrolled loop is faster
14
- while (i < last3) {
15
- const a = arr[i++]
16
- const b = arr[i++]
17
- const c = arr[i++]
18
- const d = arr[i++]
19
- o += hexArray[a]
20
- o += hexArray[b]
21
- o += hexArray[c]
22
- o += hexArray[d]
17
+ const h = hexArray
18
+ for (const last3 = end - 3; i < last3; i += 4) {
19
+ const x0 = a[i]
20
+ const x1 = a[i + 1]
21
+ const x2 = a[i + 2]
22
+ const x3 = a[i + 3]
23
+ o += h[x0]
24
+ o += h[x1]
25
+ o += h[x2]
26
+ o += h[x3]
23
27
  }
24
28
 
25
- while (i < end) o += hexArray[arr[i++]]
29
+ while (i < end) o += h[a[i++]]
26
30
  return o
27
31
  }
28
32
 
33
+ // Optimization for Hermes which is the main user of fallback
34
+ function toHexPartTemplates(a, start, end) {
35
+ let o = ''
36
+ let i = start
37
+ const h = hexArray
38
+ for (const last15 = end - 15; i < last15; i += 16) {
39
+ const x0 = a[i]
40
+ const x1 = a[i + 1]
41
+ const x2 = a[i + 2]
42
+ const x3 = a[i + 3]
43
+ const x4 = a[i + 4]
44
+ const x5 = a[i + 5]
45
+ const x6 = a[i + 6]
46
+ const x7 = a[i + 7]
47
+ const x8 = a[i + 8]
48
+ const x9 = a[i + 9]
49
+ const x10 = a[i + 10]
50
+ const x11 = a[i + 11]
51
+ const x12 = a[i + 12]
52
+ const x13 = a[i + 13]
53
+ const x14 = a[i + 14]
54
+ const x15 = a[i + 15]
55
+ o += `${h[x0]}${h[x1]}${h[x2]}${h[x3]}${h[x4]}${h[x5]}${h[x6]}${h[x7]}${h[x8]}${h[x9]}${h[x10]}${h[x11]}${h[x12]}${h[x13]}${h[x14]}${h[x15]}`
56
+ }
57
+
58
+ while (i < end) o += h[a[i++]]
59
+ return o
60
+ }
61
+
62
+ // Using templates is significantly faster in Hermes and JSC
63
+ // It's harder to detect JSC and not important anyway as it has native impl, so we detect only Hermes
64
+ const toHexPart = globalThis.HermesInternal ? toHexPartTemplates : toHexPartAddition
65
+
29
66
  export function toHex(arr) {
30
67
  assertUint8(arr)
31
68
 
32
69
  if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
33
70
  const length = arr.length // this helps Hermes
34
71
 
72
+ // Only old browsers use this, barebone engines don't have TextDecoder
73
+ // But Hermes can use this when it (hopefully) implements TextDecoder
74
+ if (nativeDecoder) {
75
+ if (!hexCodes) {
76
+ hexCodes = new Uint16Array(256)
77
+ const u8 = new Uint8Array(hexCodes.buffer, hexCodes.byteOffset, hexCodes.byteLength)
78
+ for (let i = 0; i < 256; i++) {
79
+ const pair = hexArray[i]
80
+ u8[2 * i] = pair.charCodeAt(0)
81
+ u8[2 * i + 1] = pair.charCodeAt(1)
82
+ }
83
+ }
84
+
85
+ const oa = new Uint16Array(length)
86
+ let i = 0
87
+ for (const last3 = arr.length - 3; ; i += 4) {
88
+ if (i >= last3) break // loop is fast enough for moving this here to be useful on JSC
89
+ const x0 = arr[i]
90
+ const x1 = arr[i + 1]
91
+ const x2 = arr[i + 2]
92
+ const x3 = arr[i + 3]
93
+ oa[i] = hexCodes[x0]
94
+ oa[i + 1] = hexCodes[x1]
95
+ oa[i + 2] = hexCodes[x2]
96
+ oa[i + 3] = hexCodes[x3]
97
+ }
98
+
99
+ for (; i < length; i++) oa[i] = hexCodes[arr[i]]
100
+ return nativeDecoder.decode(oa)
101
+ }
102
+
35
103
  if (length > 30_000) {
36
104
  // Limit concatenation to avoid excessive GC
37
105
  // Thresholds checked on Hermes
@@ -51,54 +119,74 @@ export function toHex(arr) {
51
119
  return toHexPart(arr, 0, length)
52
120
  }
53
121
 
54
- // TODO: can this be optimized? This only affects non-Hermes barebone engines though
55
- const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
56
-
57
122
  export function fromHex(str) {
58
123
  if (typeof str !== 'string') throw new TypeError('Input is not a string')
59
124
  if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
60
125
 
61
- // We don't use native Buffer impl, as rechecking input make it slower than pure js
62
- // This path is used only on older engines though
63
-
64
- if (!dehexArray) {
65
- dehexArray = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
66
- for (let i = 0; i < 16; i++) {
67
- const s = i.toString(16)
68
- dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
69
- }
70
- }
71
-
72
126
  const length = str.length / 2 // this helps Hermes in loops
73
127
  const arr = new Uint8Array(length)
74
- let j = 0
128
+
129
+ // Native encoder path is beneficial even for small arrays in Hermes
75
130
  if (nativeEncoder) {
76
- // Native encoder path is beneficial even for small arrays in Hermes
77
- const codes = nativeEncoder.encode(str)
78
- if (codes.length !== str.length) throw new SyntaxError(E_HEX) // non-ascii
79
- const last3 = length - 3 // Unroll nativeEncoder path as this is what modern Hermes takes and a small perf improvement is nice there
131
+ if (!dehexArray) {
132
+ dehexArray = new Uint8Array(_ff + 1) // 26 KiB cache, >2x perf improvement on Hermes
133
+ const u8 = new Uint8Array(2)
134
+ const u16 = new Uint16Array(u8.buffer, u8.byteOffset, 1) // for endianness-agnostic transform
135
+ const map = [...allowed].map((c) => [c.charCodeAt(0), parseInt(c, 16)])
136
+ for (const [ch, vh] of map) {
137
+ u8[0] = ch // first we read high hex char
138
+ for (const [cl, vl] of map) {
139
+ u8[1] = cl // then we read low hex char
140
+ dehexArray[u16[0]] = (vh << 4) | vl
141
+ }
142
+ }
143
+ }
144
+
145
+ const codes = encodeAscii(str, E_HEX)
146
+ const codes16 = new Uint16Array(codes.buffer, codes.byteOffset, codes.byteLength / 2)
80
147
  let i = 0
81
- while (i < last3) {
82
- const a = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
83
- const b = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
84
- const c = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
85
- const d = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
86
- if (a < 0 || b < 0 || c < 0 || d < 0) throw new SyntaxError(E_HEX)
87
- arr[i++] = a
88
- arr[i++] = b
89
- arr[i++] = c
90
- arr[i++] = d
148
+ for (const last3 = length - 3; i < last3; i += 4) {
149
+ const ai = codes16[i]
150
+ const bi = codes16[i + 1]
151
+ const ci = codes16[i + 2]
152
+ const di = codes16[i + 3]
153
+ const a = dehexArray[ai]
154
+ const b = dehexArray[bi]
155
+ const c = dehexArray[ci]
156
+ const d = dehexArray[di]
157
+ if ((!a && ai !== _00) || (!b && bi !== _00) || (!c && ci !== _00) || (!d && di !== _00)) {
158
+ throw new SyntaxError(E_HEX)
159
+ }
160
+
161
+ arr[i] = a
162
+ arr[i + 1] = b
163
+ arr[i + 2] = c
164
+ arr[i + 3] = d
91
165
  }
92
166
 
93
167
  while (i < length) {
94
- const res = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
95
- if (res < 0) throw new SyntaxError(E_HEX)
96
- arr[i++] = res
168
+ const ai = codes16[i]
169
+ const a = dehexArray[ai]
170
+ if (!a && ai !== _00) throw new SyntaxError(E_HEX)
171
+ arr[i++] = a
97
172
  }
98
173
  } else {
174
+ if (!dehexArray) {
175
+ // no regex input validation here, so we map all other bytes to -1 and recheck sign
176
+ // non-ASCII chars throw already though, so we should process only 0-127
177
+ dehexArray = new Int8Array(128).fill(-1)
178
+ for (let i = 0; i < 16; i++) {
179
+ const s = i.toString(16)
180
+ dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
181
+ }
182
+ }
183
+
184
+ let j = 0
99
185
  for (let i = 0; i < length; i++) {
100
- const res = (dehexArray[str.charCodeAt(j++)] << 4) | dehexArray[str.charCodeAt(j++)]
101
- if (res < 0) throw new SyntaxError(E_HEX)
186
+ const a = str.charCodeAt(j++)
187
+ const b = str.charCodeAt(j++)
188
+ const res = (dehexArray[a] << 4) | dehexArray[b]
189
+ if (res < 0 || (0x7f | a | b) !== 0x7f) throw new SyntaxError(E_HEX) // 0-127
102
190
  arr[i] = res
103
191
  }
104
192
  }
@@ -0,0 +1,89 @@
1
+ import { nativeEncoder } from './_utils.js'
2
+
3
+ // See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
4
+ // On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
5
+ const maxFunctionArgs = 0x20_00
6
+
7
+ export function asciiPrefix(arr) {
8
+ let p = 0 // verified ascii bytes
9
+ const length = arr.length
10
+ // Threshold tested on Hermes (worse on <=48, better on >=52)
11
+ // Also on v8 arrs of size <=64 might be on heap and using Uint32Array on them is suboptimal
12
+ if (length > 64) {
13
+ // Speedup with u32
14
+ const u32start = (4 - (arr.byteOffset & 3)) % 4 // offset start by this many bytes for alignment
15
+ for (; p < u32start; p++) if (arr[p] >= 0x80) return p
16
+ const u32length = ((arr.byteLength - u32start) / 4) | 0
17
+ const u32 = new Uint32Array(arr.buffer, arr.byteOffset + u32start, u32length)
18
+ let i = 0
19
+ for (const last3 = u32length - 3; ; p += 16, i += 4) {
20
+ if (i >= last3) break // loop is fast enough for moving this here to be _very_ useful, likely due to array access checks
21
+ const a = u32[i]
22
+ const b = u32[i + 1]
23
+ const c = u32[i + 2]
24
+ const d = u32[i + 3]
25
+ if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break
26
+ }
27
+
28
+ for (; i < u32length; p += 4, i++) if (u32[i] & 0x80_80_80_80) break
29
+ }
30
+
31
+ for (; p < length; p++) if (arr[p] >= 0x80) return p
32
+ return length
33
+ }
34
+
35
+ export function decodeLatin1(arr, start = 0, stop = arr.length) {
36
+ start |= 0
37
+ stop |= 0
38
+ const total = stop - start
39
+ if (total === 0) return ''
40
+ if (total > maxFunctionArgs) {
41
+ let prefix = ''
42
+ for (let i = start; i < stop; ) {
43
+ const i1 = Math.min(stop, i + maxFunctionArgs)
44
+ prefix += String.fromCharCode.apply(String, arr.subarray(i, i1))
45
+ i = i1
46
+ }
47
+
48
+ return prefix
49
+ }
50
+
51
+ const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
52
+ return String.fromCharCode.apply(String, sliced)
53
+ }
54
+
55
+ export const encodeLatin1 = globalThis.HermesInternal
56
+ ? (str) => {
57
+ const length = str.length
58
+ const arr = new Uint8Array(length)
59
+ if (length > 64) {
60
+ const at = str.charCodeAt.bind(str) // faster on strings from ~64 chars on Hermes, but can be 10x slower on e.g. JSC
61
+ for (let i = 0; i < length; i++) arr[i] = at(i)
62
+ } else {
63
+ for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
64
+ }
65
+
66
+ return arr
67
+ }
68
+ : (str) => {
69
+ const length = str.length
70
+ const arr = new Uint8Array(length)
71
+ // Can be optimized with unrolling, but this is not used on non-Hermes atm
72
+ for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
73
+ return arr
74
+ }
75
+
76
+ // Expects nativeEncoder to be present
77
+ export const encodeAscii = globalThis.HermesInternal
78
+ ? (str, ERR) => {
79
+ // Much faster in Hermes
80
+ const codes = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
81
+ const info = nativeEncoder.encodeInto(str, codes)
82
+ if (info.read !== str.length || info.written !== str.length) throw new SyntaxError(ERR) // non-ascii
83
+ return codes.subarray(0, str.length)
84
+ }
85
+ : (str, ERR) => {
86
+ const codes = nativeEncoder.encode(str)
87
+ if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
88
+ return codes
89
+ }