@exodus/bytes 1.0.0-rc.3 → 1.0.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,3 +1,11 @@
1
- # bytes
1
+ # `@exodus/bytes`
2
2
 
3
- Data structures handling
3
+ `Uint8Array` conversion to and from `base64`, `base32`, `hex` and `utf8`
4
+
5
+ [Fast](./Performance.md)
6
+
7
+ Performs proper input validation
8
+
9
+ ## License
10
+
11
+ [MIT](./LICENSE)
package/assert.js CHANGED
@@ -16,7 +16,14 @@ export function assertTypedArray(arr) {
16
16
  throw new TypeError('Expected a TypedArray instance')
17
17
  }
18
18
 
19
- export function assertUint8(arr, { name, length, ...rest } = {}) {
19
+ export function assertUint8(arr, options) {
20
+ if (!options) {
21
+ // fast path
22
+ if (arr instanceof Uint8Array) return
23
+ throw new TypeError('Expected an Uint8Array')
24
+ }
25
+
26
+ const { name, length, ...rest } = options
20
27
  assertEmptyRest(rest)
21
28
  if (arr instanceof Uint8Array && (length === undefined || arr.length === length)) return
22
29
  throw new TypeError(makeMessage(name, length === undefined ? '' : ` of size ${Number(length)}`))
package/base32.js ADDED
@@ -0,0 +1,33 @@
1
+ import { assertEmptyRest } from './assert.js'
2
+ import { typedView } from './array.js'
3
+ import * as js from './fallback/base32.js'
4
+
5
+ // See https://datatracker.ietf.org/doc/html/rfc4648
6
+
7
+ // 8 chars per 5 bytes
8
+
9
+ const { E_PADDING } = js
10
+
11
+ export const toBase32 = (arr, { padding = false } = {}) => js.toBase32(arr, false, padding)
12
+ export const toBase32hex = (arr, { padding = false } = {}) => js.toBase32(arr, true, padding)
13
+
14
+ // By default, valid padding is accepted but not required
15
+ export const fromBase32 = (str, { format = 'uint8', padding = 'both', ...rest } = {}) =>
16
+ fromBase32common(str, false, padding, format, rest)
17
+ export const fromBase32hex = (str, { format = 'uint8', padding = 'both', ...rest } = {}) =>
18
+ fromBase32common(str, true, padding, format, rest)
19
+
20
+ function fromBase32common(str, isBase32Hex, padding, format, rest) {
21
+ if (typeof str !== 'string') throw new TypeError('Input is not a string')
22
+ assertEmptyRest(rest)
23
+
24
+ if (padding === true) {
25
+ if (str.length % 8 !== 0) throw new SyntaxError(E_PADDING)
26
+ } else if (padding === false) {
27
+ if (str.endsWith('=')) throw new SyntaxError('Did not expect padding in base32 input')
28
+ } else if (padding !== 'both') {
29
+ throw new TypeError('Invalid padding option')
30
+ }
31
+
32
+ return typedView(js.fromBase32(str, isBase32Hex), format)
33
+ }
package/base64.js CHANGED
@@ -1,94 +1,134 @@
1
- import { assert, assertUint8 } from './assert.js'
1
+ import { assertUint8, assertEmptyRest } from './assert.js'
2
2
  import { typedView } from './array.js'
3
3
  import * as js from './fallback/base64.js'
4
4
 
5
5
  // See https://datatracker.ietf.org/doc/html/rfc4648
6
6
 
7
- // base64: A-Za-z0-9+/ and =
8
- // base64url: A-Za-z0-9_-
7
+ // base64: A-Za-z0-9+/ and = if padding not disabled
8
+ // base64url: A-Za-z0-9_- and = if padding enabled
9
9
 
10
10
  const { Buffer, atob } = globalThis // Buffer is optional, only used when native
11
11
  const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
12
12
  const { toBase64: web64 } = Uint8Array.prototype // Modern engines have this
13
13
 
14
- export function toBase64(x) {
14
+ const { E_CHAR, E_PADDING, E_LENGTH, E_LAST } = js
15
+
16
+ const shouldUseAtob = atob && Boolean(globalThis.HermesInternal) // faster only on Hermes (and a little in old Chrome), js path beats it on normal engines
17
+
18
+ // For native Buffer codepaths only
19
+ const isBuffer = (x) => x.constructor === Buffer && Buffer.isBuffer(x)
20
+ const toBuffer = (x) => (isBuffer(x) ? x : Buffer.from(x.buffer, x.byteOffset, x.byteLength))
21
+
22
+ export function toBase64(x, { padding = true } = {}) {
15
23
  assertUint8(x)
16
- if (web64 && x.toBase64 === web64) return x.toBase64() // Modern
17
- if (!haveNativeBuffer) return js.toBase64(x, false, true) // Fallback
18
- if (x.constructor === Buffer && Buffer.isBuffer(x)) return x.toString('base64') // Older Node.js
19
- return Buffer.from(x.buffer, x.byteOffset, x.byteLength).toString('base64') // Older Node.js
24
+ if (web64 && x.toBase64 === web64) {
25
+ return padding ? x.toBase64() : x.toBase64({ omitPadding: !padding }) // Modern, optionless is slightly faster
26
+ }
27
+
28
+ if (!haveNativeBuffer) return js.toBase64(x, false, padding) // Fallback
29
+ const res = toBuffer(x).toString('base64') // Older Node.js
30
+ if (padding) return res
31
+ const at = res.indexOf('=', res.length - 3)
32
+ return at === -1 ? res : res.slice(0, at)
20
33
  }
21
34
 
22
- // NOTE: base64url omits padding
23
- export function toBase64url(x) {
35
+ // NOTE: base64url omits padding by default
36
+ export function toBase64url(x, { padding = false } = {}) {
24
37
  assertUint8(x)
25
- if (web64 && x.toBase64 === web64) return x.toBase64({ alphabet: 'base64url', omitPadding: true }) // Modern
26
- if (!haveNativeBuffer) return js.toBase64(x, true, false) // Fallback
38
+ if (web64 && x.toBase64 === web64) {
39
+ return x.toBase64({ alphabet: 'base64url', omitPadding: !padding }) // Modern
40
+ }
41
+
42
+ if (!haveNativeBuffer) return js.toBase64(x, true, padding) // Fallback
27
43
  if (x.constructor === Buffer && Buffer.isBuffer(x)) return x.toString('base64url') // Older Node.js
28
- return Buffer.from(x.buffer, x.byteOffset, x.byteLength).toString('base64url') // Older Node.js
44
+ const res = toBuffer(x).toString('base64url') // Older Node.js
45
+ return padding && res.length % 4 !== 0 ? res + '='.repeat(4 - (res.length % 4)) : res
29
46
  }
30
47
 
31
48
  // Unlike Buffer.from(), throws on invalid input (non-base64 symbols and incomplete chunks)
32
49
  // Unlike Buffer.from() and Uint8Array.fromBase64(), does not allow spaces
33
50
  // NOTE: Always operates in strict mode for last chunk
34
51
 
35
- // Accepts both padded and non-padded variants, only strict base64
36
- export function fromBase64(str, format = 'uint8') {
37
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
52
+ // By default accepts both padded and non-padded variants, only strict base64
53
+ export function fromBase64(str, options = {}) {
54
+ if (typeof options === 'string') options = { format: options } // Compat due to usage, TODO: remove
55
+ const { format = 'uint8', padding = 'both', ...rest } = options
56
+ return fromBase64common(str, false, padding, format, rest)
57
+ }
38
58
 
39
- // These checks should be needed only for Buffer path, not Uint8Array.fromBase64 path, but JSC lacks proper checks
40
- assert(str.length % 4 !== 1, 'Invalid base64 length') // JSC misses this in fromBase64
41
- if (str.endsWith('=')) {
42
- assert(str.length % 4 === 0, 'Invalid padded length') // JSC misses this too
43
- assert(str[str.length - 3] !== '=', 'Excessive padding') // no more than two = at the end
44
- }
59
+ // By default accepts only non-padded strict base64url
60
+ export function fromBase64url(str, { format = 'uint8', padding = false, ...rest } = {}) {
61
+ return fromBase64common(str, true, padding, format, rest)
62
+ }
45
63
 
46
- return typedView(fromBase64common(str, false), format)
64
+ // By default accepts both padded and non-padded variants, base64 or base64url
65
+ export function fromBase64any(str, { format = 'uint8', padding = 'both', ...rest } = {}) {
66
+ const isBase64url = !str.includes('+') && !str.includes('/') // likely to fail fast, as most input is non-url, also double scan is faster than regex
67
+ return fromBase64common(str, isBase64url, padding, format, rest)
47
68
  }
48
69
 
49
- // Accepts both only non-padded strict base64url
50
- export function fromBase64url(str, format = 'uint8') {
70
+ function fromBase64common(str, isBase64url, padding, format, rest) {
51
71
  if (typeof str !== 'string') throw new TypeError('Input is not a string')
72
+ assertEmptyRest(rest)
73
+ const auto = padding === 'both' ? str.endsWith('=') : undefined
74
+ // Older JSC supporting Uint8Array.fromBase64 lacks proper checks
75
+ if (padding === true || auto === true) {
76
+ if (str.length % 4 !== 0) throw new SyntaxError(E_PADDING) // JSC misses this
77
+ if (str[str.length - 3] === '=') throw new SyntaxError(E_PADDING) // no more than two = at the end
78
+ } else if (padding === false || auto === false) {
79
+ if (str.length % 4 === 1) throw new SyntaxError(E_LENGTH) // JSC misses this in fromBase64
80
+ if (padding === false && str.endsWith('=')) {
81
+ throw new SyntaxError('Did not expect padding in base64 input') // inclusion is checked separately
82
+ }
83
+ } else {
84
+ throw new TypeError('Invalid padding option')
85
+ }
52
86
 
53
- // These checks should be needed only for Buffer path, not Uint8Array.fromBase64 path, but JSC lacks proper checks
54
- assert(str.length % 4 !== 1, 'Invalid base64 length') // JSC misses this in fromBase64
55
- assert(!str.endsWith('='), 'Did not expect padding in base64url input') // inclusion is checked separately
56
-
57
- return typedView(fromBase64common(str, true), format)
87
+ return typedView(fromBase64impl(str, isBase64url), format)
58
88
  }
59
89
 
60
- let fromBase64common
90
+ // ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020 SPACE
91
+ const ASCII_WHITESPACE = /[\t\n\f\r ]/ // non-u for JSC perf
92
+
93
+ let fromBase64impl
61
94
  if (Uint8Array.fromBase64) {
62
95
  // NOTICE: this is actually slower than our JS impl in older JavaScriptCore and (slightly) in SpiderMonkey, but faster on V8 and new JavaScriptCore
63
- fromBase64common = (str, isBase64url) => {
96
+ fromBase64impl = (str, isBase64url) => {
64
97
  const alphabet = isBase64url ? 'base64url' : 'base64'
65
- assert(!/\s/u.test(str), `Invalid character in ${alphabet} input`) // all other chars are checked natively
98
+ if (ASCII_WHITESPACE.test(str)) throw new SyntaxError(E_CHAR) // all other chars are checked natively
66
99
  const padded = str.length % 4 > 0 ? `${str}${'='.repeat(4 - (str.length % 4))}` : str
67
100
  return Uint8Array.fromBase64(padded, { alphabet, lastChunkHandling: 'strict' })
68
101
  }
69
102
  } else {
70
- fromBase64common = (str, isBase64url) => {
71
- if (isBase64url) {
72
- assert(!/[^0-9a-z_-]/iu.test(str), 'Invalid character in base64url input')
73
- } else {
74
- assert(!/[^0-9a-z=+/]/iu.test(str), 'Invalid character in base64 input')
75
- }
76
-
103
+ fromBase64impl = (str, isBase64url) => {
77
104
  let arr
78
- if (!haveNativeBuffer && atob) {
105
+ if (haveNativeBuffer) {
106
+ const invalidRegex = isBase64url ? /[^0-9a-z=_-]/iu : /[^0-9a-z=+/]/iu
107
+ if (invalidRegex.test(str)) throw new SyntaxError(E_CHAR)
108
+ const at = str.indexOf('=')
109
+ if (at >= 0 && /[^=]/iu.test(str.slice(at))) throw new SyntaxError(E_PADDING)
110
+ arr = Buffer.from(str, 'base64')
111
+ } else if (shouldUseAtob) {
79
112
  // atob is faster than manual parsing on Hermes
80
- const raw = atob(isBase64url ? str.replaceAll('-', '+').replaceAll('_', '/') : str)
113
+ if (isBase64url) {
114
+ if (/[\t\n\f\r +/]/.test(str)) throw new SyntaxError(E_CHAR) // atob verifies other invalid input
115
+ str = str.replaceAll('-', '+').replaceAll('_', '/')
116
+ } else {
117
+ if (ASCII_WHITESPACE.test(str)) throw new SyntaxError(E_CHAR) // all other chars are checked natively
118
+ }
119
+
120
+ let raw
121
+ try {
122
+ raw = atob(str)
123
+ } catch {
124
+ throw new SyntaxError(E_CHAR) // convert atob errors
125
+ }
126
+
81
127
  const length = raw.length
82
128
  arr = new Uint8Array(length)
83
129
  for (let i = 0; i < length; i++) arr[i] = raw.charCodeAt(i)
84
130
  } else {
85
- // base64url is already checked to have no padding via a regex above
86
- if (!isBase64url) {
87
- const at = str.indexOf('=')
88
- if (at >= 0) assert(!/[^=]/iu.test(str.slice(at)), 'Invalid padding')
89
- }
90
-
91
- arr = haveNativeBuffer ? Buffer.from(str, 'base64') : js.fromBase64(str)
131
+ return js.fromBase64(str, isBase64url) // early return to skip last chunk verification, it's already validated in js
92
132
  }
93
133
 
94
134
  if (arr.length % 3 !== 0) {
@@ -96,7 +136,7 @@ if (Uint8Array.fromBase64) {
96
136
  const expected = toBase64(arr.subarray(-(arr.length % 3)))
97
137
  const end = str.length % 4 === 0 ? str.slice(-4) : str.slice(-(str.length % 4)).padEnd(4, '=')
98
138
  const actual = isBase64url ? end.replaceAll('-', '+').replaceAll('_', '/') : end
99
- if (expected !== actual) throw new Error('Invalid last chunk')
139
+ if (expected !== actual) throw new SyntaxError(E_LAST)
100
140
  }
101
141
 
102
142
  return arr
@@ -0,0 +1,6 @@
1
+ const { Buffer, TextEncoder, TextDecoder } = globalThis
2
+ const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
3
+ const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
4
+ const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
5
+ const nativeDecoder = isNative(TextDecoder) ? new TextDecoder('utf8', { ignoreBOM: true }) : null
6
+ export { nativeEncoder, nativeDecoder }
@@ -0,0 +1,198 @@
1
+ import { assertUint8 } from '../assert.js'
2
+ import { nativeEncoder, nativeDecoder } from './_utils.js'
3
+
4
+ // See https://datatracker.ietf.org/doc/html/rfc4648
5
+
6
+ const BASE32 = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'] // RFC 4648, #6
7
+ const BASE32HEX = [...'0123456789ABCDEFGHIJKLMNOPQRSTUV'] // RFC 4648, #7
8
+ const BASE32_HELPERS = {}
9
+ const BASE32HEX_HELPERS = {}
10
+
11
+ export const E_CHAR = 'Invalid character in base32 input'
12
+ export const E_PADDING = 'Invalid base32 padding'
13
+ export const E_LENGTH = 'Invalid base32 length'
14
+ export const E_LAST = 'Invalid last chunk'
15
+
16
+ // We construct output by concatenating chars, this seems to be fine enough on modern JS engines
17
+ export function toBase32(arr, isBase32Hex, padding) {
18
+ assertUint8(arr)
19
+ const fullChunks = Math.floor(arr.length / 5)
20
+ const fullChunksBytes = fullChunks * 5
21
+ let o = ''
22
+ let i = 0
23
+
24
+ const alphabet = isBase32Hex ? BASE32HEX : BASE32
25
+ const helpers = isBase32Hex ? BASE32HEX_HELPERS : BASE32_HELPERS
26
+ if (!helpers.pairs) {
27
+ helpers.pairs = []
28
+ if (nativeDecoder) {
29
+ // Lazy to save memory in case if this is not needed
30
+ helpers.codepairs = new Uint16Array(32 * 32)
31
+ const u16 = helpers.codepairs
32
+ const u8 = new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength) // write as 1-byte to ignore BE/LE difference
33
+ for (let i = 0; i < 32; i++) {
34
+ const ic = alphabet[i].charCodeAt(0)
35
+ for (let j = 0; j < 32; j++) u8[(i << 6) | (j << 1)] = u8[(j << 6) | ((i << 1) + 1)] = ic
36
+ }
37
+ } else {
38
+ const p = helpers.pairs
39
+ for (let i = 0; i < 32; i++) {
40
+ for (let j = 0; j < 32; j++) p.push(`${alphabet[i]}${alphabet[j]}`)
41
+ }
42
+ }
43
+ }
44
+
45
+ const { pairs, codepairs } = helpers
46
+
47
+ // Fast path for complete blocks
48
+ // This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
49
+ if (nativeDecoder) {
50
+ const oa = new Uint16Array(fullChunks * 4)
51
+ for (let j = 0; i < fullChunksBytes; i += 5) {
52
+ const a = arr[i]
53
+ const b = arr[i + 1]
54
+ const c = arr[i + 2]
55
+ const d = arr[i + 3]
56
+ const e = arr[i + 4]
57
+ oa[j++] = codepairs[(a << 2) | (b >> 6)] // 8 + 8 - 5 - 5 = 6 left
58
+ oa[j++] = codepairs[((b & 0x3f) << 4) | (c >> 4)] // 6 + 8 - 5 - 5 = 4 left
59
+ oa[j++] = codepairs[((c & 0xf) << 6) | (d >> 2)] // 4 + 8 - 5 - 5 = 2 left
60
+ oa[j++] = codepairs[((d & 0x3) << 8) | e] // 2 + 8 - 5 - 5 = 0 left
61
+ }
62
+
63
+ o = nativeDecoder.decode(oa)
64
+ } else {
65
+ for (; i < fullChunksBytes; i += 5) {
66
+ const a = arr[i]
67
+ const b = arr[i + 1]
68
+ const c = arr[i + 2]
69
+ const d = arr[i + 3]
70
+ const e = arr[i + 4]
71
+ o += pairs[(a << 2) | (b >> 6)] // 8 + 8 - 5 - 5 = 6 left
72
+ o += pairs[((b & 0x3f) << 4) | (c >> 4)] // 6 + 8 - 5 - 5 = 4 left
73
+ o += pairs[((c & 0xf) << 6) | (d >> 2)] // 4 + 8 - 5 - 5 = 2 left
74
+ o += pairs[((d & 0x3) << 8) | e] // 2 + 8 - 5 - 5 = 0 left
75
+ }
76
+ }
77
+
78
+ // If we have something left, process it with a full algo
79
+ let carry = 0
80
+ let shift = 3 // First byte needs to be shifted by 3 to get 5 bits
81
+ for (; i < arr.length; i++) {
82
+ const x = arr[i]
83
+ o += alphabet[carry | (x >> shift)] // shift >= 3, so this fits
84
+ if (shift >= 5) {
85
+ shift -= 5
86
+ o += alphabet[(x >> shift) & 0x1f]
87
+ }
88
+
89
+ carry = (x << (5 - shift)) & 0x1f
90
+ shift += 3 // Each byte prints 5 bits and leaves 3 bits
91
+ }
92
+
93
+ if (shift !== 3) o += alphabet[carry] // shift 3 means we have no carry left
94
+ if (padding) o += ['', '======', '====', '===', '='][arr.length - fullChunksBytes]
95
+
96
+ return o
97
+ }
98
+
99
+ // TODO: can this be optimized? This only affects non-Hermes barebone engines though
100
+ const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
101
+
102
+ export function fromBase32(str, isBase32Hex) {
103
+ let inputLength = str.length
104
+ while (str[inputLength - 1] === '=') inputLength--
105
+ const paddingLength = str.length - inputLength
106
+ const tailLength = inputLength % 8
107
+ const mainLength = inputLength - tailLength // multiples of 8
108
+ if (![0, 2, 4, 5, 7].includes(tailLength)) throw new SyntaxError(E_LENGTH) // fast verification
109
+ if (paddingLength > 7 || (paddingLength !== 0 && str.length % 8 !== 0)) {
110
+ throw new SyntaxError(E_PADDING)
111
+ }
112
+
113
+ const alphabet = isBase32Hex ? BASE32HEX : BASE32
114
+ const helpers = isBase32Hex ? BASE32HEX_HELPERS : BASE32_HELPERS
115
+
116
+ if (!helpers.fromMap) {
117
+ helpers.fromMap = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
118
+ alphabet.forEach((c, i) => {
119
+ helpers.fromMap[c.charCodeAt(0)] = helpers.fromMap[c.toLowerCase().charCodeAt(0)] = i
120
+ })
121
+ }
122
+
123
+ const m = helpers.fromMap
124
+
125
+ const arr = new Uint8Array(Math.floor((inputLength * 5) / 8))
126
+ let at = 0
127
+ let i = 0
128
+
129
+ if (nativeEncoder) {
130
+ const codes = nativeEncoder.encode(str)
131
+ if (codes.length !== str.length) throw new SyntaxError(E_CHAR) // non-ascii
132
+ while (i < mainLength) {
133
+ // each 5 bits, grouped 5 * 4 = 20
134
+ const a = (m[codes[i++]] << 15) | (m[codes[i++]] << 10) | (m[codes[i++]] << 5) | m[codes[i++]]
135
+ const b = (m[codes[i++]] << 15) | (m[codes[i++]] << 10) | (m[codes[i++]] << 5) | m[codes[i++]]
136
+ if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
137
+ arr[at++] = a >> 12
138
+ arr[at++] = (a >> 4) & 0xff
139
+ arr[at++] = ((a << 4) & 0xff) | (b >> 16)
140
+ arr[at++] = (b >> 8) & 0xff
141
+ arr[at++] = b & 0xff
142
+ }
143
+ } else {
144
+ while (i < mainLength) {
145
+ // each 5 bits, grouped 5 * 4 = 20
146
+ const a =
147
+ (m[str.charCodeAt(i++)] << 15) |
148
+ (m[str.charCodeAt(i++)] << 10) |
149
+ (m[str.charCodeAt(i++)] << 5) |
150
+ m[str.charCodeAt(i++)]
151
+ const b =
152
+ (m[str.charCodeAt(i++)] << 15) |
153
+ (m[str.charCodeAt(i++)] << 10) |
154
+ (m[str.charCodeAt(i++)] << 5) |
155
+ m[str.charCodeAt(i++)]
156
+ if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
157
+ arr[at++] = a >> 12
158
+ arr[at++] = (a >> 4) & 0xff
159
+ arr[at++] = ((a << 4) & 0xff) | (b >> 16)
160
+ arr[at++] = (b >> 8) & 0xff
161
+ arr[at++] = b & 0xff
162
+ }
163
+ }
164
+
165
+ // Last block, valid tailLength: 0 2 4 5 7, checked already
166
+ // We check last chunk to be strict
167
+ if (tailLength < 2) return arr
168
+ const ab = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
169
+ if (ab < 0) throw new SyntaxError(E_CHAR)
170
+ arr[at++] = ab >> 2
171
+ if (tailLength < 4) {
172
+ if (ab & 0x3) throw new SyntaxError(E_LAST)
173
+ return arr
174
+ }
175
+
176
+ const cd = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
177
+ if (cd < 0) throw new SyntaxError(E_CHAR)
178
+ arr[at++] = ((ab << 6) & 0xff) | (cd >> 4)
179
+ if (tailLength < 5) {
180
+ if (cd & 0xf) throw new SyntaxError(E_LAST)
181
+ return arr
182
+ }
183
+
184
+ const e = m[str.charCodeAt(i++)]
185
+ if (e < 0) throw new SyntaxError(E_CHAR)
186
+ arr[at++] = ((cd << 4) & 0xff) | (e >> 1) // 4 + 4
187
+ if (tailLength < 7) {
188
+ if (e & 0x1) throw new SyntaxError(E_LAST)
189
+ return arr
190
+ }
191
+
192
+ const fg = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
193
+ if (fg < 0) throw new SyntaxError(E_CHAR)
194
+ arr[at++] = ((e << 7) & 0xff) | (fg >> 3) // 1 + 5 + 2
195
+ // Can't be 8, so no h
196
+ if (fg & 0x7) throw new SyntaxError(E_LAST)
197
+ return arr
198
+ }
@@ -1,15 +1,17 @@
1
1
  import { assertUint8 } from '../assert.js'
2
+ import { nativeEncoder, nativeDecoder } from './_utils.js'
2
3
 
3
4
  // See https://datatracker.ietf.org/doc/html/rfc4648
4
5
 
5
- const { TextDecoder } = globalThis
6
- const nativeDecoder = TextDecoder?.toString().includes('[native code]') ? new TextDecoder() : null
7
6
  const BASE64 = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/']
8
7
  const BASE64URL = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_']
9
- const BASE64_PAIRS = []
10
- const BASE64URL_PAIRS = []
11
- const BASE64_CODES = nativeDecoder ? new Uint8Array(64) : null
12
- const BASE64URL_CODES = nativeDecoder ? new Uint8Array(64) : null
8
+ const BASE64_HELPERS = {}
9
+ const BASE64URL_HELPERS = {}
10
+
11
+ export const E_CHAR = 'Invalid character in base64 input'
12
+ export const E_PADDING = 'Invalid base64 padding'
13
+ export const E_LENGTH = 'Invalid base64 length'
14
+ export const E_LAST = 'Invalid last chunk'
13
15
 
14
16
  // Alternatively, we could have mapped 0-255 bytes to charcodes and just used btoa(ascii),
15
17
  // but that approach is _slower_ than our toBase64js function, even on Hermes
@@ -23,27 +25,38 @@ export function toBase64(arr, isURL, padding) {
23
25
  let i = 0
24
26
 
25
27
  const alphabet = isURL ? BASE64URL : BASE64
26
- const pairs = isURL ? BASE64URL_PAIRS : BASE64_PAIRS
27
- const map = isURL ? BASE64_CODES : BASE64URL_CODES
28
- if (pairs.length === 0) {
29
- for (let i = 0; i < 64; i++) {
30
- for (let j = 0; j < 64; j++) pairs.push(`${alphabet[i]}${alphabet[j]}`)
31
- if (map) map[i] = alphabet[i].charCodeAt(0)
28
+ const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
29
+ if (!helpers.pairs) {
30
+ helpers.pairs = []
31
+ if (nativeDecoder) {
32
+ // Lazy to save memory in case if this is not needed
33
+ helpers.codepairs = new Uint16Array(64 * 64)
34
+ const u16 = helpers.codepairs
35
+ const u8 = new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength) // write as 1-byte to ignore BE/LE difference
36
+ for (let i = 0; i < 64; i++) {
37
+ const ic = alphabet[i].charCodeAt(0)
38
+ for (let j = 0; j < 64; j++) u8[(i << 7) | (j << 1)] = u8[(j << 7) | ((i << 1) + 1)] = ic
39
+ }
40
+ } else {
41
+ const p = helpers.pairs
42
+ for (let i = 0; i < 64; i++) {
43
+ for (let j = 0; j < 64; j++) p.push(`${alphabet[i]}${alphabet[j]}`)
44
+ }
32
45
  }
33
46
  }
34
47
 
48
+ const { pairs, codepairs } = helpers
49
+
35
50
  // Fast path for complete blocks
36
51
  // This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
37
52
  if (nativeDecoder) {
38
- const oa = new Uint8Array(fullChunks * 4)
53
+ const oa = new Uint16Array(fullChunks * 2)
39
54
  for (let j = 0; i < fullChunksBytes; i += 3) {
40
55
  const a = arr[i]
41
56
  const b = arr[i + 1]
42
57
  const c = arr[i + 2]
43
- oa[j++] = map[a >> 2]
44
- oa[j++] = map[((a & 0x3) << 4) | (b >> 4)]
45
- oa[j++] = map[((b & 0xf) << 2) | (c >> 6)]
46
- oa[j++] = map[c & 0x3f]
58
+ oa[j++] = codepairs[(a << 4) | (b >> 4)]
59
+ oa[j++] = codepairs[((b & 0x0f) << 8) | c]
47
60
  }
48
61
 
49
62
  o = nativeDecoder.decode(oa)
@@ -78,50 +91,72 @@ export function toBase64(arr, isURL, padding) {
78
91
  return o
79
92
  }
80
93
 
81
- let fromBase64jsMap
82
-
83
- // Assumes valid input and no chars after =, checked at API
84
- // Last chunk is rechecked at API too
85
- export function fromBase64(str) {
86
- const map = fromBase64jsMap || new Array(256)
87
- if (!fromBase64jsMap) {
88
- fromBase64jsMap = map
89
- BASE64.forEach((c, i) => (map[c.charCodeAt(0)] = i))
90
- map['-'.charCodeAt(0)] = map['+'.charCodeAt(0)] // for base64url
91
- map['_'.charCodeAt(0)] = map['/'.charCodeAt(0)] // for base64url
92
- }
94
+ // TODO: can this be optimized? This only affects non-Hermes barebone engines though
95
+ const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
93
96
 
97
+ // Last chunk is rechecked at API
98
+ export function fromBase64(str, isURL) {
94
99
  let inputLength = str.length
95
100
  while (str[inputLength - 1] === '=') inputLength--
96
-
97
- const arr = new Uint8Array(Math.floor((inputLength * 3) / 4))
101
+ const paddingLength = str.length - inputLength
98
102
  const tailLength = inputLength % 4
99
103
  const mainLength = inputLength - tailLength // multiples of 4
104
+ if (tailLength === 1) throw new SyntaxError(E_LENGTH)
105
+ if (paddingLength > 3 || (paddingLength !== 0 && str.length % 4 !== 0)) {
106
+ throw new SyntaxError(E_PADDING)
107
+ }
108
+
109
+ const alphabet = isURL ? BASE64URL : BASE64
110
+ const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
100
111
 
112
+ if (!helpers.fromMap) {
113
+ helpers.fromMap = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
114
+ alphabet.forEach((c, i) => (helpers.fromMap[c.charCodeAt(0)] = i))
115
+ }
116
+
117
+ const m = helpers.fromMap
118
+
119
+ const arr = new Uint8Array(Math.floor((inputLength * 3) / 4))
101
120
  let at = 0
102
121
  let i = 0
103
- let tmp
104
-
105
- while (i < mainLength) {
106
- // a [ b c ] d, each 6 bits
107
- const bc = (map[str.charCodeAt(i + 1)] << 6) | map[str.charCodeAt(i + 2)]
108
- arr[at++] = (map[str.charCodeAt(i)] << 2) | (bc >> 10)
109
- arr[at++] = (bc >> 2) & 0xff
110
- arr[at++] = ((bc << 6) & 0xff) | map[str.charCodeAt(i + 3)]
111
- i += 4
122
+
123
+ if (nativeEncoder) {
124
+ const codes = nativeEncoder.encode(str)
125
+ if (codes.length !== str.length) throw new SyntaxError(E_CHAR) // non-ascii
126
+ while (i < mainLength) {
127
+ const a = (m[codes[i++]] << 18) | (m[codes[i++]] << 12) | (m[codes[i++]] << 6) | m[codes[i++]]
128
+ if (a < 0) throw new SyntaxError(E_CHAR)
129
+ arr[at++] = a >> 16
130
+ arr[at++] = (a >> 8) & 0xff
131
+ arr[at++] = a & 0xff
132
+ }
133
+ } else {
134
+ while (i < mainLength) {
135
+ const a =
136
+ (m[str.charCodeAt(i++)] << 18) |
137
+ (m[str.charCodeAt(i++)] << 12) |
138
+ (m[str.charCodeAt(i++)] << 6) |
139
+ m[str.charCodeAt(i++)]
140
+ if (a < 0) throw new SyntaxError(E_CHAR)
141
+ arr[at++] = a >> 16
142
+ arr[at++] = (a >> 8) & 0xff
143
+ arr[at++] = a & 0xff
144
+ }
112
145
  }
113
146
 
114
- if (tailLength === 3) {
115
- tmp =
116
- (map[str.charCodeAt(i)] << 10) |
117
- (map[str.charCodeAt(i + 1)] << 4) |
118
- (map[str.charCodeAt(i + 2)] >> 2)
119
- arr[at++] = (tmp >> 8) & 0xff
120
- arr[at++] = tmp & 0xff
121
- } else if (tailLength === 2) {
122
- tmp = (map[str.charCodeAt(i)] << 2) | (map[str.charCodeAt(i + 1)] >> 4)
123
- arr[at++] = tmp & 0xff
147
+ // Can be 0, 2 or 3, verified by padding checks already
148
+ if (tailLength < 2) return arr // 0
149
+ const ab = (m[str.charCodeAt(i++)] << 6) | m[str.charCodeAt(i++)]
150
+ if (ab < 0) throw new SyntaxError(E_CHAR)
151
+ arr[at++] = ab >> 4
152
+ if (tailLength < 3) {
153
+ if (ab & 0xf) throw new SyntaxError(E_LAST)
154
+ return arr // 2
124
155
  }
125
156
 
126
- return arr
157
+ const c = m[str.charCodeAt(i++)]
158
+ if (c < 0) throw new SyntaxError(E_CHAR)
159
+ arr[at++] = ((ab << 4) & 0xff) | (c >> 2)
160
+ if (c & 0x3) throw new SyntaxError(E_LAST)
161
+ return arr // 3
127
162
  }
package/fallback/hex.js CHANGED
@@ -1,12 +1,11 @@
1
- import { assert, assertUint8 } from '../assert.js'
2
-
3
- // We use TextEncoder here to parse strings to charcodes, this is faster than individual charCodeAt calls
4
- const { TextEncoder } = globalThis // Buffer is optional, only used when native
5
- const nativeEncoder = TextEncoder?.toString().includes('[native code]') ? new TextEncoder() : null
1
+ import { assertUint8 } from '../assert.js'
2
+ import { nativeEncoder } from './_utils.js'
6
3
 
7
4
  let hexArray
8
5
  let dehexArray
9
6
 
7
+ export const E_HEX = 'Input is not a hex string'
8
+
10
9
  function toHexPart(arr, start, end) {
11
10
  let o = ''
12
11
  let i = start
@@ -52,15 +51,18 @@ export function toHex(arr) {
52
51
  return toHexPart(arr, 0, length)
53
52
  }
54
53
 
54
+ // TODO: can this be optimized? This only affects non-Hermes barebone engines though
55
+ const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
56
+
55
57
  export function fromHex(str) {
56
58
  if (typeof str !== 'string') throw new TypeError('Input is not a string')
57
- assert(str.length % 2 === 0, 'Input is not a hex string')
59
+ if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
58
60
 
59
61
  // We don't use native Buffer impl, as rechecking input make it slower than pure js
60
62
  // This path is used only on older engines though
61
63
 
62
64
  if (!dehexArray) {
63
- dehexArray = new Array(103) // f is 102
65
+ dehexArray = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
64
66
  for (let i = 0; i < 16; i++) {
65
67
  const s = i.toString(16)
66
68
  dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
@@ -73,16 +75,31 @@ export function fromHex(str) {
73
75
  if (nativeEncoder) {
74
76
  // Native encoder path is beneficial even for small arrays in Hermes
75
77
  const codes = nativeEncoder.encode(str)
76
- for (let i = 0; i < length; i++) {
77
- const a = dehexArray[codes[j++]] * 16 + dehexArray[codes[j++]]
78
- if (!a && a !== 0) throw new Error('Input is not a hex string')
79
- arr[i] = a
78
+ if (codes.length !== str.length) throw new SyntaxError(E_HEX) // non-ascii
79
+ const last3 = length - 3 // Unroll nativeEncoder path as this is what modern Hermes takes and a small perf improvement is nice there
80
+ let i = 0
81
+ while (i < last3) {
82
+ const a = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
83
+ const b = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
84
+ const c = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
85
+ const d = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
86
+ if (a < 0 || b < 0 || c < 0 || d < 0) throw new SyntaxError(E_HEX)
87
+ arr[i++] = a
88
+ arr[i++] = b
89
+ arr[i++] = c
90
+ arr[i++] = d
91
+ }
92
+
93
+ while (i < length) {
94
+ const res = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
95
+ if (res < 0) throw new SyntaxError(E_HEX)
96
+ arr[i++] = res
80
97
  }
81
98
  } else {
82
99
  for (let i = 0; i < length; i++) {
83
- const a = dehexArray[str.charCodeAt(j++)] * 16 + dehexArray[str.charCodeAt(j++)]
84
- if (!a && a !== 0) throw new Error('Input is not a hex string')
85
- arr[i] = a
100
+ const res = (dehexArray[str.charCodeAt(j++)] << 4) | dehexArray[str.charCodeAt(j++)]
101
+ if (res < 0) throw new SyntaxError(E_HEX)
102
+ arr[i] = res
86
103
  }
87
104
  }
88
105
 
@@ -0,0 +1,280 @@
1
+ export const E_STRICT = 'Input is not well-formed utf8' // message of the TypeError thrown by strict decode() on malformed bytes
2
+ export const E_STRICT_UNICODE = 'Input is not well-formed Unicode' // message of the TypeError thrown by strict encode() on unpaired surrogates
3
+
4
+ const replacementPoint = 0xff_fd // U+FFFD, pushed by loose decode() in place of each malformed sequence
5
+
6
+ // https://encoding.spec.whatwg.org/#utf-8-decoder
7
+ // We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
8
// Decodes UTF-8 bytes into a string, following the WHATWG UTF-8 decoder state machine.
//
// arr: indexable sequence of byte values (0..255) with a `length`
// loose: when truthy, every malformed sequence is replaced with U+FFFD;
//        otherwise the first malformed sequence throws TypeError(E_STRICT)
// Returns the decoded string. A leading BOM is not stripped.
export function decode(arr, loose) {
  const end = arr.length
  const tmp = [] // pending UTF-16 code units, flushed to `out` in chunks
  let out = ''

  // Single place for malformed input handling: strict mode throws, loose mode emits U+FFFD
  const malformed = () => {
    if (!loose) throw new TypeError(E_STRICT)
    tmp.push(replacementPoint)
  }

  outer: for (let i = 0; i < end; i++) {
    if (tmp.length > 0x2_00) {
      // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
      // length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
      out += String.fromCharCode.apply(String, tmp)
      tmp.length = 0
    }

    const byte = arr[i]
    if (byte < 0x80) {
      // Fast path ascii
      tmp.push(byte)
      // Unroll the loop a bit for faster ops, overshoot by 20 chars
      // Kept hand-unrolled on purpose: this is the hot path of the fallback decoder
      for (let j = 0; j < 5; j++) {
        if (i + 1 >= end) break
        const byte1 = arr[i + 1]
        if (byte1 >= 0x80) break
        tmp.push(byte1)
        i++
        if (i + 1 >= end) break
        const byte2 = arr[i + 1]
        if (byte2 >= 0x80) break
        tmp.push(byte2)
        i++
        if (i + 1 >= end) break
        const byte3 = arr[i + 1]
        if (byte3 >= 0x80) break
        tmp.push(byte3)
        i++
        if (i + 1 >= end) break
        const byte4 = arr[i + 1]
        if (byte4 >= 0x80) break
        tmp.push(byte4)
        i++
      }

      continue
    }

    // 0x80..0xc1 are stray continuation bytes or overlong leads, 0xf5..0xff can never start a sequence
    if (byte < 0xc2 || byte > 0xf4) {
      malformed()
      continue
    }

    // Lead byte: number of continuation bytes, payload bits, and the allowed range of the FIRST
    // continuation byte (narrowed to reject overlong forms, surrogates and code points > U+10FFFF)
    let need
    let codePoint
    let lower = 0x80
    let upper = 0xbf
    if (byte < 0xe0) {
      need = 1
      codePoint = byte & 0x1f
    } else if (byte < 0xf0) {
      need = 2
      codePoint = byte & 0x0f
      if (byte === 0xe0) lower = 0xa0
      else if (byte === 0xed) upper = 0x9f
    } else {
      need = 3
      codePoint = byte & 0x07
      if (byte === 0xf0) lower = 0x90
      else if (byte === 0xf4) upper = 0x8f
    }

    for (; need > 0; need--) {
      if (i + 1 >= end) {
        // Input ended in the middle of a sequence: one replacement for the whole truncated tail
        malformed()
        break outer
      }

      const next = arr[i + 1]
      if (next < lower || next > upper) {
        // The offending byte is NOT consumed, the outer loop re-processes it as a new lead
        malformed()
        continue outer
      }

      i++
      codePoint = (codePoint << 6) | (next & 0x3f)
      // Only the first continuation byte has a narrowed range
      lower = 0x80
      upper = 0xbf
    }

    if (codePoint > 0xff_ff) {
      // split into char codes as String.fromCharCode is faster than String.fromCodePoint
      const u = codePoint - 0x1_00_00
      tmp.push(0xd8_00 + ((u >> 10) & 0x3_ff), 0xdc_00 + (u & 0x3_ff))
    } else {
      tmp.push(codePoint)
    }
  }

  if (tmp.length > 0) out += String.fromCharCode.apply(String, tmp)
  return out
}
167
+
168
// Encodes a JS string (UTF-16 code units) into UTF-8 bytes.
//
// string: the string to encode
// loose: when truthy, every unpaired surrogate is encoded as U+FFFD (EF BF BD);
//        otherwise the first unpaired surrogate throws TypeError(E_STRICT_UNICODE)
// Returns a Uint8Array holding exactly the encoded bytes.
export function encode(string, loose) {
  const length = string.length
  let lead = null // pending lead (high) surrogate waiting for its trail
  let small = true // still using the optimistic ascii-sized buffer
  let bytes = new Uint8Array(length) // assume ascii
  let p = 0

  // Single place for unpaired surrogate handling: strict mode throws, loose mode writes U+FFFD
  const unpaired = () => {
    if (!loose) throw new TypeError(E_STRICT_UNICODE)
    bytes[p++] = 0xef
    bytes[p++] = 0xbf
    bytes[p++] = 0xbd
  }

  for (let i = 0; i < length; i++) {
    const code = string.charCodeAt(i)

    // A pending lead followed by anything but a trail surrogate was unpaired; code is still processed
    // (a lead can only be pending after the buffer was grown, so there is room for the replacement)
    if (lead !== null && (code < 0xdc_00 || code > 0xdf_ff)) {
      unpaired()
      lead = null
    }

    if (code < 0x80) {
      // Fast path for ascii
      bytes[p++] = code
      // Unroll the loop a bit for faster ops
      for (let j = 0; j < 5; j++) {
        if (i + 1 >= length) break
        const c1 = string.charCodeAt(i + 1)
        if (c1 >= 0x80) break
        bytes[p++] = c1
        i++
        if (i + 1 >= length) break
        const c2 = string.charCodeAt(i + 1)
        if (c2 >= 0x80) break
        bytes[p++] = c2
        i++
        if (i + 1 >= length) break
        const c3 = string.charCodeAt(i + 1)
        if (c3 >= 0x80) break
        bytes[p++] = c3
        i++
        if (i + 1 >= length) break
        const c4 = string.charCodeAt(i + 1)
        if (c4 >= 0x80) break
        bytes[p++] = c4
        i++
      }

      continue
    }

    if (small) {
      // TODO: use resizable array buffers? will have to return a non-resizeable one
      // Everything written so far stays as-is, each remaining char code needs at most 3 bytes
      // (a surrogate pair is 2 char codes -> 4 bytes, an unpaired surrogate is 1 char code -> 3 bytes)
      const bytesNew = new Uint8Array(p + (length - i) * 3)
      bytesNew.set(bytes.subarray(0, p))
      bytes = bytesNew
      small = false
    }

    // surrogate, charcodes = [d800 + a & 3ff, dc00 + b & 3ff]; codePoint = 0x1_00_00 | (a << 10) | b
    // lead: d800 - dbff
    // trail: dc00 - dfff
    if (code >= 0xd8_00 && code < 0xe0_00) {
      if (code < 0xdc_00) {
        // lead: unpaired right away if nothing can follow it, otherwise wait for the next char code
        if (i + 1 >= length) unpaired()
        else lead = code
        continue
      }

      if (lead === null) {
        // trail without a preceding lead
        unpaired()
        continue
      }

      // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
      const codePoint = (((lead - 0xd8_00) << 10) | (code - 0xdc_00)) + 0x1_00_00
      bytes[p++] = (codePoint >> 18) | 0xf0
      bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
      bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
      bytes[p++] = (codePoint & 0x3f) | 0x80
      lead = null
      continue
    }

    // We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes
    if (code < 0x8_00) {
      bytes[p++] = (code >> 6) | 0xc0
      bytes[p++] = (code & 0x3f) | 0x80
    } else {
      bytes[p++] = (code >> 12) | 0xe0
      bytes[p++] = ((code >> 6) & 0x3f) | 0x80
      bytes[p++] = (code & 0x3f) | 0x80
    }
  }

  return bytes.length === p ? bytes : bytes.slice(0, p)
}
package/hex.js CHANGED
@@ -1,4 +1,4 @@
1
- import { assertTypedArray } from './assert.js'
1
+ import { assertUint8 } from './assert.js'
2
2
  import { typedView } from './array.js'
3
3
  import * as js from './fallback/hex.js'
4
4
 
@@ -7,8 +7,7 @@ const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
7
7
  const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
8
8
 
9
9
  export function toHex(arr) {
10
- assertTypedArray(arr)
11
- if (!(arr instanceof Uint8Array)) arr = new Uint8Array(arr.buffer, arr.byteOffset, arr.byteLength)
10
+ assertUint8(arr)
12
11
  if (arr.length === 0) return ''
13
12
  if (webHex && arr.toHex === webHex) return arr.toHex()
14
13
  if (!haveNativeBuffer) return js.toHex(arr)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exodus/bytes",
3
- "version": "1.0.0-rc.3",
3
+ "version": "1.0.0-rc.4",
4
4
  "description": "Various operations on Uint8Array data",
5
5
  "scripts": {
6
6
  "lint": "eslint .",
@@ -39,19 +39,25 @@
39
39
  },
40
40
  "type": "module",
41
41
  "files": [
42
+ "/fallback/_utils.js",
43
+ "/fallback/base32.js",
42
44
  "/fallback/base64.js",
43
45
  "/fallback/hex.js",
46
+ "/fallback/utf8.js",
44
47
  "/array.js",
45
48
  "/assert.js",
49
+ "/base32.js",
46
50
  "/base64.js",
47
- "/hex.js"
51
+ "/hex.js",
52
+ "/utf8.js"
48
53
  ],
49
54
  "exports": {
50
55
  "./array.js": "./array.js",
56
+ "./base32.js": "./base32.js",
51
57
  "./base64.js": "./base64.js",
52
- "./hex.js": "./hex.js"
58
+ "./hex.js": "./hex.js",
59
+ "./utf8.js": "./utf8.js"
53
60
  },
54
- "dependencies": {},
55
61
  "devDependencies": {
56
62
  "@exodus/eslint-config": "^5.24.0",
57
63
  "@exodus/prettier": "^1.0.0",
@@ -64,7 +70,10 @@
64
70
  "buffer": "^6.0.3",
65
71
  "electron": "36.5.0",
66
72
  "eslint": "^8.44.0",
67
- "jsvu": "^3.0.0"
73
+ "fast-base64-decode": "^2.0.0",
74
+ "hi-base32": "^0.5.1",
75
+ "jsvu": "^3.0.0",
76
+ "text-encoding": "^0.7.0"
68
77
  },
69
78
  "prettier": "@exodus/prettier",
70
79
  "packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"
package/utf8.js ADDED
@@ -0,0 +1,110 @@
1
+ import { assert, assertUint8 } from './assert.js'
2
+ import { typedView } from './array.js'
3
+ import * as js from './fallback/utf8.js'
4
+
5
+ const { Buffer, TextEncoder, TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
6
+ const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT // presumably TYPED_ARRAY_SUPPORT is only set by the npm `buffer` polyfill - TODO confirm
7
+ const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
8
+ const haveDecoder = isNative(TextDecoder) // falsy when TextDecoder is missing or does not look native
9
+ const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null // shared instance, null when encode() must not use TextEncoder
10
+ // ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
11
+ // We don't want to strip anything unexpectedly
12
+ const decoderFatal = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true, fatal: true }) : null // used by decode() when loose is false
13
+ const decoderLoose = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true }) : null // used by decode() when loose is true
14
+
15
+ const { E_STRICT, E_STRICT_UNICODE } = js // error messages shared with the pure JS fallback
16
+
17
+ const shouldUseEscapePath = Boolean(globalThis.HermesInternal) // faster only on Hermes, js path beats it on normal engines
18
+
19
// Turns the output of an always-loose native encoder into a strict result.
//
// str: the string that was encoded
// loose: when truthy, res is returned untouched
// res: UTF-8 bytes produced from str by an encoder that replaces unpaired surrogates with U+FFFD
// Returns res. In strict mode throws TypeError(E_STRICT_UNICODE) if res does not round-trip to str,
// i.e. a replacement was inserted for an unpaired surrogate.
function deLoose(str, loose, res) {
  if (loose) return res
  // Recheck if the string was encoded correctly
  const last = res.length - 2 // a 3-byte sequence can not start at or after this index
  let start = 0
  // Search for EFBFBD
  while (start < last) {
    const pos = res.indexOf(0xef, start)
    if (pos === -1) break
    if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
      // Found a replacement char in output, need to recheck if we encoded the input correctly:
      // it is legit only when the input itself contained U+FFFD at that place
      // Thrown directly as TypeError to match what the JS fallback encoder throws for the same input
      if (str !== decode(res)) throw new TypeError(E_STRICT_UNICODE)
      return res // a single full round-trip covers every other replacement char as well
    }

    start = pos + 1
  }

  return res
}
38
+
39
// Encodes a string to UTF-8 bytes, picking the fastest available implementation.
// Native encoders are always loose, so their output goes through deLoose() for the strict recheck.
function encode(str, loose = false) {
  assert(typeof str === 'string')

  let native
  if (haveNativeBuffer) {
    native = Buffer.from(str) // faster on ascii on Node.js
  } else if (nativeEncoder) {
    native = nativeEncoder.encode(str) // Node.js, browsers, and Hermes
  } else {
    // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
    return js.encode(str, loose)
  }

  return deLoose(str, loose, native)
}
46
+
47
// Lookup table byte -> escape() of the matching char code, filled by decode() on first use
let escapes

// Concatenates the escape sequences of arr[start..end) into one string
function toEscapesPart(arr, start, end) {
  const unrolledEnd = end - 3
  let result = ''
  let pos = start

  // Four bytes per iteration, unrolled loop is faster
  for (; pos < unrolledEnd; pos += 4) {
    result += escapes[arr[pos]]
    result += escapes[arr[pos + 1]]
    result += escapes[arr[pos + 2]]
    result += escapes[arr[pos + 3]]
  }

  // Up to three leftover bytes
  for (; pos < end; pos++) {
    result += escapes[arr[pos]]
  }

  return result
}
68
+
69
// Decodes UTF-8 bytes to a string, picking the fastest available implementation.
// Strict mode (loose = false) throws on malformed input, loose mode substitutes replacement chars.
function decode(arr, loose = false) {
  assertUint8(arr)

  if (haveDecoder) {
    // Node.js and browsers
    const decoder = loose ? decoderLoose : decoderFatal
    return decoder.decode(arr)
  }
  // No reason to use native Buffer: it's not faster than TextDecoder, needs rechecks in non-loose mode, and Node.js has TextDecoder

  // This codepath gives a ~2x perf boost on Hermes
  if (shouldUseEscapePath && escape && decodeURIComponent) {
    if (!escapes) escapes = Array.from({ length: 256 }, (_, i) => escape(String.fromCharCode(i)))

    const length = arr.length
    let escaped
    if (length > 30_000) {
      // Limit concatenation to avoid excessive GC
      // TODO: recheck thresholds on Hermes (taken from hex)
      const parts = []
      let offset = 0
      while (offset < length) {
        const next = Math.min(offset + 500, length)
        parts.push(toEscapesPart(arr, offset, next))
        offset = next
      }

      escaped = parts.join('')
      parts.length = 0
    } else {
      escaped = toEscapesPart(arr, 0, length)
    }

    try {
      return decodeURIComponent(escaped) // ascii to utf8, escape() is precalculated
    } catch {
      if (!loose) throw new TypeError(E_STRICT)
      // Ok, we have to use manual implementation for loose decoder
    }
  }

  return js.decode(arr, loose)
}
106
+
107
+ export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format) // strict: encode() runs with loose = false, result is passed through typedView with the requested format
108
+ export const utf8fromStringLoose = (str, format = 'uint8') => typedView(encode(str, true), format) // loose: encode() runs with loose = true
109
+ export const utf8toString = (arr) => decode(arr, false) // strict: decode() runs with loose = false
110
+ export const utf8toStringLoose = (arr) => decode(arr, true) // loose: decode() runs with loose = true