@exodus/bytes 1.0.0-rc.3 → 1.0.0-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -2
- package/assert.js +8 -1
- package/base32.js +33 -0
- package/base64.js +90 -50
- package/fallback/_utils.js +6 -0
- package/fallback/base32.js +198 -0
- package/fallback/base64.js +86 -51
- package/fallback/hex.js +31 -14
- package/fallback/utf8.js +280 -0
- package/hex.js +2 -3
- package/package.json +14 -5
- package/utf8.js +110 -0
package/README.md
CHANGED
package/assert.js
CHANGED
|
@@ -16,7 +16,14 @@ export function assertTypedArray(arr) {
|
|
|
16
16
|
throw new TypeError('Expected a TypedArray instance')
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
-
export function assertUint8(arr,
|
|
19
|
+
export function assertUint8(arr, options) {
|
|
20
|
+
if (!options) {
|
|
21
|
+
// fast path
|
|
22
|
+
if (arr instanceof Uint8Array) return
|
|
23
|
+
throw new TypeError('Expected an Uint8Array')
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const { name, length, ...rest } = options
|
|
20
27
|
assertEmptyRest(rest)
|
|
21
28
|
if (arr instanceof Uint8Array && (length === undefined || arr.length === length)) return
|
|
22
29
|
throw new TypeError(makeMessage(name, length === undefined ? '' : ` of size ${Number(length)}`))
|
package/base32.js
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { assertEmptyRest } from './assert.js'
|
|
2
|
+
import { typedView } from './array.js'
|
|
3
|
+
import * as js from './fallback/base32.js'
|
|
4
|
+
|
|
5
|
+
// See https://datatracker.ietf.org/doc/html/rfc4648
|
|
6
|
+
|
|
7
|
+
// 8 chars per 5 bytes
|
|
8
|
+
|
|
9
|
+
const { E_PADDING } = js
|
|
10
|
+
|
|
11
|
+
export const toBase32 = (arr, { padding = false } = {}) => js.toBase32(arr, false, padding)
|
|
12
|
+
export const toBase32hex = (arr, { padding = false } = {}) => js.toBase32(arr, true, padding)
|
|
13
|
+
|
|
14
|
+
// By default, valid padding is accepted but not required
|
|
15
|
+
export const fromBase32 = (str, { format = 'uint8', padding = 'both', ...rest } = {}) =>
|
|
16
|
+
fromBase32common(str, false, padding, format, rest)
|
|
17
|
+
export const fromBase32hex = (str, { format = 'uint8', padding = 'both', ...rest } = {}) =>
|
|
18
|
+
fromBase32common(str, true, padding, format, rest)
|
|
19
|
+
|
|
20
|
+
function fromBase32common(str, isBase32Hex, padding, format, rest) {
|
|
21
|
+
if (typeof str !== 'string') throw new TypeError('Input is not a string')
|
|
22
|
+
assertEmptyRest(rest)
|
|
23
|
+
|
|
24
|
+
if (padding === true) {
|
|
25
|
+
if (str.length % 8 !== 0) throw new SyntaxError(E_PADDING)
|
|
26
|
+
} else if (padding === false) {
|
|
27
|
+
if (str.endsWith('=')) throw new SyntaxError('Did not expect padding in base32 input')
|
|
28
|
+
} else if (padding !== 'both') {
|
|
29
|
+
throw new TypeError('Invalid padding option')
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
return typedView(js.fromBase32(str, isBase32Hex), format)
|
|
33
|
+
}
|
package/base64.js
CHANGED
|
@@ -1,94 +1,134 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { assertUint8, assertEmptyRest } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
3
|
import * as js from './fallback/base64.js'
|
|
4
4
|
|
|
5
5
|
// See https://datatracker.ietf.org/doc/html/rfc4648
|
|
6
6
|
|
|
7
|
-
// base64: A-Za-z0-9+/ and =
|
|
8
|
-
// base64url: A-Za-z0-9_-
|
|
7
|
+
// base64: A-Za-z0-9+/ and = if padding not disabled
|
|
8
|
+
// base64url: A-Za-z0-9_- and = if padding enabled
|
|
9
9
|
|
|
10
10
|
const { Buffer, atob } = globalThis // Buffer is optional, only used when native
|
|
11
11
|
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
12
12
|
const { toBase64: web64 } = Uint8Array.prototype // Modern engines have this
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
const { E_CHAR, E_PADDING, E_LENGTH, E_LAST } = js
|
|
15
|
+
|
|
16
|
+
const shouldUseAtob = atob && Boolean(globalThis.HermesInternal) // faster only on Hermes (and a little in old Chrome), js path beats it on normal engines
|
|
17
|
+
|
|
18
|
+
// For native Buffer codepaths only
|
|
19
|
+
const isBuffer = (x) => x.constructor === Buffer && Buffer.isBuffer(x)
|
|
20
|
+
const toBuffer = (x) => (isBuffer(x) ? x : Buffer.from(x.buffer, x.byteOffset, x.byteLength))
|
|
21
|
+
|
|
22
|
+
export function toBase64(x, { padding = true } = {}) {
|
|
15
23
|
assertUint8(x)
|
|
16
|
-
if (web64 && x.toBase64 === web64)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
24
|
+
if (web64 && x.toBase64 === web64) {
|
|
25
|
+
return padding ? x.toBase64() : x.toBase64({ omitPadding: !padding }) // Modern, optionless is slightly faster
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
if (!haveNativeBuffer) return js.toBase64(x, false, padding) // Fallback
|
|
29
|
+
const res = toBuffer(x).toString('base64') // Older Node.js
|
|
30
|
+
if (padding) return res
|
|
31
|
+
const at = res.indexOf('=', res.length - 3)
|
|
32
|
+
return at === -1 ? res : res.slice(0, at)
|
|
20
33
|
}
|
|
21
34
|
|
|
22
|
-
// NOTE: base64url omits padding
|
|
23
|
-
export function toBase64url(x) {
|
|
35
|
+
// NOTE: base64url omits padding by default
|
|
36
|
+
export function toBase64url(x, { padding = false } = {}) {
|
|
24
37
|
assertUint8(x)
|
|
25
|
-
if (web64 && x.toBase64 === web64)
|
|
26
|
-
|
|
38
|
+
if (web64 && x.toBase64 === web64) {
|
|
39
|
+
return x.toBase64({ alphabet: 'base64url', omitPadding: !padding }) // Modern
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
if (!haveNativeBuffer) return js.toBase64(x, true, padding) // Fallback
|
|
27
43
|
if (x.constructor === Buffer && Buffer.isBuffer(x)) return x.toString('base64url') // Older Node.js
|
|
28
|
-
|
|
44
|
+
const res = toBuffer(x).toString('base64url') // Older Node.js
|
|
45
|
+
return padding && res.length % 4 !== 0 ? res + '='.repeat(4 - (res.length % 4)) : res
|
|
29
46
|
}
|
|
30
47
|
|
|
31
48
|
// Unlike Buffer.from(), throws on invalid input (non-base64 symbols and incomplete chunks)
|
|
32
49
|
// Unlike Buffer.from() and Uint8Array.fromBase64(), does not allow spaces
|
|
33
50
|
// NOTE: Always operates in strict mode for last chunk
|
|
34
51
|
|
|
35
|
-
//
|
|
36
|
-
export function fromBase64(str,
|
|
37
|
-
if (typeof
|
|
52
|
+
// By default accepts both padded and non-padded variants, only strict base64
|
|
53
|
+
export function fromBase64(str, options = {}) {
|
|
54
|
+
if (typeof options === 'string') options = { format: options } // Compat due to usage, TODO: remove
|
|
55
|
+
const { format = 'uint8', padding = 'both', ...rest } = options
|
|
56
|
+
return fromBase64common(str, false, padding, format, rest)
|
|
57
|
+
}
|
|
38
58
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
assert(str[str.length - 3] !== '=', 'Excessive padding') // no more than two = at the end
|
|
44
|
-
}
|
|
59
|
+
// By default accepts only non-padded strict base64url
|
|
60
|
+
export function fromBase64url(str, { format = 'uint8', padding = false, ...rest } = {}) {
|
|
61
|
+
return fromBase64common(str, true, padding, format, rest)
|
|
62
|
+
}
|
|
45
63
|
|
|
46
|
-
|
|
64
|
+
// By default accepts both padded and non-padded variants, base64 or base64url
|
|
65
|
+
export function fromBase64any(str, { format = 'uint8', padding = 'both', ...rest } = {}) {
|
|
66
|
+
const isBase64url = !str.includes('+') && !str.includes('/') // likely to fail fast, as most input is non-url, also double scan is faster than regex
|
|
67
|
+
return fromBase64common(str, isBase64url, padding, format, rest)
|
|
47
68
|
}
|
|
48
69
|
|
|
49
|
-
|
|
50
|
-
export function fromBase64url(str, format = 'uint8') {
|
|
70
|
+
function fromBase64common(str, isBase64url, padding, format, rest) {
|
|
51
71
|
if (typeof str !== 'string') throw new TypeError('Input is not a string')
|
|
72
|
+
assertEmptyRest(rest)
|
|
73
|
+
const auto = padding === 'both' ? str.endsWith('=') : undefined
|
|
74
|
+
// Older JSC supporting Uint8Array.fromBase64 lacks proper checks
|
|
75
|
+
if (padding === true || auto === true) {
|
|
76
|
+
if (str.length % 4 !== 0) throw new SyntaxError(E_PADDING) // JSC misses this
|
|
77
|
+
if (str[str.length - 3] === '=') throw new SyntaxError(E_PADDING) // no more than two = at the end
|
|
78
|
+
} else if (padding === false || auto === false) {
|
|
79
|
+
if (str.length % 4 === 1) throw new SyntaxError(E_LENGTH) // JSC misses this in fromBase64
|
|
80
|
+
if (padding === false && str.endsWith('=')) {
|
|
81
|
+
throw new SyntaxError('Did not expect padding in base64 input') // inclusion is checked separately
|
|
82
|
+
}
|
|
83
|
+
} else {
|
|
84
|
+
throw new TypeError('Invalid padding option')
|
|
85
|
+
}
|
|
52
86
|
|
|
53
|
-
|
|
54
|
-
assert(str.length % 4 !== 1, 'Invalid base64 length') // JSC misses this in fromBase64
|
|
55
|
-
assert(!str.endsWith('='), 'Did not expect padding in base64url input') // inclusion is checked separately
|
|
56
|
-
|
|
57
|
-
return typedView(fromBase64common(str, true), format)
|
|
87
|
+
return typedView(fromBase64impl(str, isBase64url), format)
|
|
58
88
|
}
|
|
59
89
|
|
|
60
|
-
|
|
90
|
+
// ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020 SPACE
|
|
91
|
+
const ASCII_WHITESPACE = /[\t\n\f\r ]/ // non-u for JSC perf
|
|
92
|
+
|
|
93
|
+
let fromBase64impl
|
|
61
94
|
if (Uint8Array.fromBase64) {
|
|
62
95
|
// NOTICE: this is actually slower than our JS impl in older JavaScriptCore and (slightly) in SpiderMonkey, but faster on V8 and new JavaScriptCore
|
|
63
|
-
|
|
96
|
+
fromBase64impl = (str, isBase64url) => {
|
|
64
97
|
const alphabet = isBase64url ? 'base64url' : 'base64'
|
|
65
|
-
|
|
98
|
+
if (ASCII_WHITESPACE.test(str)) throw new SyntaxError(E_CHAR) // all other chars are checked natively
|
|
66
99
|
const padded = str.length % 4 > 0 ? `${str}${'='.repeat(4 - (str.length % 4))}` : str
|
|
67
100
|
return Uint8Array.fromBase64(padded, { alphabet, lastChunkHandling: 'strict' })
|
|
68
101
|
}
|
|
69
102
|
} else {
|
|
70
|
-
|
|
71
|
-
if (isBase64url) {
|
|
72
|
-
assert(!/[^0-9a-z_-]/iu.test(str), 'Invalid character in base64url input')
|
|
73
|
-
} else {
|
|
74
|
-
assert(!/[^0-9a-z=+/]/iu.test(str), 'Invalid character in base64 input')
|
|
75
|
-
}
|
|
76
|
-
|
|
103
|
+
fromBase64impl = (str, isBase64url) => {
|
|
77
104
|
let arr
|
|
78
|
-
if (
|
|
105
|
+
if (haveNativeBuffer) {
|
|
106
|
+
const invalidRegex = isBase64url ? /[^0-9a-z=_-]/iu : /[^0-9a-z=+/]/iu
|
|
107
|
+
if (invalidRegex.test(str)) throw new SyntaxError(E_CHAR)
|
|
108
|
+
const at = str.indexOf('=')
|
|
109
|
+
if (at >= 0 && /[^=]/iu.test(str.slice(at))) throw new SyntaxError(E_PADDING)
|
|
110
|
+
arr = Buffer.from(str, 'base64')
|
|
111
|
+
} else if (shouldUseAtob) {
|
|
79
112
|
// atob is faster than manual parsing on Hermes
|
|
80
|
-
|
|
113
|
+
if (isBase64url) {
|
|
114
|
+
if (/[\t\n\f\r +/]/.test(str)) throw new SyntaxError(E_CHAR) // atob verifies other invalid input
|
|
115
|
+
str = str.replaceAll('-', '+').replaceAll('_', '/')
|
|
116
|
+
} else {
|
|
117
|
+
if (ASCII_WHITESPACE.test(str)) throw new SyntaxError(E_CHAR) // all other chars are checked natively
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
let raw
|
|
121
|
+
try {
|
|
122
|
+
raw = atob(str)
|
|
123
|
+
} catch {
|
|
124
|
+
throw new SyntaxError(E_CHAR) // convert atob errors
|
|
125
|
+
}
|
|
126
|
+
|
|
81
127
|
const length = raw.length
|
|
82
128
|
arr = new Uint8Array(length)
|
|
83
129
|
for (let i = 0; i < length; i++) arr[i] = raw.charCodeAt(i)
|
|
84
130
|
} else {
|
|
85
|
-
|
|
86
|
-
if (!isBase64url) {
|
|
87
|
-
const at = str.indexOf('=')
|
|
88
|
-
if (at >= 0) assert(!/[^=]/iu.test(str.slice(at)), 'Invalid padding')
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
arr = haveNativeBuffer ? Buffer.from(str, 'base64') : js.fromBase64(str)
|
|
131
|
+
return js.fromBase64(str, isBase64url) // early return to skip last chunk verification, it's already validated in js
|
|
92
132
|
}
|
|
93
133
|
|
|
94
134
|
if (arr.length % 3 !== 0) {
|
|
@@ -96,7 +136,7 @@ if (Uint8Array.fromBase64) {
|
|
|
96
136
|
const expected = toBase64(arr.subarray(-(arr.length % 3)))
|
|
97
137
|
const end = str.length % 4 === 0 ? str.slice(-4) : str.slice(-(str.length % 4)).padEnd(4, '=')
|
|
98
138
|
const actual = isBase64url ? end.replaceAll('-', '+').replaceAll('_', '/') : end
|
|
99
|
-
if (expected !== actual) throw new
|
|
139
|
+
if (expected !== actual) throw new SyntaxError(E_LAST)
|
|
100
140
|
}
|
|
101
141
|
|
|
102
142
|
return arr
|
|
package/fallback/_utils.js
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
const { Buffer, TextEncoder, TextDecoder } = globalThis
|
|
2
|
+
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
3
|
+
const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
|
|
4
|
+
const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
|
|
5
|
+
const nativeDecoder = isNative(TextDecoder) ? new TextDecoder('utf8', { ignoreBOM: true }) : null
|
|
6
|
+
export { nativeEncoder, nativeDecoder }
|
|
package/fallback/base32.js
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import { assertUint8 } from '../assert.js'
|
|
2
|
+
import { nativeEncoder, nativeDecoder } from './_utils.js'
|
|
3
|
+
|
|
4
|
+
// See https://datatracker.ietf.org/doc/html/rfc4648
|
|
5
|
+
|
|
6
|
+
const BASE32 = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'] // RFC 4648, #6
|
|
7
|
+
const BASE32HEX = [...'0123456789ABCDEFGHIJKLMNOPQRSTUV'] // RFC 4648, #7
|
|
8
|
+
const BASE32_HELPERS = {}
|
|
9
|
+
const BASE32HEX_HELPERS = {}
|
|
10
|
+
|
|
11
|
+
export const E_CHAR = 'Invalid character in base32 input'
|
|
12
|
+
export const E_PADDING = 'Invalid base32 padding'
|
|
13
|
+
export const E_LENGTH = 'Invalid base32 length'
|
|
14
|
+
export const E_LAST = 'Invalid last chunk'
|
|
15
|
+
|
|
16
|
+
// We construct output by concatenating chars, this seems to be fine enough on modern JS engines
|
|
17
|
+
export function toBase32(arr, isBase32Hex, padding) {
|
|
18
|
+
assertUint8(arr)
|
|
19
|
+
const fullChunks = Math.floor(arr.length / 5)
|
|
20
|
+
const fullChunksBytes = fullChunks * 5
|
|
21
|
+
let o = ''
|
|
22
|
+
let i = 0
|
|
23
|
+
|
|
24
|
+
const alphabet = isBase32Hex ? BASE32HEX : BASE32
|
|
25
|
+
const helpers = isBase32Hex ? BASE32HEX_HELPERS : BASE32_HELPERS
|
|
26
|
+
if (!helpers.pairs) {
|
|
27
|
+
helpers.pairs = []
|
|
28
|
+
if (nativeDecoder) {
|
|
29
|
+
// Lazy, to save memory in case this is not needed
|
|
30
|
+
helpers.codepairs = new Uint16Array(32 * 32)
|
|
31
|
+
const u16 = helpers.codepairs
|
|
32
|
+
const u8 = new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength) // write as 1-byte to ignore BE/LE difference
|
|
33
|
+
for (let i = 0; i < 32; i++) {
|
|
34
|
+
const ic = alphabet[i].charCodeAt(0)
|
|
35
|
+
for (let j = 0; j < 32; j++) u8[(i << 6) | (j << 1)] = u8[(j << 6) | ((i << 1) + 1)] = ic
|
|
36
|
+
}
|
|
37
|
+
} else {
|
|
38
|
+
const p = helpers.pairs
|
|
39
|
+
for (let i = 0; i < 32; i++) {
|
|
40
|
+
for (let j = 0; j < 32; j++) p.push(`${alphabet[i]}${alphabet[j]}`)
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const { pairs, codepairs } = helpers
|
|
46
|
+
|
|
47
|
+
// Fast path for complete blocks
|
|
48
|
+
// This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
|
|
49
|
+
if (nativeDecoder) {
|
|
50
|
+
const oa = new Uint16Array(fullChunks * 4)
|
|
51
|
+
for (let j = 0; i < fullChunksBytes; i += 5) {
|
|
52
|
+
const a = arr[i]
|
|
53
|
+
const b = arr[i + 1]
|
|
54
|
+
const c = arr[i + 2]
|
|
55
|
+
const d = arr[i + 3]
|
|
56
|
+
const e = arr[i + 4]
|
|
57
|
+
oa[j++] = codepairs[(a << 2) | (b >> 6)] // 8 + 8 - 5 - 5 = 6 left
|
|
58
|
+
oa[j++] = codepairs[((b & 0x3f) << 4) | (c >> 4)] // 6 + 8 - 5 - 5 = 4 left
|
|
59
|
+
oa[j++] = codepairs[((c & 0xf) << 6) | (d >> 2)] // 4 + 8 - 5 - 5 = 2 left
|
|
60
|
+
oa[j++] = codepairs[((d & 0x3) << 8) | e] // 2 + 8 - 5 - 5 = 0 left
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
o = nativeDecoder.decode(oa)
|
|
64
|
+
} else {
|
|
65
|
+
for (; i < fullChunksBytes; i += 5) {
|
|
66
|
+
const a = arr[i]
|
|
67
|
+
const b = arr[i + 1]
|
|
68
|
+
const c = arr[i + 2]
|
|
69
|
+
const d = arr[i + 3]
|
|
70
|
+
const e = arr[i + 4]
|
|
71
|
+
o += pairs[(a << 2) | (b >> 6)] // 8 + 8 - 5 - 5 = 6 left
|
|
72
|
+
o += pairs[((b & 0x3f) << 4) | (c >> 4)] // 6 + 8 - 5 - 5 = 4 left
|
|
73
|
+
o += pairs[((c & 0xf) << 6) | (d >> 2)] // 4 + 8 - 5 - 5 = 2 left
|
|
74
|
+
o += pairs[((d & 0x3) << 8) | e] // 2 + 8 - 5 - 5 = 0 left
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// If we have something left, process it with a full algo
|
|
79
|
+
let carry = 0
|
|
80
|
+
let shift = 3 // First byte needs to be shifted by 3 to get 5 bits
|
|
81
|
+
for (; i < arr.length; i++) {
|
|
82
|
+
const x = arr[i]
|
|
83
|
+
o += alphabet[carry | (x >> shift)] // shift >= 3, so this fits
|
|
84
|
+
if (shift >= 5) {
|
|
85
|
+
shift -= 5
|
|
86
|
+
o += alphabet[(x >> shift) & 0x1f]
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
carry = (x << (5 - shift)) & 0x1f
|
|
90
|
+
shift += 3 // Each byte prints 5 bits and leaves 3 bits
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
if (shift !== 3) o += alphabet[carry] // shift 3 means we have no carry left
|
|
94
|
+
if (padding) o += ['', '======', '====', '===', '='][arr.length - fullChunksBytes]
|
|
95
|
+
|
|
96
|
+
return o
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// TODO: can this be optimized? This only affects non-Hermes barebone engines though
|
|
100
|
+
const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
|
|
101
|
+
|
|
102
|
+
export function fromBase32(str, isBase32Hex) {
|
|
103
|
+
let inputLength = str.length
|
|
104
|
+
while (str[inputLength - 1] === '=') inputLength--
|
|
105
|
+
const paddingLength = str.length - inputLength
|
|
106
|
+
const tailLength = inputLength % 8
|
|
107
|
+
const mainLength = inputLength - tailLength // multiples of 8
|
|
108
|
+
if (![0, 2, 4, 5, 7].includes(tailLength)) throw new SyntaxError(E_LENGTH) // fast verification
|
|
109
|
+
if (paddingLength > 7 || (paddingLength !== 0 && str.length % 8 !== 0)) {
|
|
110
|
+
throw new SyntaxError(E_PADDING)
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
const alphabet = isBase32Hex ? BASE32HEX : BASE32
|
|
114
|
+
const helpers = isBase32Hex ? BASE32HEX_HELPERS : BASE32_HELPERS
|
|
115
|
+
|
|
116
|
+
if (!helpers.fromMap) {
|
|
117
|
+
helpers.fromMap = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
|
|
118
|
+
alphabet.forEach((c, i) => {
|
|
119
|
+
helpers.fromMap[c.charCodeAt(0)] = helpers.fromMap[c.toLowerCase().charCodeAt(0)] = i
|
|
120
|
+
})
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const m = helpers.fromMap
|
|
124
|
+
|
|
125
|
+
const arr = new Uint8Array(Math.floor((inputLength * 5) / 8))
|
|
126
|
+
let at = 0
|
|
127
|
+
let i = 0
|
|
128
|
+
|
|
129
|
+
if (nativeEncoder) {
|
|
130
|
+
const codes = nativeEncoder.encode(str)
|
|
131
|
+
if (codes.length !== str.length) throw new SyntaxError(E_CHAR) // non-ascii
|
|
132
|
+
while (i < mainLength) {
|
|
133
|
+
// each 5 bits, grouped 5 * 4 = 20
|
|
134
|
+
const a = (m[codes[i++]] << 15) | (m[codes[i++]] << 10) | (m[codes[i++]] << 5) | m[codes[i++]]
|
|
135
|
+
const b = (m[codes[i++]] << 15) | (m[codes[i++]] << 10) | (m[codes[i++]] << 5) | m[codes[i++]]
|
|
136
|
+
if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
|
|
137
|
+
arr[at++] = a >> 12
|
|
138
|
+
arr[at++] = (a >> 4) & 0xff
|
|
139
|
+
arr[at++] = ((a << 4) & 0xff) | (b >> 16)
|
|
140
|
+
arr[at++] = (b >> 8) & 0xff
|
|
141
|
+
arr[at++] = b & 0xff
|
|
142
|
+
}
|
|
143
|
+
} else {
|
|
144
|
+
while (i < mainLength) {
|
|
145
|
+
// each 5 bits, grouped 5 * 4 = 20
|
|
146
|
+
const a =
|
|
147
|
+
(m[str.charCodeAt(i++)] << 15) |
|
|
148
|
+
(m[str.charCodeAt(i++)] << 10) |
|
|
149
|
+
(m[str.charCodeAt(i++)] << 5) |
|
|
150
|
+
m[str.charCodeAt(i++)]
|
|
151
|
+
const b =
|
|
152
|
+
(m[str.charCodeAt(i++)] << 15) |
|
|
153
|
+
(m[str.charCodeAt(i++)] << 10) |
|
|
154
|
+
(m[str.charCodeAt(i++)] << 5) |
|
|
155
|
+
m[str.charCodeAt(i++)]
|
|
156
|
+
if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
|
|
157
|
+
arr[at++] = a >> 12
|
|
158
|
+
arr[at++] = (a >> 4) & 0xff
|
|
159
|
+
arr[at++] = ((a << 4) & 0xff) | (b >> 16)
|
|
160
|
+
arr[at++] = (b >> 8) & 0xff
|
|
161
|
+
arr[at++] = b & 0xff
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Last block, valid tailLength: 0 2 4 5 7, checked already
|
|
166
|
+
// We check last chunk to be strict
|
|
167
|
+
if (tailLength < 2) return arr
|
|
168
|
+
const ab = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
|
|
169
|
+
if (ab < 0) throw new SyntaxError(E_CHAR)
|
|
170
|
+
arr[at++] = ab >> 2
|
|
171
|
+
if (tailLength < 4) {
|
|
172
|
+
if (ab & 0x3) throw new SyntaxError(E_LAST)
|
|
173
|
+
return arr
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
const cd = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
|
|
177
|
+
if (cd < 0) throw new SyntaxError(E_CHAR)
|
|
178
|
+
arr[at++] = ((ab << 6) & 0xff) | (cd >> 4)
|
|
179
|
+
if (tailLength < 5) {
|
|
180
|
+
if (cd & 0xf) throw new SyntaxError(E_LAST)
|
|
181
|
+
return arr
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
const e = m[str.charCodeAt(i++)]
|
|
185
|
+
if (e < 0) throw new SyntaxError(E_CHAR)
|
|
186
|
+
arr[at++] = ((cd << 4) & 0xff) | (e >> 1) // 4 + 4
|
|
187
|
+
if (tailLength < 7) {
|
|
188
|
+
if (e & 0x1) throw new SyntaxError(E_LAST)
|
|
189
|
+
return arr
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
const fg = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
|
|
193
|
+
if (fg < 0) throw new SyntaxError(E_CHAR)
|
|
194
|
+
arr[at++] = ((e << 7) & 0xff) | (fg >> 3) // 1 + 5 + 2
|
|
195
|
+
// Can't be 8, so no h
|
|
196
|
+
if (fg & 0x7) throw new SyntaxError(E_LAST)
|
|
197
|
+
return arr
|
|
198
|
+
}
|
package/fallback/base64.js
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
import { assertUint8 } from '../assert.js'
|
|
2
|
+
import { nativeEncoder, nativeDecoder } from './_utils.js'
|
|
2
3
|
|
|
3
4
|
// See https://datatracker.ietf.org/doc/html/rfc4648
|
|
4
5
|
|
|
5
|
-
const { TextDecoder } = globalThis
|
|
6
|
-
const nativeDecoder = TextDecoder?.toString().includes('[native code]') ? new TextDecoder() : null
|
|
7
6
|
const BASE64 = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/']
|
|
8
7
|
const BASE64URL = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_']
|
|
9
|
-
const
|
|
10
|
-
const
|
|
11
|
-
|
|
12
|
-
const
|
|
8
|
+
const BASE64_HELPERS = {}
|
|
9
|
+
const BASE64URL_HELPERS = {}
|
|
10
|
+
|
|
11
|
+
export const E_CHAR = 'Invalid character in base64 input'
|
|
12
|
+
export const E_PADDING = 'Invalid base64 padding'
|
|
13
|
+
export const E_LENGTH = 'Invalid base64 length'
|
|
14
|
+
export const E_LAST = 'Invalid last chunk'
|
|
13
15
|
|
|
14
16
|
// Alternatively, we could have mapped 0-255 bytes to charcodes and just used btoa(ascii),
|
|
15
17
|
// but that approach is _slower_ than our toBase64js function, even on Hermes
|
|
@@ -23,27 +25,38 @@ export function toBase64(arr, isURL, padding) {
|
|
|
23
25
|
let i = 0
|
|
24
26
|
|
|
25
27
|
const alphabet = isURL ? BASE64URL : BASE64
|
|
26
|
-
const
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
28
|
+
const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
|
|
29
|
+
if (!helpers.pairs) {
|
|
30
|
+
helpers.pairs = []
|
|
31
|
+
if (nativeDecoder) {
|
|
32
|
+
// Lazy, to save memory in case this is not needed
|
|
33
|
+
helpers.codepairs = new Uint16Array(64 * 64)
|
|
34
|
+
const u16 = helpers.codepairs
|
|
35
|
+
const u8 = new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength) // write as 1-byte to ignore BE/LE difference
|
|
36
|
+
for (let i = 0; i < 64; i++) {
|
|
37
|
+
const ic = alphabet[i].charCodeAt(0)
|
|
38
|
+
for (let j = 0; j < 64; j++) u8[(i << 7) | (j << 1)] = u8[(j << 7) | ((i << 1) + 1)] = ic
|
|
39
|
+
}
|
|
40
|
+
} else {
|
|
41
|
+
const p = helpers.pairs
|
|
42
|
+
for (let i = 0; i < 64; i++) {
|
|
43
|
+
for (let j = 0; j < 64; j++) p.push(`${alphabet[i]}${alphabet[j]}`)
|
|
44
|
+
}
|
|
32
45
|
}
|
|
33
46
|
}
|
|
34
47
|
|
|
48
|
+
const { pairs, codepairs } = helpers
|
|
49
|
+
|
|
35
50
|
// Fast path for complete blocks
|
|
36
51
|
// This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
|
|
37
52
|
if (nativeDecoder) {
|
|
38
|
-
const oa = new
|
|
53
|
+
const oa = new Uint16Array(fullChunks * 2)
|
|
39
54
|
for (let j = 0; i < fullChunksBytes; i += 3) {
|
|
40
55
|
const a = arr[i]
|
|
41
56
|
const b = arr[i + 1]
|
|
42
57
|
const c = arr[i + 2]
|
|
43
|
-
oa[j++] =
|
|
44
|
-
oa[j++] =
|
|
45
|
-
oa[j++] = map[((b & 0xf) << 2) | (c >> 6)]
|
|
46
|
-
oa[j++] = map[c & 0x3f]
|
|
58
|
+
oa[j++] = codepairs[(a << 4) | (b >> 4)]
|
|
59
|
+
oa[j++] = codepairs[((b & 0x0f) << 8) | c]
|
|
47
60
|
}
|
|
48
61
|
|
|
49
62
|
o = nativeDecoder.decode(oa)
|
|
@@ -78,50 +91,72 @@ export function toBase64(arr, isURL, padding) {
|
|
|
78
91
|
return o
|
|
79
92
|
}
|
|
80
93
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
// Assumes valid input and no chars after =, checked at API
|
|
84
|
-
// Last chunk is rechecked at API too
|
|
85
|
-
export function fromBase64(str) {
|
|
86
|
-
const map = fromBase64jsMap || new Array(256)
|
|
87
|
-
if (!fromBase64jsMap) {
|
|
88
|
-
fromBase64jsMap = map
|
|
89
|
-
BASE64.forEach((c, i) => (map[c.charCodeAt(0)] = i))
|
|
90
|
-
map['-'.charCodeAt(0)] = map['+'.charCodeAt(0)] // for base64url
|
|
91
|
-
map['_'.charCodeAt(0)] = map['/'.charCodeAt(0)] // for base64url
|
|
92
|
-
}
|
|
94
|
+
// TODO: can this be optimized? This only affects non-Hermes barebone engines though
|
|
95
|
+
const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
|
|
93
96
|
|
|
97
|
+
// Last chunk is rechecked at API
|
|
98
|
+
export function fromBase64(str, isURL) {
|
|
94
99
|
let inputLength = str.length
|
|
95
100
|
while (str[inputLength - 1] === '=') inputLength--
|
|
96
|
-
|
|
97
|
-
const arr = new Uint8Array(Math.floor((inputLength * 3) / 4))
|
|
101
|
+
const paddingLength = str.length - inputLength
|
|
98
102
|
const tailLength = inputLength % 4
|
|
99
103
|
const mainLength = inputLength - tailLength // multiples of 4
|
|
104
|
+
if (tailLength === 1) throw new SyntaxError(E_LENGTH)
|
|
105
|
+
if (paddingLength > 3 || (paddingLength !== 0 && str.length % 4 !== 0)) {
|
|
106
|
+
throw new SyntaxError(E_PADDING)
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
const alphabet = isURL ? BASE64URL : BASE64
|
|
110
|
+
const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
|
|
100
111
|
|
|
112
|
+
if (!helpers.fromMap) {
|
|
113
|
+
helpers.fromMap = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
|
|
114
|
+
alphabet.forEach((c, i) => (helpers.fromMap[c.charCodeAt(0)] = i))
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
const m = helpers.fromMap
|
|
118
|
+
|
|
119
|
+
const arr = new Uint8Array(Math.floor((inputLength * 3) / 4))
|
|
101
120
|
let at = 0
|
|
102
121
|
let i = 0
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
122
|
+
|
|
123
|
+
if (nativeEncoder) {
|
|
124
|
+
const codes = nativeEncoder.encode(str)
|
|
125
|
+
if (codes.length !== str.length) throw new SyntaxError(E_CHAR) // non-ascii
|
|
126
|
+
while (i < mainLength) {
|
|
127
|
+
const a = (m[codes[i++]] << 18) | (m[codes[i++]] << 12) | (m[codes[i++]] << 6) | m[codes[i++]]
|
|
128
|
+
if (a < 0) throw new SyntaxError(E_CHAR)
|
|
129
|
+
arr[at++] = a >> 16
|
|
130
|
+
arr[at++] = (a >> 8) & 0xff
|
|
131
|
+
arr[at++] = a & 0xff
|
|
132
|
+
}
|
|
133
|
+
} else {
|
|
134
|
+
while (i < mainLength) {
|
|
135
|
+
const a =
|
|
136
|
+
(m[str.charCodeAt(i++)] << 18) |
|
|
137
|
+
(m[str.charCodeAt(i++)] << 12) |
|
|
138
|
+
(m[str.charCodeAt(i++)] << 6) |
|
|
139
|
+
m[str.charCodeAt(i++)]
|
|
140
|
+
if (a < 0) throw new SyntaxError(E_CHAR)
|
|
141
|
+
arr[at++] = a >> 16
|
|
142
|
+
arr[at++] = (a >> 8) & 0xff
|
|
143
|
+
arr[at++] = a & 0xff
|
|
144
|
+
}
|
|
112
145
|
}
|
|
113
146
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
tmp = (map[str.charCodeAt(i)] << 2) | (map[str.charCodeAt(i + 1)] >> 4)
|
|
123
|
-
arr[at++] = tmp & 0xff
|
|
147
|
+
// Can be 0, 2 or 3, verified by padding checks already
|
|
148
|
+
if (tailLength < 2) return arr // 0
|
|
149
|
+
const ab = (m[str.charCodeAt(i++)] << 6) | m[str.charCodeAt(i++)]
|
|
150
|
+
if (ab < 0) throw new SyntaxError(E_CHAR)
|
|
151
|
+
arr[at++] = ab >> 4
|
|
152
|
+
if (tailLength < 3) {
|
|
153
|
+
if (ab & 0xf) throw new SyntaxError(E_LAST)
|
|
154
|
+
return arr // 2
|
|
124
155
|
}
|
|
125
156
|
|
|
126
|
-
|
|
157
|
+
const c = m[str.charCodeAt(i++)]
|
|
158
|
+
if (c < 0) throw new SyntaxError(E_CHAR)
|
|
159
|
+
arr[at++] = ((ab << 4) & 0xff) | (c >> 2)
|
|
160
|
+
if (c & 0x3) throw new SyntaxError(E_LAST)
|
|
161
|
+
return arr // 3
|
|
127
162
|
}
|
package/fallback/hex.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
// We use TextEncoder here to parse strings to charcodes, this is faster than individual charCodeAt calls
|
|
4
|
-
const { TextEncoder } = globalThis // Buffer is optional, only used when native
|
|
5
|
-
const nativeEncoder = TextEncoder?.toString().includes('[native code]') ? new TextEncoder() : null
|
|
1
|
+
import { assertUint8 } from '../assert.js'
|
|
2
|
+
import { nativeEncoder } from './_utils.js'
|
|
6
3
|
|
|
7
4
|
let hexArray
|
|
8
5
|
let dehexArray
|
|
9
6
|
|
|
7
|
+
export const E_HEX = 'Input is not a hex string'
|
|
8
|
+
|
|
10
9
|
function toHexPart(arr, start, end) {
|
|
11
10
|
let o = ''
|
|
12
11
|
let i = start
|
|
@@ -52,15 +51,18 @@ export function toHex(arr) {
|
|
|
52
51
|
return toHexPart(arr, 0, length)
|
|
53
52
|
}
|
|
54
53
|
|
|
54
|
+
// TODO: can this be optimized? This only affects non-Hermes barebone engines though
|
|
55
|
+
const mapSize = nativeEncoder ? 256 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
|
|
56
|
+
|
|
55
57
|
export function fromHex(str) {
|
|
56
58
|
if (typeof str !== 'string') throw new TypeError('Input is not a string')
|
|
57
|
-
|
|
59
|
+
if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
|
|
58
60
|
|
|
59
61
|
// We don't use native Buffer impl, as rechecking input makes it slower than pure js
|
|
60
62
|
// This path is used only on older engines though
|
|
61
63
|
|
|
62
64
|
if (!dehexArray) {
|
|
63
|
-
dehexArray = new
|
|
65
|
+
dehexArray = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
|
|
64
66
|
for (let i = 0; i < 16; i++) {
|
|
65
67
|
const s = i.toString(16)
|
|
66
68
|
dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
|
|
@@ -73,16 +75,31 @@ export function fromHex(str) {
|
|
|
73
75
|
if (nativeEncoder) {
|
|
74
76
|
// Native encoder path is beneficial even for small arrays in Hermes
|
|
75
77
|
const codes = nativeEncoder.encode(str)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
78
|
+
if (codes.length !== str.length) throw new SyntaxError(E_HEX) // non-ascii
|
|
79
|
+
const last3 = length - 3 // Unroll nativeEncoder path as this is what modern Hermes takes and a small perf improvement is nice there
|
|
80
|
+
let i = 0
|
|
81
|
+
while (i < last3) {
|
|
82
|
+
const a = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
|
|
83
|
+
const b = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
|
|
84
|
+
const c = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
|
|
85
|
+
const d = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
|
|
86
|
+
if (a < 0 || b < 0 || c < 0 || d < 0) throw new SyntaxError(E_HEX)
|
|
87
|
+
arr[i++] = a
|
|
88
|
+
arr[i++] = b
|
|
89
|
+
arr[i++] = c
|
|
90
|
+
arr[i++] = d
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
while (i < length) {
|
|
94
|
+
const res = (dehexArray[codes[j++]] << 4) | dehexArray[codes[j++]]
|
|
95
|
+
if (res < 0) throw new SyntaxError(E_HEX)
|
|
96
|
+
arr[i++] = res
|
|
80
97
|
}
|
|
81
98
|
} else {
|
|
82
99
|
for (let i = 0; i < length; i++) {
|
|
83
|
-
const
|
|
84
|
-
if (
|
|
85
|
-
arr[i] =
|
|
100
|
+
const res = (dehexArray[str.charCodeAt(j++)] << 4) | dehexArray[str.charCodeAt(j++)]
|
|
101
|
+
if (res < 0) throw new SyntaxError(E_HEX)
|
|
102
|
+
arr[i] = res
|
|
86
103
|
}
|
|
87
104
|
}
|
|
88
105
|
|
package/fallback/utf8.js
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
export const E_STRICT = 'Input is not well-formed utf8'
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'

// U+FFFD REPLACEMENT CHARACTER, emitted in loose mode in place of every ill-formed sequence
const replacementPoint = 0xff_fd

/**
 * Pure-JS UTF-8 decoder, see https://encoding.spec.whatwg.org/#utf-8-decoder
 *
 * We are most likely in loose mode here, as for non-loose the escape + decodeURIComponent
 * path already covers engines without a native decoder.
 *
 * @param {Uint8Array} arr bytes to decode
 * @param {boolean} loose when truthy, ill-formed sequences become U+FFFD instead of throwing
 * @returns {string} the decoded string (a leading BOM is not stripped)
 * @throws {TypeError} with message E_STRICT on ill-formed input when `loose` is falsy
 */
export function decode(arr, loose) {
  const end = arr.length
  const tmp = [] // pending UTF-16 code units, flushed to `out` in chunks
  let out = ''

  for (let i = 0; i < end; i++) {
    if (tmp.length > 0x2_00) {
      // Far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks.
      // Length can be off by a few: the ascii run below may overshoot by one,
      // and an astral code point pushes two code units at once
      out += String.fromCharCode.apply(String, tmp)
      tmp.length = 0
    }

    const byte = arr[i]
    if (byte < 0x80) {
      // Fast path: swallow the whole ascii run without re-entering the branch chain
      tmp.push(byte)
      while (i + 1 < end && tmp.length <= 0x2_00) {
        const next = arr[i + 1]
        if (next >= 0x80) break
        tmp.push(next)
        i++
      }

      continue
    }

    if (byte < 0xc2 || byte > 0xf4) {
      // Stray continuation (80..BF), overlong lead (C0, C1) or lead past U+10FFFF (F5..FF)
      if (!loose) throw new TypeError(E_STRICT)
      tmp.push(replacementPoint)
      continue
    }

    // Lead byte: how many continuation bytes follow, the payload bits it carries,
    // and the range allowed for the first continuation byte
    let needed
    let codePoint
    let lower = 0x80
    let upper = 0xbf
    if (byte < 0xe0) {
      needed = 1
      codePoint = byte & 0x1f
    } else if (byte < 0xf0) {
      needed = 2
      codePoint = byte & 0x0f
      if (byte === 0xe0) lower = 0xa0 // below that is an overlong encoding
      else if (byte === 0xed) upper = 0x9f // above that is a surrogate
    } else {
      needed = 3
      codePoint = byte & 0x07
      if (byte === 0xf0) lower = 0x90 // below that is an overlong encoding
      else if (byte === 0xf4) upper = 0x8f // above that is past U+10FFFF
    }

    while (needed > 0) {
      if (i + 1 >= end) break // truncated input
      const next = arr[i + 1]
      if (next < lower || next > upper) break // not consumed: the outer loop re-examines it
      codePoint = (codePoint << 6) | (next & 0x3f)
      lower = 0x80
      upper = 0xbf
      needed--
      i++
    }

    if (needed > 0) {
      // One replacement per ill-formed prefix; on truncation nothing is left and the loop ends
      if (!loose) throw new TypeError(E_STRICT)
      tmp.push(replacementPoint)
      continue
    }

    if (codePoint > 0xff_ff) {
      // Split into char codes as String.fromCharCode is faster than String.fromCodePoint
      const u = codePoint - 0x1_00_00
      tmp.push(0xd8_00 + ((u >> 10) & 0x3_ff), 0xdc_00 + (u & 0x3_ff))
    } else {
      tmp.push(codePoint)
    }
  }

  if (tmp.length > 0) out += String.fromCharCode.apply(String, tmp)
  return out
}
|
|
167
|
+
|
|
168
|
+
/**
 * Pure-JS UTF-8 encoder.
 *
 * @param {string} string input to encode
 * @param {boolean} loose when truthy, unpaired surrogates are encoded as U+FFFD (EF BF BD)
 * @returns {Uint8Array} the encoded bytes, sized exactly to the output
 * @throws {TypeError} with message E_STRICT_UNICODE on an unpaired surrogate when `loose` is falsy
 */
export function encode(string, loose) {
  const length = string.length
  let bytes = new Uint8Array(length) // optimistic guess: ascii needs exactly one byte per char code
  let grown = false
  let p = 0

  for (let i = 0; i < length; i++) {
    const code = string.charCodeAt(i)
    if (code < 0x80) {
      // Fast path: copy the whole ascii run in a tight loop
      bytes[p++] = code
      while (i + 1 < length) {
        const next = string.charCodeAt(i + 1)
        if (next >= 0x80) break
        bytes[p++] = next
        i++
      }

      continue
    }

    if (!grown) {
      // TODO: use resizable array buffers? will have to return a non-resizable one
      // Everything before `i` was ascii and already takes `p` bytes;
      // each remaining char code produces at most 3 bytes (a surrogate pair is 2 char codes -> 4 bytes)
      const bigger = new Uint8Array(p + (length - i) * 3)
      bigger.set(bytes.subarray(0, p))
      bytes = bigger
      grown = true
    }

    // Surrogates: charcodes = [d800 + a & 3ff, dc00 + b & 3ff]; codePoint = 0x1_00_00 | (a << 10) | b
    // lead: d800 - dbff, trail: dc00 - dfff
    if (code >= 0xd8_00 && code < 0xe0_00) {
      if (code < 0xdc_00 && i + 1 < length) {
        const trail = string.charCodeAt(i + 1)
        if (trail >= 0xdc_00 && trail < 0xe0_00) {
          // Properly paired: codePoint is always between 0x1_00_00 and 0x11_00_00, encoded as 4 bytes
          const codePoint = (((code - 0xd8_00) << 10) | (trail - 0xdc_00)) + 0x1_00_00
          bytes[p++] = (codePoint >> 18) | 0xf0
          bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
          bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
          bytes[p++] = (codePoint & 0x3f) | 0x80
          i++ // the trail is consumed together with its lead
          continue
        }
      }

      // A lead without a following trail, or a trail that no lead claimed
      if (!loose) throw new TypeError(E_STRICT_UNICODE)
      bytes[p++] = 0xef
      bytes[p++] = 0xbf
      bytes[p++] = 0xbd
      continue
    }

    // A non-surrogate char code above ascii, it gets encoded to 2 or 3 bytes
    if (code < 0x8_00) {
      bytes[p++] = (code >> 6) | 0xc0
      bytes[p++] = (code & 0x3f) | 0x80
    } else {
      bytes[p++] = (code >> 12) | 0xe0
      bytes[p++] = ((code >> 6) & 0x3f) | 0x80
      bytes[p++] = (code & 0x3f) | 0x80
    }
  }

  // slice() copies, so the oversized scratch buffer is not kept alive by the result
  return bytes.length === p ? bytes : bytes.slice(0, p)
}
|
package/hex.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { assertUint8 } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
3
|
import * as js from './fallback/hex.js'
|
|
4
4
|
|
|
@@ -7,8 +7,7 @@ const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
|
7
7
|
const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
|
|
8
8
|
|
|
9
9
|
export function toHex(arr) {
|
|
10
|
-
|
|
11
|
-
if (!(arr instanceof Uint8Array)) arr = new Uint8Array(arr.buffer, arr.byteOffset, arr.byteLength)
|
|
10
|
+
assertUint8(arr)
|
|
12
11
|
if (arr.length === 0) return ''
|
|
13
12
|
if (webHex && arr.toHex === webHex) return arr.toHex()
|
|
14
13
|
if (!haveNativeBuffer) return js.toHex(arr)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exodus/bytes",
|
|
3
|
-
"version": "1.0.0-rc.
|
|
3
|
+
"version": "1.0.0-rc.4",
|
|
4
4
|
"description": "Various operations on Uint8Array data",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"lint": "eslint .",
|
|
@@ -39,19 +39,25 @@
|
|
|
39
39
|
},
|
|
40
40
|
"type": "module",
|
|
41
41
|
"files": [
|
|
42
|
+
"/fallback/_utils.js",
|
|
43
|
+
"/fallback/base32.js",
|
|
42
44
|
"/fallback/base64.js",
|
|
43
45
|
"/fallback/hex.js",
|
|
46
|
+
"/fallback/utf8.js",
|
|
44
47
|
"/array.js",
|
|
45
48
|
"/assert.js",
|
|
49
|
+
"/base32.js",
|
|
46
50
|
"/base64.js",
|
|
47
|
-
"/hex.js"
|
|
51
|
+
"/hex.js",
|
|
52
|
+
"/utf8.js"
|
|
48
53
|
],
|
|
49
54
|
"exports": {
|
|
50
55
|
"./array.js": "./array.js",
|
|
56
|
+
"./base32.js": "./base32.js",
|
|
51
57
|
"./base64.js": "./base64.js",
|
|
52
|
-
"./hex.js": "./hex.js"
|
|
58
|
+
"./hex.js": "./hex.js",
|
|
59
|
+
"./utf8.js": "./utf8.js"
|
|
53
60
|
},
|
|
54
|
-
"dependencies": {},
|
|
55
61
|
"devDependencies": {
|
|
56
62
|
"@exodus/eslint-config": "^5.24.0",
|
|
57
63
|
"@exodus/prettier": "^1.0.0",
|
|
@@ -64,7 +70,10 @@
|
|
|
64
70
|
"buffer": "^6.0.3",
|
|
65
71
|
"electron": "36.5.0",
|
|
66
72
|
"eslint": "^8.44.0",
|
|
67
|
-
"
|
|
73
|
+
"fast-base64-decode": "^2.0.0",
|
|
74
|
+
"hi-base32": "^0.5.1",
|
|
75
|
+
"jsvu": "^3.0.0",
|
|
76
|
+
"text-encoding": "^0.7.0"
|
|
68
77
|
},
|
|
69
78
|
"prettier": "@exodus/prettier",
|
|
70
79
|
"packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"
|
package/utf8.js
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { assert, assertUint8 } from './assert.js'
|
|
2
|
+
import { typedView } from './array.js'
|
|
3
|
+
import * as js from './fallback/utf8.js'
|
|
4
|
+
|
|
5
|
+
const { Buffer, TextEncoder, TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
|
|
6
|
+
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
7
|
+
const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
|
|
8
|
+
const haveDecoder = isNative(TextDecoder)
|
|
9
|
+
const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
|
|
10
|
+
// ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
|
|
11
|
+
// We don't want to strip anything unexpectedly
|
|
12
|
+
const decoderFatal = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true, fatal: true }) : null
|
|
13
|
+
const decoderLoose = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true }) : null
|
|
14
|
+
|
|
15
|
+
const { E_STRICT, E_STRICT_UNICODE } = js
|
|
16
|
+
|
|
17
|
+
const shouldUseEscapePath = Boolean(globalThis.HermesInternal) // faster only on Hermes, js path beats it on normal engines
|
|
18
|
+
|
|
19
|
+
// Turns the output of a lossy encoder into a strict one: `res` is returned untouched,
// but in non-loose mode we first verify that no replacement happened while producing it
function deLoose(str, loose, res) {
  if (!loose) {
    const limit = res.length - 2 // a 3-byte EF BF BD sequence can't start at or past this offset
    for (let from = 0; from < limit; ) {
      const at = res.indexOf(0xef, from)
      if (at < 0) break
      if (res[at + 1] === 0xbf && res[at + 2] === 0xbd) {
        // A replacement char is present in the output: it is legit only if the input had it too,
        // so decode back and compare once, no need to look at further occurrences
        assert(str === decode(res), E_STRICT_UNICODE)
        break
      }

      from = at + 1
    }
  }

  return res
}
|
|
38
|
+
|
|
39
|
+
// Encodes a string to UTF-8 bytes using the fastest available implementation.
// In non-loose mode the native results are rechecked by deLoose()
function encode(str, loose = false) {
  assert(typeof str === 'string', 'Input is not a string')
  if (haveNativeBuffer) return deLoose(str, loose, Buffer.from(str)) // faster on ascii on Node.js
  if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str)) // Node.js, browsers, and Hermes
  // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
  return js.encode(str, loose)
}
|
|
46
|
+
|
|
47
|
+
let escapes
|
|
48
|
+
|
|
49
|
+
// Maps bytes arr[start..end) through the `escapes` lookup table and concatenates the results
function toEscapesPart(arr, start, end) {
  const unrolledEnd = end - 3
  let text = ''
  let pos = start

  // Four bytes per iteration, unrolled loop is faster
  for (; pos < unrolledEnd; pos += 4) {
    text += escapes[arr[pos]]
    text += escapes[arr[pos + 1]]
    text += escapes[arr[pos + 2]]
    text += escapes[arr[pos + 3]]
  }

  // Up to three leftover bytes
  for (; pos < end; pos++) text += escapes[arr[pos]]
  return text
}
|
|
68
|
+
|
|
69
|
+
// Decodes UTF-8 bytes to a string using the fastest available implementation.
// Non-loose mode throws on ill-formed input, loose mode is expected to substitute U+FFFD
function decode(arr, loose = false) {
  assertUint8(arr)
  if (haveDecoder) {
    // Node.js and browsers
    const decoder = loose ? decoderLoose : decoderFatal
    return decoder.decode(arr)
  }

  // No reason to use native Buffer: it's not faster than TextDecoder, needs rechecks in non-loose mode, and Node.js has TextDecoder

  const canUseEscapes = shouldUseEscapePath && escape && decodeURIComponent
  if (!canUseEscapes) return js.decode(arr, loose)

  // This codepath gives a ~2x perf boost on Hermes
  // The table of escape() results for every byte value is computed once, on first use
  if (!escapes) escapes = Array.from({ length: 256 }, (_, i) => escape(String.fromCharCode(i)))

  const size = arr.length
  let escaped
  if (size <= 30_000) {
    escaped = toEscapesPart(arr, 0, size)
  } else {
    // Limit concatenation to avoid excessive GC
    // TODO: recheck thresholds on Hermes (taken from hex)
    const parts = []
    for (let from = 0; from < size; from += 500) {
      const to = from + 500 > size ? size : from + 500
      parts.push(toEscapesPart(arr, from, to))
    }

    escaped = parts.join('')
    parts.length = 0
  }

  try {
    return decodeURIComponent(escaped) // percent-escaped ascii to string, the escape() table is precalculated
  } catch {
    if (!loose) throw new TypeError(E_STRICT)
  }

  // decodeURIComponent has no replacement mode, so the loose decoder has to use the manual implementation
  return js.decode(arr, loose)
}
|
|
106
|
+
|
|
107
|
+
// String -> bytes, strict: encode() is called in non-loose mode, the result is passed through typedView()
export const utf8fromString = (str, format = 'uint8') => {
  const bytes = encode(str, false)
  return typedView(bytes, format)
}

// String -> bytes, loose: same as above with encode() in loose mode
export const utf8fromStringLoose = (str, format = 'uint8') => {
  const bytes = encode(str, true)
  return typedView(bytes, format)
}

// Bytes -> string, strict: decode() is called in non-loose mode
export const utf8toString = (arr) => {
  return decode(arr, false)
}

// Bytes -> string, loose: decode() is called in loose mode
export const utf8toStringLoose = (arr) => {
  return decode(arr, true)
}
|