@exodus/bytes 1.0.0-rc.2 → 1.0.0-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -2
- package/array.js +1 -1
- package/assert.js +10 -2
- package/base32.js +33 -0
- package/base64.js +92 -153
- package/fallback/_utils.js +6 -0
- package/fallback/base32.js +198 -0
- package/fallback/base64.js +162 -0
- package/fallback/hex.js +107 -0
- package/fallback/utf8.js +280 -0
- package/hex.js +11 -68
- package/package.json +18 -7
- package/utf8.js +110 -0
package/utf8.js
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { assert, assertUint8 } from './assert.js'
|
|
2
|
+
import { typedView } from './array.js'
|
|
3
|
+
import * as js from './fallback/utf8.js'
|
|
4
|
+
|
|
5
|
+
// Platform primitives are read from globalThis once; any of them may be missing (Buffer is optional)
const { Buffer, TextEncoder, TextDecoder, decodeURIComponent, escape } = globalThis

// Truthy when a Buffer global exists and does not expose TYPED_ARRAY_SUPPORT
// NOTE(review): TYPED_ARRAY_SUPPORT is presumably only set by the userland `buffer` polyfill - confirm
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT

/**
 * Tells whether a global constructor can be trusted to be a native implementation.
 * Falsy inputs are returned unchanged. With a native Buffer present everything is
 * accepted (Node.js TextDecoder/TextEncoder are considered native); otherwise the
 * stringified source must contain '[native code]'.
 */
function isNative(x) {
  if (!x) return x
  return haveNativeBuffer || `${x}`.includes('[native code]')
}

const haveDecoder = isNative(TextDecoder)
const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null

// ignoreBOM: true keeps a leading BOM in the decoded output instead of dropping it,
// so that nothing is stripped from the input unexpectedly
const keepBOM = { ignoreBOM: true }
const decoderFatal = haveDecoder ? new TextDecoder('utf8', { ...keepBOM, fatal: true }) : null
const decoderLoose = haveDecoder ? new TextDecoder('utf8', { ...keepBOM }) : null
|
|
14
|
+
|
|
15
|
+
// Error messages shared with the pure-JS fallback implementation
const E_STRICT = js.E_STRICT
const E_STRICT_UNICODE = js.E_STRICT_UNICODE

// The escape() + decodeURIComponent() decoding path is faster only on Hermes;
// on normal engines the pure-JS path beats it, so it is enabled only when HermesInternal exists
const shouldUseEscapePath = !!globalThis.HermesInternal
|
|
18
|
+
|
|
19
|
+
/**
 * Post-checks bytes produced by a native encoder when strict mode is requested.
 *
 * In loose mode the bytes are returned untouched. Otherwise the output is scanned
 * for the byte sequence EF BF BD (UTF-8 for the replacement character U+FFFD).
 * The first occurrence triggers a round-trip check: the bytes are decoded back and
 * must equal the input string, otherwise the assertion fails with E_STRICT_UNICODE.
 *
 * @param {string} str - the string that was encoded
 * @param {boolean} loose - when truthy, skip validation entirely
 * @param {Uint8Array} res - encoder output (Buffer or Uint8Array)
 * @returns {Uint8Array} `res` itself, never a copy
 */
function deLoose(str, loose, res) {
  if (loose) return res

  // A 3-byte sequence cannot start within the last two bytes
  const limit = res.length - 2
  for (let from = 0; from < limit; ) {
    const at = res.indexOf(0xef, from)
    if (at === -1) break
    from = at + 1
    if (res[at + 1] !== 0xbf || res[at + 2] !== 0xbd) continue

    // A replacement char is present in the output: it is only legitimate if the
    // input contained it too, which a single full round-trip comparison settles
    assert(str === decode(res), E_STRICT_UNICODE)
    break
  }

  return res
}
|
|
38
|
+
|
|
39
|
+
/**
 * Encodes a string to UTF-8 bytes using the fastest available backend.
 *
 * Preference order: native Buffer (faster on ASCII on Node.js), then a native
 * TextEncoder (Node.js, browsers, Hermes), then the pure-JS fallback. Output of the
 * native backends goes through deLoose() so strict mode can validate it.
 * unescape + encodeURIComponent is deliberately not used: it is slower than the JS
 * path on normal engines, and modern Hermes already has TextEncoder.
 *
 * @param {string} str - input; asserted to be a string
 * @param {boolean} [loose=false] - forwarded to deLoose() / the JS fallback
 * @returns {Uint8Array} encoded bytes (a Buffer when the Buffer backend is used)
 */
function encode(str, loose = false) {
  assert(typeof str === 'string')

  if (!haveNativeBuffer && !nativeEncoder) return js.encode(str, loose)

  const bytes = haveNativeBuffer ? Buffer.from(str) : nativeEncoder.encode(str)
  return deLoose(str, loose, bytes)
}
|
|
46
|
+
|
|
47
|
+
// Byte value -> escape() string lookup table; left unset here and filled in lazily by decode()
let escapes

/**
 * Concatenates the `escapes` table entries for bytes arr[start] .. arr[end - 1].
 *
 * @param {Uint8Array} arr - source bytes
 * @param {number} start - first index to convert (inclusive)
 * @param {number} end - index to stop at (exclusive)
 * @returns {string} the joined table entries, '' when the range is empty
 */
function toEscapesPart(arr, start, end) {
  const unrolledEnd = end - 3
  let out = ''
  let pos = start

  // Four bytes per iteration: the unrolled form is faster
  for (; pos < unrolledEnd; pos += 4) {
    out += escapes[arr[pos]]
    out += escapes[arr[pos + 1]]
    out += escapes[arr[pos + 2]]
    out += escapes[arr[pos + 3]]
  }

  // Up to three leftover bytes
  for (; pos < end; pos++) out += escapes[arr[pos]]

  return out
}
|
|
68
|
+
|
|
69
|
+
/**
 * Decodes UTF-8 bytes to a string using the fastest available backend.
 *
 * 1. A native TextDecoder when available (Node.js and browsers): the fatal instance
 *    in strict mode, the non-fatal one in loose mode. Native Buffer is not used
 *    here: it is not faster than TextDecoder, would need rechecks in non-loose
 *    mode, and Node.js has TextDecoder anyway.
 * 2. On Hermes, escape() + decodeURIComponent(), which is ~2x faster there.
 * 3. The pure-JS fallback otherwise, and also for loose mode when step 2 fails.
 *
 * @param {Uint8Array} arr - input bytes; validated with assertUint8
 * @param {boolean} [loose=false] - when truthy, the loose decoder variants are used
 * @returns {string} the decoded string
 * @throws {TypeError} E_STRICT when the escape path fails in strict mode
 */
function decode(arr, loose = false) {
  assertUint8(arr)

  if (haveDecoder) {
    const decoder = loose ? decoderLoose : decoderFatal
    return decoder.decode(arr)
  }

  if (shouldUseEscapePath && escape && decodeURIComponent) {
    // Build the byte -> escape() lookup table on first use
    if (!escapes) escapes = Array.from({ length: 256 }, (_, i) => escape(String.fromCharCode(i)))

    const total = arr.length
    let escaped
    if (total > 30_000) {
      // Build large inputs in 500-byte pieces and join once, to avoid excessive GC
      // from one long concatenation chain
      // TODO: recheck thresholds on Hermes (taken from hex)
      const parts = []
      for (let from = 0; from < total; from += 500) {
        parts.push(toEscapesPart(arr, from, Math.min(from + 500, total)))
      }

      escaped = parts.join('')
      parts.length = 0
    } else {
      escaped = toEscapesPart(arr, 0, total)
    }

    try {
      // ascii to utf8; the escape() values were precalculated in the table above
      return decodeURIComponent(escaped)
    } catch {
      if (!loose) throw new TypeError(E_STRICT)
      // Loose mode: fall through to the manual implementation below
    }
  }

  return js.decode(arr, loose)
}
|
|
106
|
+
|
|
107
|
+
/**
 * Strict string -> UTF-8 bytes. Calls encode() with loose = false and passes the
 * result through typedView() with the requested format (default 'uint8').
 */
export const utf8fromString = (str, format = 'uint8') => {
  const bytes = encode(str, false)
  return typedView(bytes, format)
}

/**
 * Loose string -> UTF-8 bytes. Same as utf8fromString but calls encode() with
 * loose = true, which skips the strict re-validation of the output.
 */
export const utf8fromStringLoose = (str, format = 'uint8') => {
  const bytes = encode(str, true)
  return typedView(bytes, format)
}

/** Strict UTF-8 bytes -> string: decode() with loose = false. */
export const utf8toString = (arr) => {
  return decode(arr, false)
}

/** Loose UTF-8 bytes -> string: decode() with loose = true. */
export const utf8toStringLoose = (arr) => {
  return decode(arr, true)
}
|