@exodus/bytes 1.0.0-rc.2 → 1.0.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/utf8.js ADDED
@@ -0,0 +1,110 @@
1
+ import { assert, assertUint8 } from './assert.js'
2
+ import { typedView } from './array.js'
3
+ import * as js from './fallback/utf8.js'
4
+
5
// Platform primitives are read once from globalThis; any of them may be missing (Buffer is optional)
const { Buffer, TextEncoder, TextDecoder, decodeURIComponent, escape } = globalThis

// Truthy only when a Buffer global exists and does not expose TYPED_ARRAY_SUPPORT
// NOTE(review): presumably this excludes userland Buffer polyfills, which set that flag - confirm
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT

// A constructor counts as native when a native Buffer is present (Node.js TextDecoder/TextEncoder
// are considered native), or when its source text contains the '[native code]' marker
function isNative(x) {
  return x && (haveNativeBuffer || String(x).includes('[native code]'))
}

const haveDecoder = isNative(TextDecoder)
const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null

// ignoreBOM: true leaves a BOM as-is, i.e. it stays present in the decoded output:
// nothing should be stripped unexpectedly
const makeDecoder = (fatal) => (haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true, fatal }) : null)
const decoderFatal = makeDecoder(true)
const decoderLoose = makeDecoder(false)

// Error messages shared with the pure-JS fallback implementation
const { E_STRICT, E_STRICT_UNICODE } = js

// The escape()/decodeURIComponent() decode path is faster only on Hermes; the JS path beats it on normal engines
const shouldUseEscapePath = Boolean(globalThis.HermesInternal)
18
+
19
/**
 * Validates natively encoded bytes when strict (non-loose) encoding was requested.
 *
 * The output is scanned for the byte sequence EF BF BD (UTF-8 for U+FFFD). On the first hit
 * the bytes are decoded back and compared with the input string: a mismatch means the encoder
 * substituted a replacement character for something in the input, and the assertion fails
 * with E_STRICT_UNICODE.
 *
 * @param {string} str - the string that was encoded
 * @param {boolean} loose - when true, no validation is performed
 * @param {Uint8Array} res - the encoded bytes
 * @returns {Uint8Array} `res` itself, unmodified
 */
function deLoose(str, loose, res) {
  if (loose) return res

  // A 3-byte sequence cannot start in the last two positions
  const limit = res.length - 2
  for (let from = 0; from < limit; ) {
    const at = res.indexOf(0xef, from)
    if (at === -1) break

    const isReplacementChar = res[at + 1] === 0xbf && res[at + 2] === 0xbd
    if (isReplacementChar) {
      // One round-trip comparison covers the whole input, so there is no need to keep scanning
      assert(str === decode(res), E_STRICT_UNICODE)
      break
    }

    from = at + 1
  }

  return res
}
38
+
39
/**
 * Encodes a string to UTF-8 bytes using the fastest available backend.
 *
 * @param {string} str - input; asserted to be a string
 * @param {boolean} [loose=false] - when false, output of native encoders is re-validated by deLoose
 * @returns {Uint8Array} encoded bytes (a Buffer when the native Buffer path is taken)
 */
function encode(str, loose = false) {
  assert(typeof str === 'string')

  let bytes
  if (haveNativeBuffer) {
    bytes = Buffer.from(str) // faster on ascii on Node.js
  } else if (nativeEncoder) {
    bytes = nativeEncoder.encode(str) // Node.js, browsers, and Hermes
  } else {
    // unescape + encodeURIComponent is not worth it here: it is slower than JS on normal engines,
    // and modern Hermes already has TextEncoder
    return js.encode(str, loose)
  }

  return deLoose(str, loose, bytes)
}
46
+
47
// Lookup table: byte value -> escape()'d string for that byte; filled lazily by decode()
let escapes

/**
 * Concatenates the escape-table entries for the bytes in arr[start, end).
 *
 * @param {Uint8Array} arr - source bytes
 * @param {number} start - first index, inclusive
 * @param {number} end - last index, exclusive
 * @returns {string} the escaped form of the byte range
 */
function toEscapesPart(arr, start, end) {
  let out = ''
  let idx = start

  // Four bytes per iteration: the unrolled loop is faster
  for (const unrolledEnd = end - 3; idx < unrolledEnd; idx += 4) {
    const b0 = arr[idx]
    const b1 = arr[idx + 1]
    const b2 = arr[idx + 2]
    const b3 = arr[idx + 3]
    out += escapes[b0]
    out += escapes[b1]
    out += escapes[b2]
    out += escapes[b3]
  }

  // Remaining 0..3 bytes
  for (; idx < end; idx++) out += escapes[arr[idx]]

  return out
}
68
+
69
/**
 * Decodes UTF-8 bytes to a string using the fastest available backend.
 *
 * @param {Uint8Array} arr - input bytes; checked with assertUint8
 * @param {boolean} [loose=false] - selects the non-fatal decoder when true, the fatal one otherwise
 * @returns {string} the decoded string
 * @throws {TypeError} with message E_STRICT when the escape path rejects the input in strict mode
 */
function decode(arr, loose = false) {
  assertUint8(arr)

  // Node.js and browsers. Native Buffer is deliberately not used here: it's not faster than
  // TextDecoder, needs rechecks in non-loose mode, and Node.js has TextDecoder anyway
  if (haveDecoder) {
    const decoder = loose ? decoderLoose : decoderFatal
    return decoder.decode(arr)
  }

  const canUseEscapePath = shouldUseEscapePath && escape && decodeURIComponent
  if (!canUseEscapePath) return js.decode(arr, loose)

  // This codepath gives a ~2x perf boost on Hermes
  if (escapes === undefined) {
    escapes = Array.from({ length: 256 }, (_, i) => escape(String.fromCharCode(i)))
  }

  const total = arr.length
  let escaped
  if (total <= 30_000) {
    escaped = toEscapesPart(arr, 0, total)
  } else {
    // Build the string in 500-byte pieces to limit concatenation and avoid excessive GC
    // TODO: recheck thresholds on Hermes (taken from hex)
    const parts = []
    for (let from = 0; from < total; from += 500) {
      parts.push(toEscapesPart(arr, from, Math.min(from + 500, total)))
    }

    escaped = parts.join('')
    parts.length = 0
  }

  try {
    return decodeURIComponent(escaped) // ascii to utf8, escape() is precalculated
  } catch {
    if (!loose) throw new TypeError(E_STRICT)
    // Loose mode: fall through to the manual implementation below
  }

  return js.decode(arr, loose)
}
106
+
107
// Public API: thin wrappers that fix the strict/loose flag of encode() and decode().
// The encoders pass their result through typedView() together with `format` (default 'uint8').

/** Strict string -> UTF-8 bytes. */
export const utf8fromString = (str, format = 'uint8') => {
  const bytes = encode(str, false)
  return typedView(bytes, format)
}

/** Loose string -> UTF-8 bytes: skips the strict-mode recheck of the encoded output. */
export const utf8fromStringLoose = (str, format = 'uint8') => {
  const bytes = encode(str, true)
  return typedView(bytes, format)
}

/** Strict UTF-8 bytes -> string. */
export const utf8toString = (arr) => {
  return decode(arr, false)
}

/** Loose UTF-8 bytes -> string. */
export const utf8toStringLoose = (arr) => {
  return decode(arr, true)
}