@exodus/bytes 1.0.0-rc.7 → 1.0.0-rc.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/encoding.js ADDED
@@ -0,0 +1,234 @@
1
+ // A limited subset of TextEncoder / TextDecoder API
2
+
3
+ // We can't return native TextDecoder if it's present, as Node.js one is broken on windows-1252 and we fix that
4
+ // We are also faster than Node.js built-in on both TextEncoder and TextDecoder
5
+
6
+ /* eslint-disable @exodus/import/no-unresolved */
7
+
8
+ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
9
+ import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
10
+ import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
11
+ import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
12
+ import { multibyteSupported } from './fallback/multi-byte.js'
13
+ import labels from './fallback/encoding.labels.js'
14
+ import { unfinishedBytes } from './fallback/encoding.util.js'
15
+
16
+ const E_OPTIONS = 'The "options" argument must be of type object'
17
+ const replacementChar = '\uFFFD'
18
+
19
// Lazily built reverse index: alias -> canonical encoding name
let labelsMap

/**
 * Resolves an encoding label to its canonical encoding name.
 *
 * Matching follows https://encoding.spec.whatwg.org/#names-and-labels: leading and trailing
 * ASCII whitespace is ignored and the comparison is ASCII case-insensitive.
 *
 * @param {*} enc - label to resolve; it is stringified before matching
 * @returns {string} canonical encoding name (a key of `labels`)
 * @throws {RangeError} if the label is unknown or resolves to 'replacement', which is rejected
 */
const normalizeEncoding = (enc) => {
  // fast path
  if (enc === 'utf-8' || enc === 'utf8') return 'utf-8'
  if (enc === 'windows-1252' || enc === 'ascii' || enc === 'latin1') return 'windows-1252'
  // full map
  // Lowercase ASCII letters only: String#toLowerCase also folds non-ASCII characters
  // (e.g. U+212A KELVIN SIGN becomes 'k'), which would let invalid labels match
  let low = `${enc}`.replace(/[A-Z]+/g, (s) => s.toLowerCase())
  if (low !== low.trim()) low = low.replace(/^[\t\n\f\r ]+/, '').replace(/[\t\n\f\r ]+$/, '') // only ASCII whitespace
  if (Object.hasOwn(labels, low) && low !== 'replacement') return low
  if (!labelsMap) {
    labelsMap = new Map()
    for (const [label, aliases] of Object.entries(labels)) {
      for (const alias of aliases) labelsMap.set(alias, label)
    }
  }

  const mapped = labelsMap.get(low)
  if (mapped && mapped !== 'replacement') return mapped
  throw new RangeError('Unknown encoding')
}
39
+
40
// Sets `key` on `obj` as a non-writable data property holding `value`; returns `obj`
const define = (obj, key, value) => {
  const descriptor = { value, writable: false }
  return Object.defineProperty(obj, key, descriptor)
}
41
+
42
/**
 * Returns a Uint8Array over the bytes of a supported binary source without copying them.
 *
 * @param {Uint8Array|ArrayBuffer|ArrayBufferView|SharedArrayBuffer} x - binary input
 * @returns {Uint8Array} `x` itself when it already is a Uint8Array, otherwise a view over the same memory
 * @throws {TypeError} if `x` is none of the supported types
 */
const fromSource = (x) => {
  if (x instanceof Uint8Array) return x
  if (x instanceof ArrayBuffer) return new Uint8Array(x)
  if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
  if (globalThis.SharedArrayBuffer && x instanceof globalThis.SharedArrayBuffer) {
    // A SharedArrayBuffer is a buffer, not a view: it has no .buffer / .byteOffset, wrap it directly
    return new Uint8Array(x)
  }

  throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
}
52
+
53
/**
 * Picks the bytes-to-string function for a Unicode encoding.
 *
 * @param {string} encoding - 'utf-8' selects the utf8 functions; 'utf-16le' selects the
 *   little-endian utf16 form; any other value selects the big-endian utf16 form
 * @param {boolean} loose - truthy selects the loose variant, falsy the strict one
 * @returns {Function} function taking the bytes and returning the decoded string
 */
function unicodeDecoder(encoding, loose) {
  if (encoding === 'utf-8') {
    // likely
    if (loose) return utf8toStringLoose
    return utf8toString
  }

  const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
  if (loose) return (bytes) => utf16toStringLoose(bytes, form)
  return (bytes) => utf16toString(bytes, form)
}
58
+
59
/**
 * A limited subset of the TextDecoder API.
 *
 * Unicode encodings (utf-8, utf-16le, utf-16be) are decoded here, including BOM removal and
 * carrying an unfinished trailing sequence between `decode(..., { stream: true })` calls.
 * Other encodings are delegated to the multi-byte or single-byte decoder factories.
 */
export class TextDecoder {
  #decode // lazily created decode function for this.encoding
  #unicode // true for utf-8 / utf-16le / utf-16be
  #multibyte // true when the encoding is handled by the multi-byte decoder
  #chunk // unfinished trailing bytes kept from the previous streaming call, if any
  #canBOM // whether a BOM at the start of the next decoded data should still be stripped

  /**
   * @param {string} [encoding='utf-8'] - encoding label, resolved by normalizeEncoding
   * @param {object|null} [options] - `fatal` and `ignoreBOM` flags; null is treated as empty
   * @throws {TypeError} if `options` is not an object
   * @throws {RangeError} if the encoding label is not recognized
   */
  constructor(encoding = 'utf-8', options = {}) {
    if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
    const enc = normalizeEncoding(encoding)
    define(this, 'encoding', enc)
    // typeof null === 'object', so options can be null here; Web IDL treats a null dictionary as empty
    define(this, 'fatal', Boolean(options?.fatal))
    define(this, 'ignoreBOM', Boolean(options?.ignoreBOM))
    this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
    this.#multibyte = !this.#unicode && enc !== 'windows-1252' && multibyteSupported(enc)
    this.#canBOM = this.#unicode && !this.ignoreBOM
  }

  get [Symbol.toStringTag]() {
    return 'TextDecoder'
  }

  /**
   * Decodes `input` to a string.
   *
   * @param {Uint8Array|ArrayBuffer|ArrayBufferView|SharedArrayBuffer} [input] - bytes to decode;
   *   undefined is treated as empty input
   * @param {object|null} [options] - `stream: true` keeps an unfinished trailing sequence for the next call
   * @returns {string} decoded text
   * @throws {TypeError} if `options` is not an object or `input` has an unsupported type
   */
  decode(input, options = {}) {
    if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
    const stream = Boolean(options?.stream) // null options behave like an empty dictionary
    let u = input === undefined ? new Uint8Array() : fromSource(input)

    if (this.#unicode) {
      let prefix
      if (this.#chunk) {
        if (u.length === 0) {
          if (stream) return '' // no change
          u = this.#chunk // process as final chunk to handle errors and state changes
        } else if (u.length < 3) {
          // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
          const a = new Uint8Array(u.length + this.#chunk.length)
          a.set(this.#chunk)
          a.set(u, this.#chunk.length)
          u = a
        } else {
          // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
          const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
          t.set(this.#chunk)
          t.set(u.subarray(0, 3), this.#chunk.length)

          // Stop at the first offset where unfinished bytes reaches 0 or fits into u
          // If that doesn't happen (u too short), just concat chunk and u completely
          for (let i = 1; i <= 3; i++) {
            const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
            if (unfinished <= i) {
              // Always reachable at 3, but we still need 'unfinished' value for it
              const add = i - unfinished // 0-3
              prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
              if (add > 0) u = u.subarray(add)
              break
            }
          }
        }

        this.#chunk = null
      } else if (u.byteLength === 0) {
        if (!stream) this.#canBOM = !this.ignoreBOM
        return ''
      }

      // For non-stream utf-8 we don't have to do this as it matches utf8toStringLoose already
      // For non-stream loose utf-16 we still have to do this as this API supports uneven byteLength unlike utf16toStringLoose
      let suffix = ''
      if (stream || (!this.fatal && this.encoding !== 'utf-8')) {
        const trail = unfinishedBytes(u, u.byteLength, this.encoding)
        if (trail > 0) {
          if (stream) {
            this.#chunk = Uint8Array.from(u.subarray(-trail)) // copy
          } else {
            // non-fatal mode as already checked
            suffix = replacementChar
          }

          u = u.subarray(0, -trail)
        }
      }

      if (this.#canBOM) {
        const bom = this.#findBom(prefix ?? u)
        if (bom) {
          if (stream) this.#canBOM = false
          if (prefix) {
            prefix = prefix.subarray(bom)
          } else {
            u = u.subarray(bom)
          }
        }
      }

      if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
      try {
        const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
        if (res.length > 0 && stream) this.#canBOM = false

        if (!stream) this.#canBOM = !this.ignoreBOM
        return res
      } catch (err) {
        this.#chunk = null // reset unfinished chunk on errors
        throw err
      }

      // eslint-disable-next-line no-else-return
    } else if (this.#multibyte) {
      if (!this.#decode) this.#decode = createMultibyteDecoder(this.encoding, !this.fatal) // can contain state!
      return this.#decode(u, stream)
    } else {
      if (!this.#decode) this.#decode = createSinglebyteDecoder(this.encoding, !this.fatal)
      return this.#decode(u)
    }
  }

  // Returns the byte length of the BOM for this.encoding at the start of `u`, or 0 if there is none
  #findBom(u) {
    switch (this.encoding) {
      case 'utf-8':
        return u.byteLength >= 3 && u[0] === 0xef && u[1] === 0xbb && u[2] === 0xbf ? 3 : 0
      case 'utf-16le':
        return u.byteLength >= 2 && u[0] === 0xff && u[1] === 0xfe ? 2 : 0
      case 'utf-16be':
        return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
    }

    throw new Error('Unreachable')
  }
}
188
+
189
/**
 * A limited subset of the TextEncoder API; the encoding is always utf-8.
 */
export class TextEncoder {
  constructor() {
    define(this, 'encoding', 'utf-8')
  }

  get [Symbol.toStringTag]() {
    return 'TextEncoder'
  }

  /**
   * Encodes a string to utf-8 bytes using the loose encoder.
   *
   * @param {*} [str=''] - value to encode; non-strings are stringified first
   * @returns {Uint8Array} encoded bytes with byteOffset 0
   */
  encode(str = '') {
    const text = typeof str === 'string' ? str : `${str}`
    const bytes = utf8fromStringLoose(text)
    // Ensure 0-offset. TODO: do we need this?
    if (bytes.byteOffset === 0) return bytes
    return bytes.slice(0)
  }

  /**
   * Encodes as much of `str` as fits into `target` without splitting a utf-8 sequence.
   *
   * @param {*} str - value to encode; non-strings are stringified first
   * @param {Uint8Array} target - destination array
   * @returns {{ read: number, written: number }} UTF-16 code units consumed and bytes written
   * @throws {TypeError} if `target` is not a Uint8Array
   */
  encodeInto(str, target) {
    const text = typeof str === 'string' ? str : `${str}`
    if (!(target instanceof Uint8Array)) throw new TypeError('Target must be an Uint8Array')
    if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved

    const full = utf8fromStringLoose(text) // TODO: perf?
    const capacity = target.length
    let out = full
    let read = text.length
    if (capacity < full.length) {
      out = full.subarray(0, capacity)
      if (full.length === text.length) {
        // ascii can be truncated
        read = out.length
      } else {
        const unfinished = unfinishedBytes(out, out.length, 'utf-8')
        if (unfinished > 0) out = out.subarray(0, out.length - unfinished)

        // We can do this because loose str -> u8 -> str preserves length, unlike loose u8 -> str -> u8
        // Each unpaired surrogate (1 charcode) is replaced with a single charcode
        read = utf8toStringLoose(out).length // FIXME: Converting back is very inefficient
      }
    }

    try {
      target.set(out)
    } catch {
      return { read: 0, written: 0 } // see above, likely detached but no .detached property support
    }

    return { read, written: out.length }
  }
}
@@ -1,15 +1,118 @@
1
1
  const { Buffer, TextEncoder, TextDecoder } = globalThis
2
2
  const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
3
- const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
4
- const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
5
- const nativeDecoder = isNative(TextDecoder) ? new TextDecoder('utf8', { ignoreBOM: true }) : null
6
- const nativeBuffer = haveNativeBuffer ? Buffer : null
3
+ let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
4
+ if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
5
+
6
+ export const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
7
+ export const nativeDecoder = isNative(TextDecoder)
8
+ ? new TextDecoder('utf-8', { ignoreBOM: true })
9
+ : null
10
+ export const nativeBuffer = haveNativeBuffer ? Buffer : null
11
+ export const isHermes = Boolean(globalThis.HermesInternal)
12
+ export const isDeno = Boolean(globalThis.Deno)
13
+ export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
7
14
 
8
15
  // Actually windows-1252, compatible with ascii and latin1 decoding
9
16
  // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
10
17
  // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
11
- const nativeDecoderLatin1 = isNative(TextDecoder)
12
- ? new TextDecoder('latin1', { ignoreBOM: true })
13
- : null
18
+ let nativeDecoderLatin1impl = null
19
+ if (isNative(TextDecoder)) {
20
+ // Not every barebone engine with TextDecoder supports encodings other than utf-8, so detect it
21
+ try {
22
+ nativeDecoderLatin1impl = new TextDecoder('latin1', { ignoreBOM: true })
23
+ } catch {}
24
+ }
25
+
26
+ export const nativeDecoderLatin1 = nativeDecoderLatin1impl
27
+ export const canDecoders = Boolean(nativeDecoderLatin1impl)
28
+
29
+ // Block Firefox < 146 specifically from using native hex/base64, as it's very slow there
30
+ // Refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1994067 (and linked issues), fixed in 146
31
+ // Before that, all versions of Firefox >= 133 are slow
32
+ // TODO: this could be removed when < 146 usage diminishes (note ESR)
33
+ // We do not worry about false-negatives here but worry about false-positives!
34
/**
 * Decides whether native hex/base64 should be avoided, which is the case only on Firefox < 146.
 * False negatives are acceptable here, false positives are not.
 *
 * @returns {boolean} true only when `new WeakSet().add(Symbol())` throws, the global has
 *   'onmozfullscreenerror', and the user agent mentions Firefox
 */
function shouldSkipBuiltins() {
  const root = globalThis
  // First, attempt to exclude as many things as we can using trivial checks, just in case, and to not hit ua
  if (haveNativeBuffer) return false
  if (isHermes) return false
  if (!root.window || root.chrome || !root.navigator) return false
  try {
    // This was fixed specifically in Firefox 146. Other engines except Hermes (already returned) get this right
    new WeakSet().add(Symbol()) // eslint-disable-line symbol-description
    return false
  } catch {
    // Decided inside catch and not after it, in case something too smart optimizes out the code in try.
    // A false negative is acceptable in that case
    if ('onmozfullscreenerror' in root) {
      // Firefox has it (might remove in the future, but we don't care)
      return /firefox/i.test(root.navigator.userAgent || '') // as simple as we can
    }

    return false
  }

  return false // eslint-disable-line no-unreachable
}
50
+
51
+ export const skipWeb = shouldSkipBuiltins()
52
+
53
/**
 * Builds a string from a[start..end) by appending m[byte] for each element, four per iteration.
 *
 * @param {ArrayLike<number>} a - source values used as indexes into `m`
 * @param {number} start - first index, inclusive
 * @param {number} end - last index, exclusive
 * @param {ArrayLike<string>} m - lookup table of string pieces
 * @returns {string} concatenation of the mapped pieces
 */
function decodePartAddition(a, start, end, m) {
  let out = ''
  let pos = start
  const last3 = end - 3
  // Unrolled main loop: groups of 4 elements
  while (pos < last3) {
    out += m[a[pos]]
    out += m[a[pos + 1]]
    out += m[a[pos + 2]]
    out += m[a[pos + 3]]
    pos += 4
  }

  // Remaining 0-3 elements
  for (; pos < end; pos++) out += m[a[pos]]
  return out
}
70
+
71
/**
 * Builds a string from a[start..end) by appending m[byte] for each element, sixteen per
 * iteration through a single template literal. Decoding with templates is faster on Hermes.
 *
 * @param {ArrayLike<number>} a - source values used as indexes into `m`
 * @param {number} start - first index, inclusive
 * @param {number} end - last index, exclusive
 * @param {ArrayLike<string>} m - lookup table of string pieces
 * @returns {string} concatenation of the mapped pieces
 */
function decodePartTemplates(a, start, end, m) {
  let out = ''
  let pos = start
  const last15 = end - 15
  // Unrolled main loop: groups of 16 elements joined by one template
  while (pos < last15) {
    out += `${m[a[pos]]}${m[a[pos + 1]]}${m[a[pos + 2]]}${m[a[pos + 3]]}${m[a[pos + 4]]}${m[a[pos + 5]]}${m[a[pos + 6]]}${m[a[pos + 7]]}${m[a[pos + 8]]}${m[a[pos + 9]]}${m[a[pos + 10]]}${m[a[pos + 11]]}${m[a[pos + 12]]}${m[a[pos + 13]]}${m[a[pos + 14]]}${m[a[pos + 15]]}`
    pos += 16
  }

  // Remaining 0-15 elements
  for (; pos < end; pos++) out += m[a[pos]]
  return out
}
98
+
99
+ const decodePart = isHermes ? decodePartTemplates : decodePartAddition
100
/**
 * Converts arr[start..end) to a string by mapping every element through lookup table `m`.
 * Ranges longer than 30000 elements are decoded in 500-element pieces that are joined once.
 *
 * @param {ArrayLike<number>} arr - source values used as indexes into `m`
 * @param {number} start - first index, inclusive
 * @param {number} end - last index, exclusive
 * @param {ArrayLike<string>} m - lookup table of string pieces
 * @returns {string} concatenation of the mapped pieces
 */
export function decode2string(arr, start, end, m) {
  // Range length is end - start; the reversed subtraction was never positive, so chunking never ran
  if (end - start > 30_000) {
    // Limit concatenation to avoid excessive GC
    // Thresholds checked on Hermes for toHex
    const concat = []
    for (let i = start; i < end; ) {
      const step = i + 500
      const iNext = step > end ? end : step
      concat.push(decodePart(arr, i, iNext, m))
      i = iNext
    }

    const res = concat.join('')
    concat.length = 0
    return res
  }

  return decodePart(arr, start, end, m)
}
@@ -1,5 +1,5 @@
1
1
  import { assertUint8 } from '../assert.js'
2
- import { nativeEncoder, nativeDecoder } from './_utils.js'
2
+ import { nativeEncoder, nativeDecoder, isHermes } from './_utils.js'
3
3
  import { encodeAscii, decodeAscii } from './latin1.js'
4
4
 
5
5
  // See https://datatracker.ietf.org/doc/html/rfc4648
@@ -14,7 +14,7 @@ export const E_PADDING = 'Invalid base32 padding'
14
14
  export const E_LENGTH = 'Invalid base32 length'
15
15
  export const E_LAST = 'Invalid last chunk'
16
16
 
17
- const useTemplates = Boolean(globalThis.HermesInternal) // Faster on Hermes and JSC, but we use it only on Hermes
17
+ const useTemplates = isHermes // Faster on Hermes and JSC, but we use it only on Hermes
18
18
 
19
19
  // We construct output by concatenating chars, this seems to be fine enough on modern JS engines
20
20
  export function toBase32(arr, isBase32Hex, padding) {
@@ -0,0 +1,46 @@
1
+ // See https://encoding.spec.whatwg.org/#names-and-labels
2
+
3
+ /* eslint-disable @exodus/export-default/named */
4
+ // prettier-ignore
5
+ export default {
6
+ 'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
7
+ ibm866: ['866', 'cp866', 'csibm866'],
8
+ 'iso-8859-2': ['csisolatin2', 'iso-ir-101', 'iso8859-2', 'iso88592', 'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'],
9
+ 'iso-8859-3': ['csisolatin3', 'iso-ir-109', 'iso8859-3', 'iso88593', 'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3'],
10
+ 'iso-8859-4': ['csisolatin4', 'iso-ir-110', 'iso8859-4', 'iso88594', 'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4'],
11
+ 'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144', 'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988'],
12
+ 'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127', 'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'],
13
+ 'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7', 'iso_8859-7:1987', 'sun_eu_greek'],
14
+ 'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8', 'iso_8859-8:1988', 'visual'],
15
+ 'iso-8859-8-i': ['csiso88598i', 'logical'],
16
+ 'iso-8859-10': ['csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'],
17
+ 'iso-8859-13': ['iso8859-13', 'iso885913'],
18
+ 'iso-8859-14': ['iso8859-14', 'iso885914'],
19
+ 'iso-8859-15': ['csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
20
+ 'iso-8859-16': [],
21
+ 'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
22
+ 'koi8-u': ['koi8-ru'],
23
+ macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
24
+ 'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
25
+ 'windows-1250': ['cp1250', 'x-cp1250'],
26
+ 'windows-1251': ['cp1251', 'x-cp1251'],
27
+ 'windows-1252': ['ansi_x3.4-1968', 'ascii', 'cp1252', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii', 'x-cp1252'],
28
+ 'windows-1253': ['cp1253', 'x-cp1253'],
29
+ 'windows-1254': ['cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5', 'x-cp1254'],
30
+ 'windows-1255': ['cp1255', 'x-cp1255'],
31
+ 'windows-1256': ['cp1256', 'x-cp1256'],
32
+ 'windows-1257': ['cp1257', 'x-cp1257'],
33
+ 'windows-1258': ['cp1258', 'x-cp1258'],
34
+ 'x-mac-cyrillic': ['x-mac-ukrainian'],
35
+ gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
36
+ gb18030: [],
37
+ big5: ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
38
+ 'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
39
+ 'iso-2022-jp': ['csiso2022jp'],
40
+ shift_jis: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j', 'x-sjis'],
41
+ 'euc-kr': ['cseuckr', 'csksc56011987', 'iso-ir-149', 'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'],
42
+ replacement: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
43
+ 'utf-16be': ['unicodefffe'],
44
+ 'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
45
+ 'x-user-defined': [],
46
+ }
@@ -0,0 +1,34 @@
1
/**
 * Counts the bytes at the end of u[0..len) that form the start of a sequence which is not
 * complete yet: a utf-8 lead with too few valid continuation bytes, or for utf-16 a dangling
 * odd byte and/or a lone lead surrogate.
 *
 * @param {ArrayLike<number>} u - bytes
 * @param {number} len - number of leading bytes of `u` to consider
 * @param {string} enc - 'utf-8', 'utf-16le' or 'utf-16be'
 * @returns {number} 0-3
 * @throws {Error} for any other `enc`
 */
export function unfinishedBytes(u, len, enc) {
  if (enc === 'utf-8') {
    // Go back over 0-2 trailing continuation bytes (10xxxxxx)
    let cont = 0
    while (cont < 2 && cont < len && (u[len - cont - 1] & 0xc0) === 0x80) cont++
    if (cont === len) return 0 // no space for lead

    const lead = u[len - cont - 1]
    if (lead < 0xc2 || lead > 0xf4) return 0 // not a lead
    if (cont === 0) return 1 // only the lead is present; a 2-byte sequence always ends up here
    if (lead < 0xe0) return 0 // 2-byte lead that already has its continuation
    if (lead < 0xf0 && cont >= 2) return 0 // 3-byte lead that already has both continuations

    // The allowed range of the first continuation byte depends on the lead
    let lower = 0x80
    if (lead === 0xf0) lower = 0x90
    else if (lead === 0xe0) lower = 0xa0

    let upper = 0xbf
    if (lead === 0xf4) upper = 0x8f
    else if (lead === 0xed) upper = 0x9f

    const first = u[len - cont]
    if (first >= lower && first <= upper) return cont + 1
    return 0
  }

  if (enc === 'utf-16le' || enc === 'utf-16be') {
    let pending = len % 2 === 0 ? 0 : 1 // uneven bytes
    const whole = len - pending
    if (whole >= 2) {
      const hi = enc === 'utf-16le' ? u[whole - 1] : u[whole - 2]
      const lo = enc === 'utf-16le' ? u[whole - 2] : u[whole - 1]
      const unit = (hi << 8) ^ lo
      if (unit >= 0xd8_00 && unit < 0xdc_00) pending += 2 // lone lead
    }

    return pending
  }

  throw new Error('Unsupported encoding')
}
package/fallback/hex.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { assertUint8 } from '../assert.js'
2
- import { nativeDecoder, nativeEncoder } from './_utils.js'
2
+ import { nativeDecoder, nativeEncoder, decode2string } from './_utils.js'
3
3
  import { encodeAscii, decodeAscii } from './latin1.js'
4
4
 
5
5
  let hexArray // array of 256 bytes converted to two-char hex strings
@@ -11,58 +11,6 @@ const allowed = '0123456789ABCDEFabcdef'
11
11
 
12
12
  export const E_HEX = 'Input is not a hex string'
13
13
 
14
- function toHexPartAddition(a, start, end) {
15
- let o = ''
16
- let i = start
17
- const h = hexArray
18
- for (const last3 = end - 3; i < last3; i += 4) {
19
- const x0 = a[i]
20
- const x1 = a[i + 1]
21
- const x2 = a[i + 2]
22
- const x3 = a[i + 3]
23
- o += h[x0]
24
- o += h[x1]
25
- o += h[x2]
26
- o += h[x3]
27
- }
28
-
29
- while (i < end) o += h[a[i++]]
30
- return o
31
- }
32
-
33
- // Optimiziation for Hermes which is the main user of fallback
34
- function toHexPartTemplates(a, start, end) {
35
- let o = ''
36
- let i = start
37
- const h = hexArray
38
- for (const last15 = end - 15; i < last15; i += 16) {
39
- const x0 = a[i]
40
- const x1 = a[i + 1]
41
- const x2 = a[i + 2]
42
- const x3 = a[i + 3]
43
- const x4 = a[i + 4]
44
- const x5 = a[i + 5]
45
- const x6 = a[i + 6]
46
- const x7 = a[i + 7]
47
- const x8 = a[i + 8]
48
- const x9 = a[i + 9]
49
- const x10 = a[i + 10]
50
- const x11 = a[i + 11]
51
- const x12 = a[i + 12]
52
- const x13 = a[i + 13]
53
- const x14 = a[i + 14]
54
- const x15 = a[i + 15]
55
- o += `${h[x0]}${h[x1]}${h[x2]}${h[x3]}${h[x4]}${h[x5]}${h[x6]}${h[x7]}${h[x8]}${h[x9]}${h[x10]}${h[x11]}${h[x12]}${h[x13]}${h[x14]}${h[x15]}`
56
- }
57
-
58
- while (i < end) o += h[a[i++]]
59
- return o
60
- }
61
-
62
- // Using templates is significantly faster in Hermes and JSC
63
- // It's harder to detect JSC and not important anyway as it has native impl, so we detect only Hermes
64
- const toHexPart = globalThis.HermesInternal ? toHexPartTemplates : toHexPartAddition
65
-
66
14
  export function toHex(arr) {
67
15
  assertUint8(arr)
68
16
 
@@ -100,23 +48,7 @@ export function toHex(arr) {
100
48
  return decodeAscii(oa)
101
49
  }
102
50
 
103
- if (length > 30_000) {
104
- // Limit concatenation to avoid excessive GC
105
- // Thresholds checked on Hermes
106
- const concat = []
107
- for (let i = 0; i < length; ) {
108
- const step = i + 500
109
- const end = step > length ? length : step
110
- concat.push(toHexPart(arr, i, end))
111
- i = end
112
- }
113
-
114
- const res = concat.join('')
115
- concat.length = 0
116
- return res
117
- }
118
-
119
- return toHexPart(arr, 0, length)
51
+ return decode2string(arr, 0, length, hexArray)
120
52
  }
121
53
 
122
54
  export function fromHex(str) {
@@ -1,4 +1,11 @@
1
- import { nativeEncoder, nativeDecoder, nativeDecoderLatin1, nativeBuffer } from './_utils.js'
1
+ import {
2
+ nativeEncoder,
3
+ nativeDecoder,
4
+ nativeDecoderLatin1,
5
+ nativeBuffer,
6
+ isHermes,
7
+ isDeno,
8
+ } from './_utils.js'
2
9
 
3
10
  // See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
4
11
  // On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
@@ -58,7 +65,7 @@ export function decodeLatin1(arr, start = 0, stop = arr.length) {
58
65
  export const decodeAscii = nativeBuffer
59
66
  ? (a) =>
60
67
  // Buffer is faster on Node.js (but only for long enough data), if we know that output is ascii
61
- a.byteLength >= 0x3_00
68
+ a.byteLength >= 0x3_00 && !isDeno
62
69
  ? nativeBuffer.from(a.buffer, a.byteOffset, a.byteLength).latin1Slice(0, a.byteLength) // .latin1Slice is faster than .asciiSlice
63
70
  : nativeDecoder.decode(a) // On Node.js, utf8 decoder is faster than latin1
64
71
  : nativeDecoderLatin1
@@ -67,7 +74,7 @@ export const decodeAscii = nativeBuffer
67
74
 
68
75
  /* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
69
76
 
70
- export const encodeCharcodes = globalThis.HermesInternal
77
+ export const encodeCharcodes = isHermes
71
78
  ? (str, arr) => {
72
79
  const length = str.length
73
80
  if (length > 64) {
@@ -91,7 +98,7 @@ export const encodeCharcodes = globalThis.HermesInternal
91
98
  export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
92
99
 
93
100
  // Expects nativeEncoder to be present
94
- export const encodeAscii = globalThis.HermesInternal
101
+ export const encodeAscii = isHermes
95
102
  ? (str, ERR) => {
96
103
  // Much faster in Hermes
97
104
  const codes = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
@@ -0,0 +1 @@
1
+ module.exports = () => require('./multi-byte.encodings.json') // lazy-load