@exodus/bytes 1.0.0-rc.8 → 1.0.0-rc.9

package/README.md CHANGED
@@ -1,27 +1,151 @@
1
1
  # `@exodus/bytes`
2
2
 
3
- `Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, and `utf8`
3
+ `Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, `utf8`, `utf16`, `bech32` and `wif`
4
4
 
5
- [Fast](./Performance.md)
5
+ ## Strict
6
6
 
7
- Performs proper input validation
7
+ Performs proper input validation, ensuring no garbage-in-garbage-out
8
+
9
+ Tested on Node.js, Deno, Bun, browsers (including Servo), Hermes, QuickJS, and bare-bones engines in CI [(how?)](https://github.com/ExodusMovement/test#exodustest)
10
+
11
+ ## Fast
12
+
13
+ * `10-20x` faster than `Buffer` polyfill
14
+ * `2-10x` faster than `iconv-lite`
15
+
16
+ The numbers above are for the pure JS fallback
17
+
18
+ It's up to `100x` faster when a native implementation is available \
19
+ e.g. in `utf8fromString` on Hermes / React Native or `fromHex` in Chrome
20
+
21
+ Also:
22
+ * `3-8x` faster than `bs58`
23
+ * `10-30x` faster than `@scure/base` (or `>100x` on Node.js <25)
24
+ * Faster in `utf8toString` / `utf8fromString` than `Buffer` or `TextDecoder` / `TextEncoder` on Node.js
25
+
26
+ See [Performance](./Performance.md) for more info
27
+
28
+ ## TextEncoder / TextDecoder polyfill
29
+
30
+ ```js
31
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
32
+ ```
33
+
34
+ Less than half the bundle size of [text-encoding](https://npmjs.com/text-encoding), [whatwg-encoding](https://npmjs.com/whatwg-encoding) or [iconv-lite](https://npmjs.com/iconv-lite) (gzipped or not), and [much faster](#fast).
35
+
36
+ Spec compliant, passing WPT and covered with extra tests.
37
+
38
+ Moreover, tests for this library uncovered [bugs in all major implementations](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit).
39
+
40
+ [Faster than Node.js native implementation on Node.js](https://github.com/nodejs/node/issues/61041#issuecomment-3649242024).
41
+
42
+ ### Caveat: `TextDecoder` / `TextEncoder` APIs are lossy by default per spec
43
+
44
+ _These are provided only as a compatibility layer; prefer the hardened APIs in new code._
45
+
46
+ * `TextDecoder` can (and should) be used with the `{ fatal: true }` option wherever correctness / lossless transforms are required
47
+
48
+ * `TextEncoder` does not support a fatal mode per spec; it always performs replacement.
49
+
50
+ That is not suitable for hashing, cryptography or consensus applications.\
51
+ Otherwise there would be non-equal strings with equal signatures and hashes: the collision is caused by the lossy transform of a JS string to bytes.
52
+ Such strings also survive e.g. `JSON.stringify` / `JSON.parse` or being sent over the network.
53
+
54
+ Use the strict APIs in new applications; see `utf8fromString` / `utf16fromString` below.\
55
+ Those throw on non-well-formed strings by default.
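+
+ For illustration, a minimal sketch of the difference (exact error types are left out, as they are implementation details):
+
+ ```js
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
+ import { utf8fromString } from '@exodus/bytes/utf8.js'
+
+ // Lossy per spec: an unpaired surrogate is silently replaced with U+FFFD
+ new TextEncoder().encode('\uD800') // Uint8Array [ 0xef, 0xbf, 0xbd ]
+
+ // TextDecoder only errors on invalid bytes when { fatal: true } is passed
+ new TextDecoder('utf-8', { fatal: true }).decode(Uint8Array.of(0xff)) // throws
+
+ // Strict API: rejects non-well-formed strings instead of replacing
+ utf8fromString('\uD800') // throws
+ ```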
8
56
 
9
57
  ## API
10
58
 
59
+ ### `@exodus/bytes/utf8.js`
60
+
61
+ ##### `utf8fromString(str, format = 'uint8')`
62
+ ##### `utf8fromStringLoose(str, format = 'uint8')`
63
+ ##### `utf8toString(arr)`
64
+ ##### `utf8toStringLoose(arr)`
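+
+ A usage sketch (only the default `'uint8'` format is shown; other `format` values are not covered here):
+
+ ```js
+ import { utf8fromString, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
+
+ const bytes = utf8fromString('héllo') // Uint8Array of UTF-8 bytes, throws on lone surrogates
+ utf8toString(bytes) // 'héllo', throws on invalid UTF-8
+ utf8toStringLoose(Uint8Array.of(0xff)) // '\uFFFD', replaces invalid bytes instead of throwing
+ ```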
65
+
66
+ ### `@exodus/bytes/utf16.js`
67
+
68
+ ##### `utf16fromString(str, format = 'uint16')`
69
+ ##### `utf16fromStringLoose(str, format = 'uint16')`
70
+ ##### `utf16toString(arr, 'uint16')`
71
+ ##### `utf16toStringLoose(arr, 'uint16')`
72
+
73
+ ### `@exodus/bytes/single-byte.js`
74
+
75
+ ##### `createSinglebyteDecoder(encoding, loose = false)`
76
+
77
+ Create a decoder for a supported one-byte `encoding`.
78
+
79
+ Returns a function `decode(arr)` that decodes bytes to a string.
80
+
81
+ ### `@exodus/bytes/multi-byte.js`
82
+
83
+ ##### `createMultibyteDecoder(encoding, loose = false)`
84
+
85
+ Create a decoder for a supported legacy multi-byte `encoding`.
86
+
87
+ Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
88
+
89
+ The returned function keeps internal state across calls while `stream = true` is used.
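+
+ A streaming sketch (`0x83 0x65` is assumed to form one valid `shift_jis` byte pair):
+
+ ```js
+ import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
+
+ const decode = createMultibyteDecoder('shift_jis')
+
+ // A multi-byte sequence split across chunks: pass stream = true for every chunk except the last
+ decode(Uint8Array.of(0x83), true) // '' (the lead byte is buffered in the decoder state)
+ decode(Uint8Array.of(0x65)) // the buffered lead byte and 0x65 decode together as one character
+ ```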
90
+
91
+ ##### `windows1252toString(arr)`
92
+
93
+ Decode `windows-1252` bytes to a string.
94
+
95
+ Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
96
+
97
+ There is no loose variant for this encoding, all bytes can be decoded.
98
+
99
+ Same as `windows1252toString = createSinglebyteDecoder('windows-1252')`.
100
+
11
101
  ### `@exodus/bytes/hex.js`
12
102
 
103
+ ##### `toHex(arr)`
104
+ ##### `fromHex(string)`
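+
+ A sketch (lowercase output assumed):
+
+ ```js
+ import { toHex, fromHex } from '@exodus/bytes/hex.js'
+
+ toHex(Uint8Array.of(0xde, 0xad, 0xbe, 0xef)) // 'deadbeef'
+ fromHex('deadbeef') // Uint8Array [ 0xde, 0xad, 0xbe, 0xef ]
+ fromHex('not hex!') // throws, invalid input is rejected rather than ignored
+ ```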
105
+
13
106
  ### `@exodus/bytes/base64.js`
14
107
 
108
+ ##### `toBase64(arr, { padding = true })`
109
+ ##### `toBase64url(arr, { padding = false })`
110
+ ##### `fromBase64(str, { format = 'uint8', padding = 'both' })`
111
+ ##### `fromBase64url(str, { format = 'uint8', padding = false })`
112
+ ##### `fromBase64any(str, { format = 'uint8', padding = 'both' })`
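+
+ A sketch; the `padding: 'both'` default is read here as accepting both padded and unpadded input:
+
+ ```js
+ import { toBase64, toBase64url, fromBase64 } from '@exodus/bytes/base64.js'
+
+ toBase64(Uint8Array.of(0xff)) // '/w==' (padded by default)
+ toBase64url(Uint8Array.of(0xff)) // '_w' (URL-safe alphabet, unpadded by default)
+ fromBase64('/w==') // Uint8Array [ 0xff ]
+ ```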
113
+
15
114
  ### `@exodus/bytes/base32.js`
16
115
 
17
- ### `@exodus/bytes/hex.js`
116
+ ##### `toBase32(arr, { padding = false })`
117
+ ##### `toBase32hex(arr, { padding = false })`
118
+ ##### `fromBase32(str, { format = 'uint8', padding = 'both' })`
119
+ ##### `fromBase32hex(str, { format = 'uint8', padding = 'both' })`
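+
+ A sketch using the RFC 4648 base32 alphabet:
+
+ ```js
+ import { toBase32, fromBase32 } from '@exodus/bytes/base32.js'
+
+ toBase32(Uint8Array.of(0xff)) // '74' (unpadded by default)
+ fromBase32('74') // Uint8Array [ 0xff ]
+ ```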
120
+
121
+ ### `@exodus/bytes/bech32.js`
122
+
123
+ ##### `getPrefix(str, limit = 90)`
124
+ ##### `toBech32(prefix, bytes, limit = 90)`
125
+ ##### `fromBech32(str, limit = 90)`
126
+ ##### `toBech32m(prefix, bytes, limit = 90)`
127
+ ##### `fromBech32m(str, limit = 90)`
18
128
 
19
129
  ### `@exodus/bytes/base58.js`
20
130
 
131
+ ##### `toBase58(arr)`
132
+ ##### `fromBase58(str, format = 'uint8')`
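+
+ A sketch, assuming the Bitcoin base58 alphabet:
+
+ ```js
+ import { toBase58, fromBase58 } from '@exodus/bytes/base58.js'
+
+ const encoded = toBase58(Uint8Array.of(0x00, 0x01, 0x02)) // '15T' (a leading zero byte becomes '1')
+ fromBase58(encoded) // Uint8Array [ 0x00, 0x01, 0x02 ]
+ fromBase58('0OIl') // throws, these characters are not in the base58 alphabet
+ ```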
133
+
21
134
  ### `@exodus/bytes/base58check.js`
22
135
 
136
+ ##### `async toBase58check(arr)`
137
+ ##### `toBase58checkSync(arr)`
138
+ ##### `async fromBase58check(str, format = 'uint8')`
139
+ ##### `fromBase58checkSync(str, format = 'uint8')`
140
+ ##### `makeBase58check(hashAlgo, hashAlgoSync)`
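+
+ A sketch; the checksum is assumed to be the conventional 4-byte double-SHA-256, and the async variants are assumed to exist for environments where hashing is only available asynchronously:
+
+ ```js
+ import { toBase58checkSync, fromBase58checkSync } from '@exodus/bytes/base58check.js'
+
+ const payload = Uint8Array.of(0x00, 0x01, 0x02)
+ const encoded = toBase58checkSync(payload) // payload + checksum, base58-encoded
+ fromBase58checkSync(encoded) // Uint8Array [ 0x00, 0x01, 0x02 ], throws if the checksum does not match
+ ```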
141
+
23
142
  ### `@exodus/bytes/wif.js`
24
143
 
144
+ ##### `async fromWifString(string, version)`
145
+ ##### `fromWifStringSync(string, version)`
146
+ ##### `async toWifString({ version, privateKey, compressed })`
147
+ ##### `toWifStringSync({ version, privateKey, compressed })`
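+
+ A sketch; the return shape of `fromWifString` / `fromWifStringSync` is assumed from the `toWifString` parameters:
+
+ ```js
+ import { toWifStringSync, fromWifStringSync } from '@exodus/bytes/wif.js'
+
+ const privateKey = new Uint8Array(32).fill(1) // placeholder 32-byte key
+ const wif = toWifStringSync({ version: 0x80, privateKey, compressed: true })
+ fromWifStringSync(wif, 0x80) // expected to yield the { privateKey, compressed } pair back
+ ```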
148
+
25
149
  ## License
26
150
 
27
151
  [MIT](./LICENSE)
package/encoding.js ADDED
@@ -0,0 +1,234 @@
1
+ // A limited subset of TextEncoder / TextDecoder API
2
+
3
+ // We can't return native TextDecoder if it's present, as Node.js one is broken on windows-1252 and we fix that
4
+ // We are also faster than Node.js built-in on both TextEncoder and TextDecoder
5
+
6
+ /* eslint-disable @exodus/import/no-unresolved */
7
+
8
+ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
9
+ import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
10
+ import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
11
+ import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
12
+ import { multibyteSupported } from './fallback/multi-byte.js'
13
+ import labels from './fallback/encoding.labels.js'
14
+ import { unfinishedBytes } from './fallback/encoding.util.js'
15
+
16
+ const E_OPTIONS = 'The "options" argument must be of type object'
17
+ const replacementChar = '\uFFFD'
18
+
19
+ let labelsMap
20
+ const normalizeEncoding = (enc) => {
21
+ // fast path
22
+ if (enc === 'utf-8' || enc === 'utf8') return 'utf-8'
23
+ if (enc === 'windows-1252' || enc === 'ascii' || enc === 'latin1') return 'windows-1252'
24
+ // full map
25
+ let low = `${enc}`.toLowerCase()
26
+ if (low !== low.trim()) low = low.replace(/^[\t\n\f\r ]+/, '').replace(/[\t\n\f\r ]+$/, '') // only ASCII whitespace
27
+ if (Object.hasOwn(labels, low) && low !== 'replacement') return low
28
+ if (!labelsMap) {
29
+ labelsMap = new Map()
30
+ for (const [label, aliases] of Object.entries(labels)) {
31
+ for (const alias of aliases) labelsMap.set(alias, label)
32
+ }
33
+ }
34
+
35
+ const mapped = labelsMap.get(low)
36
+ if (mapped && mapped !== 'replacement') return mapped
37
+ throw new RangeError('Unknown encoding')
38
+ }
39
+
40
+ const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
41
+
42
+ const fromSource = (x) => {
43
+ if (x instanceof Uint8Array) return x
44
+ if (x instanceof ArrayBuffer) return new Uint8Array(x)
45
+ if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
46
+ if (globalThis.SharedArrayBuffer && x instanceof globalThis.SharedArrayBuffer) {
47
+ return new Uint8Array(x) // a SharedArrayBuffer has no .buffer / .byteOffset, construct the view over it directly
48
+ }
49
+
50
+ throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
51
+ }
52
+
53
+ function unicodeDecoder(encoding, loose) {
54
+ if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
55
+ const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
56
+ return loose ? (u) => utf16toStringLoose(u, form) : (u) => utf16toString(u, form)
57
+ }
58
+
59
+ export class TextDecoder {
60
+ #decode
61
+ #unicode
62
+ #multibyte
63
+ #chunk
64
+ #canBOM
65
+
66
+ constructor(encoding = 'utf-8', options = {}) {
67
+ if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
68
+ const enc = normalizeEncoding(encoding)
69
+ define(this, 'encoding', enc)
70
+ define(this, 'fatal', Boolean(options.fatal))
71
+ define(this, 'ignoreBOM', Boolean(options.ignoreBOM))
72
+ this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
73
+ this.#multibyte = !this.#unicode && enc !== 'windows-1252' && multibyteSupported(enc)
74
+ this.#canBOM = this.#unicode && !this.ignoreBOM
75
+ }
76
+
77
+ get [Symbol.toStringTag]() {
78
+ return 'TextDecoder'
79
+ }
80
+
81
+ decode(input, options = {}) {
82
+ if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
83
+ const stream = Boolean(options.stream)
84
+ let u = input === undefined ? new Uint8Array() : fromSource(input)
85
+
86
+ if (this.#unicode) {
87
+ let prefix
88
+ if (this.#chunk) {
89
+ if (u.length === 0) {
90
+ if (stream) return '' // no change
91
+ u = this.#chunk // process as final chunk to handle errors and state changes
92
+ } else if (u.length < 3) {
93
+ // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
94
+ const a = new Uint8Array(u.length + this.#chunk.length)
95
+ a.set(this.#chunk)
96
+ a.set(u, this.#chunk.length)
97
+ u = a
98
+ } else {
99
+ // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
100
+ const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
101
+ t.set(this.#chunk)
102
+ t.set(u.subarray(0, 3), this.#chunk.length)
103
+
104
+ // Stop at the first offset where unfinished bytes reaches 0 or fits into u
105
+ // If that doesn't happen (u too short), just concat chunk and u completely
106
+ for (let i = 1; i <= 3; i++) {
107
+ const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
108
+ if (unfinished <= i) {
109
+ // Always reachable at 3, but we still need 'unfinished' value for it
110
+ const add = i - unfinished // 0-3
111
+ prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
112
+ if (add > 0) u = u.subarray(add)
113
+ break
114
+ }
115
+ }
116
+ }
117
+
118
+ this.#chunk = null
119
+ } else if (u.byteLength === 0) {
120
+ if (!stream) this.#canBOM = !this.ignoreBOM
121
+ return ''
122
+ }
123
+
124
+ // For non-stream utf-8 we don't have to do this as it matches utf8toStringLoose already
125
+ // For non-stream loose utf-16 we still have to do this as this API supports uneven byteLength unlike utf16toStringLoose
126
+ let suffix = ''
127
+ if (stream || (!this.fatal && this.encoding !== 'utf-8')) {
128
+ const trail = unfinishedBytes(u, u.byteLength, this.encoding)
129
+ if (trail > 0) {
130
+ if (stream) {
131
+ this.#chunk = Uint8Array.from(u.subarray(-trail)) // copy
132
+ } else {
133
+ // non-fatal mode as already checked
134
+ suffix = replacementChar
135
+ }
136
+
137
+ u = u.subarray(0, -trail)
138
+ }
139
+ }
140
+
141
+ if (this.#canBOM) {
142
+ const bom = this.#findBom(prefix ?? u)
143
+ if (bom) {
144
+ if (stream) this.#canBOM = false
145
+ if (prefix) {
146
+ prefix = prefix.subarray(bom)
147
+ } else {
148
+ u = u.subarray(bom)
149
+ }
150
+ }
151
+ }
152
+
153
+ if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
154
+ try {
155
+ const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
156
+ if (res.length > 0 && stream) this.#canBOM = false
157
+
158
+ if (!stream) this.#canBOM = !this.ignoreBOM
159
+ return res
160
+ } catch (err) {
161
+ this.#chunk = null // reset unfinished chunk on errors
162
+ throw err
163
+ }
164
+
165
+ // eslint-disable-next-line no-else-return
166
+ } else if (this.#multibyte) {
167
+ if (!this.#decode) this.#decode = createMultibyteDecoder(this.encoding, !this.fatal) // can contain state!
168
+ return this.#decode(u, stream)
169
+ } else {
170
+ if (!this.#decode) this.#decode = createSinglebyteDecoder(this.encoding, !this.fatal)
171
+ return this.#decode(u)
172
+ }
173
+ }
174
+
175
+ #findBom(u) {
176
+ switch (this.encoding) {
177
+ case 'utf-8':
178
+ return u.byteLength >= 3 && u[0] === 0xef && u[1] === 0xbb && u[2] === 0xbf ? 3 : 0
179
+ case 'utf-16le':
180
+ return u.byteLength >= 2 && u[0] === 0xff && u[1] === 0xfe ? 2 : 0
181
+ case 'utf-16be':
182
+ return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
183
+ }
184
+
185
+ throw new Error('Unreachable')
186
+ }
187
+ }
188
+
189
+ export class TextEncoder {
190
+ constructor() {
191
+ define(this, 'encoding', 'utf-8')
192
+ }
193
+
194
+ get [Symbol.toStringTag]() {
195
+ return 'TextEncoder'
196
+ }
197
+
198
+ encode(str = '') {
199
+ if (typeof str !== 'string') str = `${str}`
200
+ const res = utf8fromStringLoose(str)
201
+ return res.byteOffset === 0 ? res : res.slice(0) // Ensure 0-offset. TODO: do we need this?
202
+ }
203
+
204
+ encodeInto(str, target) {
205
+ if (typeof str !== 'string') str = `${str}`
206
+ if (!(target instanceof Uint8Array)) throw new TypeError('Target must be an Uint8Array')
207
+ if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved
208
+
209
+ let u8 = utf8fromStringLoose(str) // TODO: perf?
210
+ let read
211
+ if (target.length >= u8.length) {
212
+ read = str.length
213
+ } else if (u8.length === str.length) {
214
+ if (u8.length > target.length) u8 = u8.subarray(0, target.length) // ascii can be truncated
215
+ read = u8.length
216
+ } else {
217
+ u8 = u8.subarray(0, target.length)
218
+ const unfinished = unfinishedBytes(u8, u8.length, 'utf-8')
219
+ if (unfinished > 0) u8 = u8.subarray(0, u8.length - unfinished)
220
+
221
+ // We can do this because loose str -> u8 -> str preserves length, unlike loose u8 -> str -> u8
222
+ // Each unpaired surrogate (1 charcode) is replaced with a single charcode
223
+ read = utf8toStringLoose(u8).length // FIXME: Converting back is very inefficient
224
+ }
225
+
226
+ try {
227
+ target.set(u8)
228
+ } catch {
229
+ return { read: 0, written: 0 } // see above, likely detached but no .detached property support
230
+ }
231
+
232
+ return { read, written: u8.length }
233
+ }
234
+ }
@@ -1,17 +1,30 @@
1
1
  const { Buffer, TextEncoder, TextDecoder } = globalThis
2
2
  const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
3
- const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
4
- const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
5
- const nativeDecoder = isNative(TextDecoder) ? new TextDecoder('utf8', { ignoreBOM: true }) : null
6
- const nativeBuffer = haveNativeBuffer ? Buffer : null
7
- const isHermes = Boolean(globalThis.HermesInternal)
3
+ let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
4
+ if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
5
+
6
+ export const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
7
+ export const nativeDecoder = isNative(TextDecoder)
8
+ ? new TextDecoder('utf-8', { ignoreBOM: true })
9
+ : null
10
+ export const nativeBuffer = haveNativeBuffer ? Buffer : null
11
+ export const isHermes = Boolean(globalThis.HermesInternal)
12
+ export const isDeno = Boolean(globalThis.Deno)
13
+ export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
8
14
 
9
15
  // Actually windows-1252, compatible with ascii and latin1 decoding
10
16
  // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
11
17
  // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
12
- const nativeDecoderLatin1 = isNative(TextDecoder)
13
- ? new TextDecoder('latin1', { ignoreBOM: true })
14
- : null
18
+ let nativeDecoderLatin1impl = null
19
+ if (isNative(TextDecoder)) {
20
+ // Not all bare-bones engines with TextDecoder support encodings other than utf-8, so detect
21
+ try {
22
+ nativeDecoderLatin1impl = new TextDecoder('latin1', { ignoreBOM: true })
23
+ } catch {}
24
+ }
25
+
26
+ export const nativeDecoderLatin1 = nativeDecoderLatin1impl
27
+ export const canDecoders = Boolean(nativeDecoderLatin1impl)
15
28
 
16
29
  // Block Firefox < 146 specifically from using native hex/base64, as it's very slow there
17
30
  // Refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1994067 (and linked issues), fixed in 146
@@ -35,6 +48,71 @@ function shouldSkipBuiltins() {
35
48
  return false // eslint-disable-line no-unreachable
36
49
  }
37
50
 
38
- const skipWeb = shouldSkipBuiltins()
51
+ export const skipWeb = shouldSkipBuiltins()
52
+
53
+ function decodePartAddition(a, start, end, m) {
54
+ let o = ''
55
+ let i = start
56
+ for (const last3 = end - 3; i < last3; i += 4) {
57
+ const x0 = a[i]
58
+ const x1 = a[i + 1]
59
+ const x2 = a[i + 2]
60
+ const x3 = a[i + 3]
61
+ o += m[x0]
62
+ o += m[x1]
63
+ o += m[x2]
64
+ o += m[x3]
65
+ }
66
+
67
+ while (i < end) o += m[a[i++]]
68
+ return o
69
+ }
70
+
71
+ // Decoding with templates is faster on Hermes
72
+ function decodePartTemplates(a, start, end, m) {
73
+ let o = ''
74
+ let i = start
75
+ for (const last15 = end - 15; i < last15; i += 16) {
76
+ const x0 = a[i]
77
+ const x1 = a[i + 1]
78
+ const x2 = a[i + 2]
79
+ const x3 = a[i + 3]
80
+ const x4 = a[i + 4]
81
+ const x5 = a[i + 5]
82
+ const x6 = a[i + 6]
83
+ const x7 = a[i + 7]
84
+ const x8 = a[i + 8]
85
+ const x9 = a[i + 9]
86
+ const x10 = a[i + 10]
87
+ const x11 = a[i + 11]
88
+ const x12 = a[i + 12]
89
+ const x13 = a[i + 13]
90
+ const x14 = a[i + 14]
91
+ const x15 = a[i + 15]
92
+ o += `${m[x0]}${m[x1]}${m[x2]}${m[x3]}${m[x4]}${m[x5]}${m[x6]}${m[x7]}${m[x8]}${m[x9]}${m[x10]}${m[x11]}${m[x12]}${m[x13]}${m[x14]}${m[x15]}`
93
+ }
94
+
95
+ while (i < end) o += m[a[i++]]
96
+ return o
97
+ }
39
98
 
40
- export { nativeEncoder, nativeDecoder, nativeDecoderLatin1, nativeBuffer, isHermes, skipWeb }
99
+ const decodePart = isHermes ? decodePartTemplates : decodePartAddition
100
+ export function decode2string(arr, start, end, m) {
101
+ if (end - start > 30_000) {
102
+ // Limit concatenation to avoid excessive GC
103
+ // Thresholds checked on Hermes for toHex
104
+ const concat = []
105
+ for (let i = start; i < end; ) {
106
+ const step = i + 500
107
+ const iNext = step > end ? end : step
108
+ concat.push(decodePart(arr, i, iNext, m))
109
+ i = iNext
110
+ }
111
+
112
+ const res = concat.join('')
113
+ concat.length = 0
114
+ return res
115
+ }
116
+
117
+ return decodePart(arr, start, end, m)
118
+ }
@@ -0,0 +1,46 @@
1
+ // See https://encoding.spec.whatwg.org/#names-and-labels
2
+
3
+ /* eslint-disable @exodus/export-default/named */
4
+ // prettier-ignore
5
+ export default {
6
+ 'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
7
+ ibm866: ['866', 'cp866', 'csibm866'],
8
+ 'iso-8859-2': ['csisolatin2', 'iso-ir-101', 'iso8859-2', 'iso88592', 'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'],
9
+ 'iso-8859-3': ['csisolatin3', 'iso-ir-109', 'iso8859-3', 'iso88593', 'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3'],
10
+ 'iso-8859-4': ['csisolatin4', 'iso-ir-110', 'iso8859-4', 'iso88594', 'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4'],
11
+ 'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144', 'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988'],
12
+ 'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127', 'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'],
13
+ 'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7', 'iso_8859-7:1987', 'sun_eu_greek'],
14
+ 'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8', 'iso_8859-8:1988', 'visual'],
15
+ 'iso-8859-8-i': ['csiso88598i', 'logical'],
16
+ 'iso-8859-10': ['csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'],
17
+ 'iso-8859-13': ['iso8859-13', 'iso885913'],
18
+ 'iso-8859-14': ['iso8859-14', 'iso885914'],
19
+ 'iso-8859-15': ['csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
20
+ 'iso-8859-16': [],
21
+ 'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
22
+ 'koi8-u': ['koi8-ru'],
23
+ macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
24
+ 'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
25
+ 'windows-1250': ['cp1250', 'x-cp1250'],
26
+ 'windows-1251': ['cp1251', 'x-cp1251'],
27
+ 'windows-1252': ['ansi_x3.4-1968', 'ascii', 'cp1252', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii', 'x-cp1252'],
28
+ 'windows-1253': ['cp1253', 'x-cp1253'],
29
+ 'windows-1254': ['cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5', 'x-cp1254'],
30
+ 'windows-1255': ['cp1255', 'x-cp1255'],
31
+ 'windows-1256': ['cp1256', 'x-cp1256'],
32
+ 'windows-1257': ['cp1257', 'x-cp1257'],
33
+ 'windows-1258': ['cp1258', 'x-cp1258'],
34
+ 'x-mac-cyrillic': ['x-mac-ukrainian'],
35
+ gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
36
+ gb18030: [],
37
+ big5: ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
38
+ 'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
39
+ 'iso-2022-jp': ['csiso2022jp'],
40
+ shift_jis: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j', 'x-sjis'],
41
+ 'euc-kr': ['cseuckr', 'csksc56011987', 'iso-ir-149', 'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'],
42
+ replacement: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
43
+ 'utf-16be': ['unicodefffe'],
44
+ 'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
45
+ 'x-user-defined': [],
46
+ }
@@ -0,0 +1,34 @@
1
+ export function unfinishedBytes(u, len, enc) {
2
+ switch (enc) {
3
+ case 'utf-8': {
4
+ // 0-3
5
+ let p = 0
6
+ while (p < 2 && p < len && (u[len - p - 1] & 0xc0) === 0x80) p++ // go back 0-2 trailing bytes
7
+ if (p === len) return 0 // no space for lead
8
+ const l = u[len - p - 1]
9
+ if (l < 0xc2 || l > 0xf4) return 0 // not a lead
10
+ if (p === 0) return 1 // nothing to recheck, we have only lead, return it. 2-byte must return here
11
+ if (l < 0xe0 || (l < 0xf0 && p >= 2)) return 0 // 2-byte, or 3-byte or less and we already have 2 trailing
12
+ const lower = l === 0xf0 ? 0x90 : l === 0xe0 ? 0xa0 : 0x80
13
+ const upper = l === 0xf4 ? 0x8f : l === 0xed ? 0x9f : 0xbf
14
+ const n = u[len - p]
15
+ return n >= lower && n <= upper ? p + 1 : 0
16
+ }
17
+
18
+ case 'utf-16le':
19
+ case 'utf-16be': {
20
+ // 0-3
21
+ let p = 0
22
+ if (len % 2 !== 0) p++ // uneven bytes
23
+ const l = len - p - 1
24
+ if (len - p >= 2) {
25
+ const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l]
26
+ if (last >= 0xd8_00 && last < 0xdc_00) p += 2 // lone lead
27
+ }
28
+
29
+ return p
30
+ }
31
+ }
32
+
33
+ throw new Error('Unsupported encoding')
34
+ }
package/fallback/hex.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { assertUint8 } from '../assert.js'
2
- import { nativeDecoder, nativeEncoder, isHermes } from './_utils.js'
2
+ import { nativeDecoder, nativeEncoder, decode2string } from './_utils.js'
3
3
  import { encodeAscii, decodeAscii } from './latin1.js'
4
4
 
5
5
  let hexArray // array of 256 bytes converted to two-char hex strings
@@ -11,58 +11,6 @@ const allowed = '0123456789ABCDEFabcdef'
11
11
 
12
12
  export const E_HEX = 'Input is not a hex string'
13
13
 
14
- function toHexPartAddition(a, start, end) {
15
- let o = ''
16
- let i = start
17
- const h = hexArray
18
- for (const last3 = end - 3; i < last3; i += 4) {
19
- const x0 = a[i]
20
- const x1 = a[i + 1]
21
- const x2 = a[i + 2]
22
- const x3 = a[i + 3]
23
- o += h[x0]
24
- o += h[x1]
25
- o += h[x2]
26
- o += h[x3]
27
- }
28
-
29
- while (i < end) o += h[a[i++]]
30
- return o
31
- }
32
-
33
- // Optimiziation for Hermes which is the main user of fallback
34
- function toHexPartTemplates(a, start, end) {
35
- let o = ''
36
- let i = start
37
- const h = hexArray
38
- for (const last15 = end - 15; i < last15; i += 16) {
39
- const x0 = a[i]
40
- const x1 = a[i + 1]
41
- const x2 = a[i + 2]
42
- const x3 = a[i + 3]
43
- const x4 = a[i + 4]
44
- const x5 = a[i + 5]
45
- const x6 = a[i + 6]
46
- const x7 = a[i + 7]
47
- const x8 = a[i + 8]
48
- const x9 = a[i + 9]
49
- const x10 = a[i + 10]
50
- const x11 = a[i + 11]
51
- const x12 = a[i + 12]
52
- const x13 = a[i + 13]
53
- const x14 = a[i + 14]
54
- const x15 = a[i + 15]
55
- o += `${h[x0]}${h[x1]}${h[x2]}${h[x3]}${h[x4]}${h[x5]}${h[x6]}${h[x7]}${h[x8]}${h[x9]}${h[x10]}${h[x11]}${h[x12]}${h[x13]}${h[x14]}${h[x15]}`
56
- }
57
-
58
- while (i < end) o += h[a[i++]]
59
- return o
60
- }
61
-
62
- // Using templates is significantly faster in Hermes and JSC
63
- // It's harder to detect JSC and not important anyway as it has native impl, so we detect only Hermes
64
- const toHexPart = isHermes ? toHexPartTemplates : toHexPartAddition
65
-
66
14
  export function toHex(arr) {
67
15
  assertUint8(arr)
68
16
 
@@ -100,23 +48,7 @@ export function toHex(arr) {
100
48
  return decodeAscii(oa)
101
49
  }
102
50
 
103
- if (length > 30_000) {
104
- // Limit concatenation to avoid excessive GC
105
- // Thresholds checked on Hermes
106
- const concat = []
107
- for (let i = 0; i < length; ) {
108
- const step = i + 500
109
- const end = step > length ? length : step
110
- concat.push(toHexPart(arr, i, end))
111
- i = end
112
- }
113
-
114
- const res = concat.join('')
115
- concat.length = 0
116
- return res
117
- }
118
-
119
- return toHexPart(arr, 0, length)
51
+ return decode2string(arr, 0, length, hexArray)
120
52
  }
121
53
 
122
54
  export function fromHex(str) {