@exodus/bytes 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -5
- package/array.d.ts +0 -1
- package/base58.js +1 -1
- package/base58check.js +1 -2
- package/base64.d.ts +0 -1
- package/encoding-browser.browser.js +29 -0
- package/encoding-browser.d.ts +1 -0
- package/encoding-browser.js +1 -0
- package/encoding-browser.native.js +1 -0
- package/fallback/_utils.js +1 -0
- package/fallback/encoding.api.js +81 -0
- package/fallback/encoding.js +6 -82
- package/fallback/latin1.js +1 -0
- package/fallback/multi-byte.js +456 -71
- package/fallback/multi-byte.table.js +20 -15
- package/fallback/single-byte.js +1 -1
- package/fallback/utf16.js +45 -26
- package/fallback/utf8.js +1 -1
- package/hex.d.ts +0 -1
- package/index.d.ts +43 -0
- package/index.js +5 -0
- package/multi-byte.js +7 -1
- package/multi-byte.node.js +7 -1
- package/package.json +32 -3
- package/single-byte.js +9 -11
- package/single-byte.node.js +29 -26
- package/utf16.js +1 -0
- package/utf16.node.js +6 -2
package/README.md
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# `@exodus/bytes`
|
|
2
2
|
|
|
3
3
|
[](https://npmjs.org/package/@exodus/bytes)
|
|
4
|
-
](https://github.com/ExodusOSS/bytes/releases)
|
|
5
|
+
[](https://www.npmcharts.com/compare/@exodus/bytes?minimal=true)
|
|
5
6
|
[](https://github.com/ExodusOSS/bytes/blob/HEAD/LICENSE)
|
|
6
7
|
|
|
7
8
|
`Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, `utf8`, `utf16`, `bech32` and `wif`
|
|
@@ -129,6 +130,7 @@ import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js
|
|
|
129
130
|
```js
|
|
130
131
|
import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
|
|
131
132
|
import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
|
|
133
|
+
import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
|
|
132
134
|
```
|
|
133
135
|
|
|
134
136
|
Decode / encode the legacy single-byte encodings according to the
|
|
@@ -167,6 +169,11 @@ Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
|
|
|
167
169
|
'\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
|
|
168
170
|
```
|
|
169
171
|
|
|
172
|
+
All WHATWG Encoding spec [`windows-*` encodings](https://encoding.spec.whatwg.org/#windows-874) are supersets of
|
|
173
|
+
corresponding [unicode.org encodings](https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/), meaning that
|
|
174
|
+
they encode/decode all the old valid (non-replacement) strings / byte sequences identically, but can also support
|
|
175
|
+
a wider range of inputs.
|
|
176
|
+
|
|
170
177
|
##### `createSinglebyteDecoder(encoding, loose = false)`
|
|
171
178
|
|
|
172
179
|
Create a decoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
|
|
@@ -308,8 +315,8 @@ Implements [BIP-0173](https://github.com/bitcoin/bips/blob/master/bip-0173.media
|
|
|
308
315
|
|
|
309
316
|
```js
|
|
310
317
|
import { fromBech32, toBech32 } from '@exodus/bytes/bech32.js'
|
|
311
|
-
import { fromBech32m, toBech32m } from '@exodus/bytes/
|
|
312
|
-
import { getPrefix } from '@exodus/bytes/
|
|
318
|
+
import { fromBech32m, toBech32m } from '@exodus/bytes/bech32.js'
|
|
319
|
+
import { getPrefix } from '@exodus/bytes/bech32.js'
|
|
313
320
|
```
|
|
314
321
|
|
|
315
322
|
##### `getPrefix(str, limit = 90)`
|
|
@@ -341,7 +348,7 @@ import { fromBase58checkSync, toBase58checkSync } from '@exodus/bytes/base58chec
|
|
|
341
348
|
import { makeBase58check } from '@exodus/bytes/base58check.js'
|
|
342
349
|
```
|
|
343
350
|
|
|
344
|
-
On non-Node.js, requires peer dependency [@
|
|
351
|
+
On non-Node.js, requires peer dependency [@noble/hashes](https://www.npmjs.com/package/@noble/hashes) to be installed.
|
|
345
352
|
|
|
346
353
|
##### `async fromBase58check(str, format = 'uint8')`
|
|
347
354
|
##### `async toBase58check(arr)`
|
|
@@ -356,7 +363,7 @@ import { fromWifString, toWifString } from '@exodus/bytes/wif.js'
|
|
|
356
363
|
import { fromWifStringSync, toWifStringSync } from '@exodus/bytes/wif.js'
|
|
357
364
|
```
|
|
358
365
|
|
|
359
|
-
On non-Node.js, requires peer dependency [@
|
|
366
|
+
On non-Node.js, requires peer dependency [@noble/hashes](https://www.npmjs.com/package/@noble/hashes) to be installed.
|
|
360
367
|
|
|
361
368
|
##### `async fromWifString(string, version)`
|
|
362
369
|
##### `fromWifStringSync(string, version)`
|
|
@@ -525,6 +532,27 @@ true
|
|
|
525
532
|
'%'
|
|
526
533
|
```
|
|
527
534
|
|
|
535
|
+
### `@exodus/bytes/encoding-browser.js`
|
|
536
|
+
|
|
537
|
+
```js
|
|
538
|
+
import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-browser.js'
|
|
539
|
+
import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-browser.js' // Requires Streams
|
|
540
|
+
|
|
541
|
+
// Hooks for standards
|
|
542
|
+
import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-browser.js'
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
Same as `@exodus/bytes/encoding.js`, but in browsers, instead of polyfilling, it just uses whatever the
|
|
546
|
+
browser provides, drastically reducing the bundle size (to less than 2 KiB gzipped).
|
|
547
|
+
|
|
548
|
+
Under non-browser engines (Node.js, React Native, etc.) a full polyfill is used as those platforms
|
|
549
|
+
do not provide sufficiently complete / non-buggy `TextDecoder` APIs.
|
|
550
|
+
|
|
551
|
+
> [!NOTE]
|
|
552
|
+
> Implementations in browsers [have bugs](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit),
|
|
553
|
+
> but they are fixing them and the expected update window is short.\
|
|
554
|
+
> If you want to circumvent browser bugs, use the full `@exodus/bytes/encoding.js` import.
|
|
555
|
+
|
|
528
556
|
## License
|
|
529
557
|
|
|
530
558
|
[MIT](./LICENSE)
|
package/array.d.ts
CHANGED
|
@@ -21,4 +21,3 @@ export type OutputFormat = 'uint8' | 'buffer';
|
|
|
21
21
|
export function typedView(arr: ArrayBufferView, format: 'uint8'): Uint8Array;
|
|
22
22
|
export function typedView(arr: ArrayBufferView, format: 'buffer'): Buffer;
|
|
23
23
|
export function typedView(arr: ArrayBufferView, format: OutputFormat): Uint8Array | Buffer;
|
|
24
|
-
|
package/base58.js
CHANGED
|
@@ -207,7 +207,7 @@ function fromBase58core(str, alphabet, codes, format = 'uint8') {
|
|
|
207
207
|
}
|
|
208
208
|
|
|
209
209
|
at = k + 1
|
|
210
|
-
if (c !== 0 || at < zeros) throw new Error('Unexpected') // unreachable
|
|
210
|
+
if (c !== 0 || at < zeros) /* c8 ignore next */ throw new Error('Unexpected') // unreachable
|
|
211
211
|
}
|
|
212
212
|
}
|
|
213
213
|
|
package/base58check.js
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { sha256 } from '@noble/hashes/sha2.js'
|
|
2
2
|
import { makeBase58check } from './fallback/base58check.js'
|
|
3
3
|
|
|
4
4
|
// Note: while API is async, we use hashSync for now until we improve webcrypto perf for hash256
|
|
5
5
|
// Inputs to base58 are typically very small, and that makes a difference
|
|
6
6
|
|
|
7
7
|
// eslint-disable-next-line @exodus/import/no-deprecated
|
|
8
|
-
const sha256 = (x) => hashSync('sha256', x, 'uint8')
|
|
9
8
|
const hash256sync = (x) => sha256(sha256(x))
|
|
10
9
|
const hash256 = hash256sync // See note at the top
|
|
11
10
|
const {
|
package/base64.d.ts
CHANGED
|
@@ -73,4 +73,3 @@ export function fromBase64url(str: string, options: FromBase64Options & { format
|
|
|
73
73
|
*/
|
|
74
74
|
export function fromBase64any(str: string, options?: FromBase64Options): Uint8ArrayBuffer;
|
|
75
75
|
export function fromBase64any(str: string, options: FromBase64Options & { format: 'buffer' }): Buffer;
|
|
76
|
-
|
|
package/encoding-browser.browser.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import {
|
|
2
|
+
fromSource,
|
|
3
|
+
getBOMEncoding,
|
|
4
|
+
normalizeEncoding,
|
|
5
|
+
E_ENCODING,
|
|
6
|
+
} from './fallback/encoding.api.js'
|
|
7
|
+
import labels from './fallback/encoding.labels.js'
|
|
8
|
+
|
|
9
|
+
// Lite-weight version which re-exports existing implementations on browsers,
|
|
10
|
+
// while still being aliased to the full impl in RN and Node.js
|
|
11
|
+
|
|
12
|
+
// WARNING: Note that browsers have bugs (which hopefully will get fixed soon)
|
|
13
|
+
|
|
14
|
+
const { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream } = globalThis
|
|
15
|
+
|
|
16
|
+
export { normalizeEncoding, getBOMEncoding, labelToName } from './fallback/encoding.api.js'
|
|
17
|
+
export { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream }
|
|
18
|
+
|
|
19
|
+
// https://encoding.spec.whatwg.org/#decode
|
|
20
|
+
export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
21
|
+
let u8 = fromSource(input)
|
|
22
|
+
const bomEncoding = getBOMEncoding(u8)
|
|
23
|
+
if (bomEncoding) u8 = u8.subarray(bomEncoding === 'utf-8' ? 3 : 2)
|
|
24
|
+
const enc = bomEncoding ?? normalizeEncoding(fallbackEncoding) // "the byte order mark is more authoritative than anything else"
|
|
25
|
+
if (enc === 'utf-8') return new TextDecoder('utf-8', { ignoreBOM: true }).decode(u8) // fast path
|
|
26
|
+
if (enc === 'replacement') return u8.byteLength > 0 ? '\uFFFD' : ''
|
|
27
|
+
if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)
|
|
28
|
+
return new TextDecoder(enc, { ignoreBOM: true }).decode(u8)
|
|
29
|
+
}
|
|
package/encoding-browser.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './encoding.js'
|
|
package/encoding-browser.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './encoding.js'
|
|
package/encoding-browser.native.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './encoding.js'
|
package/fallback/_utils.js
CHANGED
|
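(the +1 -0 hunk for package/fallback/_utils.js was not captured in this rendering)
|
package/fallback/encoding.api.js
ADDED
|
@@ -0,0 +1,81 @@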
|
|
|
1
|
+
import labels from './encoding.labels.js'
|
|
2
|
+
|
|
3
|
+
let labelsMap
|
|
4
|
+
|
|
5
|
+
export const E_ENCODING = 'Unknown encoding'
|
|
6
|
+
|
|
7
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
8
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
9
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
10
|
+
export function normalizeEncoding(label) {
|
|
11
|
+
// fast path
|
|
12
|
+
if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
|
|
13
|
+
if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
|
|
14
|
+
// full map
|
|
15
|
+
if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
|
|
16
|
+
const low = `${label}`.trim().toLowerCase()
|
|
17
|
+
if (Object.hasOwn(labels, low)) return low
|
|
18
|
+
if (!labelsMap) {
|
|
19
|
+
labelsMap = new Map()
|
|
20
|
+
for (const [label, aliases] of Object.entries(labels)) {
|
|
21
|
+
for (const alias of aliases) labelsMap.set(alias, label)
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
const mapped = labelsMap.get(low)
|
|
26
|
+
if (mapped) return mapped
|
|
27
|
+
return null
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// TODO: make this more strict against Symbol.toStringTag
|
|
31
|
+
// Is not very significant though, anything faking Symbol.toStringTag could as well override
|
|
32
|
+
// prototypes, which is not something we protect against
|
|
33
|
+
|
|
34
|
+
function isAnyArrayBuffer(x) {
|
|
35
|
+
if (x instanceof ArrayBuffer) return true
|
|
36
|
+
if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
|
|
37
|
+
if (!x || typeof x.byteLength !== 'number') return false
|
|
38
|
+
const s = Object.prototype.toString.call(x)
|
|
39
|
+
return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export function fromSource(x) {
|
|
43
|
+
if (x instanceof Uint8Array) return x
|
|
44
|
+
if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
|
|
45
|
+
if (isAnyArrayBuffer(x)) {
|
|
46
|
+
if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
|
|
47
|
+
// Old engines without .detached, try-catch
|
|
48
|
+
try {
|
|
49
|
+
return new Uint8Array(x)
|
|
50
|
+
} catch {
|
|
51
|
+
return new Uint8Array()
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
59
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
60
|
+
export function getBOMEncoding(input) {
|
|
61
|
+
const u8 = fromSource(input) // asserts
|
|
62
|
+
if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
|
|
63
|
+
if (u8.length < 2) return null
|
|
64
|
+
if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
|
|
65
|
+
if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
|
|
66
|
+
return null
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
|
|
70
|
+
|
|
71
|
+
// Unlike normalizeEncoding, case-sensitive
|
|
72
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
73
|
+
export function labelToName(label) {
|
|
74
|
+
const enc = normalizeEncoding(label)
|
|
75
|
+
if (enc === 'utf-8') return 'UTF-8' // fast path
|
|
76
|
+
if (!enc) return enc
|
|
77
|
+
if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
|
|
78
|
+
if (enc === 'big5') return 'Big5'
|
|
79
|
+
if (enc === 'shift_jis') return 'Shift_JIS'
|
|
80
|
+
return enc
|
|
81
|
+
}
|
package/fallback/encoding.js
CHANGED
|
@@ -5,14 +5,15 @@ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
|
|
|
5
5
|
import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
|
|
6
6
|
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
|
|
7
7
|
import labels from './encoding.labels.js'
|
|
8
|
+
import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
|
|
8
9
|
import { unfinishedBytes } from './encoding.util.js'
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
const E_ENCODING = 'Unknown encoding'
|
|
12
|
-
const replacementChar = '\uFFFD'
|
|
11
|
+
export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
|
|
13
12
|
|
|
13
|
+
const E_OPTIONS = 'The "options" argument must be of type object'
|
|
14
14
|
const E_MULTI =
|
|
15
15
|
'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
|
|
16
|
+
const replacementChar = '\uFFFD'
|
|
16
17
|
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
|
|
17
18
|
let createMultibyteDecoder
|
|
18
19
|
|
|
@@ -20,67 +21,14 @@ export function setMultibyteDecoder(createDecoder) {
|
|
|
20
21
|
createMultibyteDecoder = createDecoder
|
|
21
22
|
}
|
|
22
23
|
|
|
23
|
-
let labelsMap
|
|
24
|
-
|
|
25
|
-
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
26
|
-
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
27
|
-
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
28
|
-
export function normalizeEncoding(label) {
|
|
29
|
-
// fast path
|
|
30
|
-
if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
|
|
31
|
-
if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
|
|
32
|
-
// full map
|
|
33
|
-
if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
|
|
34
|
-
const low = `${label}`.trim().toLowerCase()
|
|
35
|
-
if (Object.hasOwn(labels, low)) return low
|
|
36
|
-
if (!labelsMap) {
|
|
37
|
-
labelsMap = new Map()
|
|
38
|
-
for (const [label, aliases] of Object.entries(labels)) {
|
|
39
|
-
for (const alias of aliases) labelsMap.set(alias, label)
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
const mapped = labelsMap.get(low)
|
|
44
|
-
if (mapped) return mapped
|
|
45
|
-
return null
|
|
46
|
-
}
|
|
47
|
-
|
|
48
24
|
const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
|
|
49
25
|
|
|
50
|
-
// TODO: make this more strict against Symbol.toStringTag
|
|
51
|
-
// Is not very significant though, anything faking Symbol.toStringTag could as well override
|
|
52
|
-
// prototypes, which is not something we protect against
|
|
53
|
-
|
|
54
|
-
function isAnyArrayBuffer(x) {
|
|
55
|
-
if (x instanceof ArrayBuffer) return true
|
|
56
|
-
if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
|
|
57
|
-
if (!x || typeof x.byteLength !== 'number') return false
|
|
58
|
-
const s = Object.prototype.toString.call(x)
|
|
59
|
-
return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
|
|
60
|
-
}
|
|
61
|
-
|
|
62
26
|
function isAnyUint8Array(x) {
|
|
63
27
|
if (x instanceof Uint8Array) return true
|
|
64
28
|
if (!x || !ArrayBuffer.isView(x) || x.BYTES_PER_ELEMENT !== 1) return false
|
|
65
29
|
return Object.prototype.toString.call(x) === '[object Uint8Array]'
|
|
66
30
|
}
|
|
67
31
|
|
|
68
|
-
const fromSource = (x) => {
|
|
69
|
-
if (x instanceof Uint8Array) return x
|
|
70
|
-
if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
|
|
71
|
-
if (isAnyArrayBuffer(x)) {
|
|
72
|
-
if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
|
|
73
|
-
// Old engines without .detached, try-catch
|
|
74
|
-
try {
|
|
75
|
-
return new Uint8Array(x)
|
|
76
|
-
} catch {
|
|
77
|
-
return new Uint8Array()
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
|
|
82
|
-
}
|
|
83
|
-
|
|
84
32
|
function unicodeDecoder(encoding, loose) {
|
|
85
33
|
if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
|
|
86
34
|
const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
|
|
@@ -215,6 +163,7 @@ export class TextDecoder {
|
|
|
215
163
|
return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
|
|
216
164
|
}
|
|
217
165
|
|
|
166
|
+
/* c8 ignore next */
|
|
218
167
|
throw new Error('Unreachable')
|
|
219
168
|
}
|
|
220
169
|
}
|
|
@@ -341,17 +290,6 @@ export class TextEncoderStream {
|
|
|
341
290
|
}
|
|
342
291
|
}
|
|
343
292
|
|
|
344
|
-
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
345
|
-
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
346
|
-
export function getBOMEncoding(input) {
|
|
347
|
-
const u8 = fromSource(input) // asserts
|
|
348
|
-
if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
|
|
349
|
-
if (u8.length < 2) return null
|
|
350
|
-
if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
|
|
351
|
-
if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
|
|
352
|
-
return null
|
|
353
|
-
}
|
|
354
|
-
|
|
355
293
|
// https://encoding.spec.whatwg.org/#decode
|
|
356
294
|
// Warning: encoding sniffed from BOM takes preference over the supplied one
|
|
357
295
|
// Warning: lossy, performs replacement, no option of throwing
|
|
@@ -368,7 +306,7 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
|
368
306
|
let suffix = ''
|
|
369
307
|
if (u8.byteLength % 2 !== 0) {
|
|
370
308
|
suffix = replacementChar
|
|
371
|
-
u8 = u8.subarray(0, -
|
|
309
|
+
u8 = u8.subarray(0, -unfinishedBytes(u8, u8.byteLength, enc))
|
|
372
310
|
}
|
|
373
311
|
|
|
374
312
|
return utf16toStringLoose(u8, enc === 'utf-16le' ? 'uint8-le' : 'uint8-be') + suffix
|
|
@@ -387,17 +325,3 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
|
387
325
|
|
|
388
326
|
return createSinglebyteDecoder(enc, true)(u8)
|
|
389
327
|
}
|
|
390
|
-
|
|
391
|
-
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
|
|
392
|
-
|
|
393
|
-
// Unlike normalizeEncoding, case-sensitive
|
|
394
|
-
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
395
|
-
export function labelToName(label) {
|
|
396
|
-
const enc = normalizeEncoding(label)
|
|
397
|
-
if (enc === 'utf-8') return 'UTF-8' // fast path
|
|
398
|
-
if (!enc) return enc
|
|
399
|
-
if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
|
|
400
|
-
if (enc === 'big5') return 'Big5'
|
|
401
|
-
if (enc === 'shift_jis') return 'Shift_JIS'
|
|
402
|
-
return enc
|
|
403
|
-
}
|
package/fallback/latin1.js
CHANGED
|
@@ -37,6 +37,7 @@ export function asciiPrefix(arr) {
|
|
|
37
37
|
const b = u32[i + 1]
|
|
38
38
|
const c = u32[i + 2]
|
|
39
39
|
const d = u32[i + 3]
|
|
40
|
+
// "(a | b | c | d) & mask" is slower on Hermes though faster on v8
|
|
40
41
|
if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break
|
|
41
42
|
}
|
|
42
43
|
|