@exodus/bytes 1.12.0 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -29
- package/array.js +1 -1
- package/base32.js +1 -3
- package/base58.js +3 -3
- package/base58check.d.ts +2 -2
- package/base58check.js +6 -7
- package/base64.js +7 -6
- package/bech32.js +3 -3
- package/encoding-browser.browser.js +43 -17
- package/fallback/_utils.js +7 -122
- package/fallback/base32.js +3 -3
- package/fallback/base58check.js +3 -3
- package/fallback/base64.js +2 -3
- package/fallback/encoding.api.js +0 -43
- package/fallback/encoding.js +41 -2
- package/fallback/encoding.labels.js +20 -16
- package/fallback/hex.js +3 -4
- package/fallback/latin1.js +6 -25
- package/fallback/percent.js +1 -1
- package/fallback/platform.browser.js +31 -0
- package/fallback/platform.js +2 -0
- package/fallback/platform.native.js +122 -0
- package/fallback/single-byte.encodings.js +40 -49
- package/fallback/single-byte.js +4 -4
- package/fallback/utf16.js +70 -3
- package/fallback/utf8.auto.browser.js +2 -0
- package/fallback/utf8.auto.js +1 -0
- package/fallback/utf8.auto.native.js +1 -0
- package/fallback/utf8.js +25 -3
- package/hex.js +6 -8
- package/hex.node.js +2 -3
- package/multi-byte.js +2 -2
- package/multi-byte.node.js +3 -3
- package/package.json +28 -7
- package/single-byte.js +9 -9
- package/single-byte.node.js +8 -8
- package/utf16.browser.js +8 -0
- package/utf16.js +1 -90
- package/utf16.native.js +22 -0
- package/utf16.node.js +5 -20
- package/utf8.js +9 -28
- package/utf8.node.js +3 -4
- package/whatwg.js +6 -2
package/fallback/encoding.api.js
CHANGED
|
@@ -1,32 +1,3 @@
|
|
|
1
|
-
import labels from './encoding.labels.js'
|
|
2
|
-
|
|
3
|
-
let labelsMap
|
|
4
|
-
|
|
5
|
-
export const E_ENCODING = 'Unknown encoding'
|
|
6
|
-
|
|
7
|
-
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
8
|
-
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
9
|
-
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
10
|
-
export function normalizeEncoding(label) {
|
|
11
|
-
// fast path
|
|
12
|
-
if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
|
|
13
|
-
if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
|
|
14
|
-
// full map
|
|
15
|
-
if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
|
|
16
|
-
const low = `${label}`.trim().toLowerCase()
|
|
17
|
-
if (Object.hasOwn(labels, low)) return low
|
|
18
|
-
if (!labelsMap) {
|
|
19
|
-
labelsMap = new Map()
|
|
20
|
-
for (const [label, aliases] of Object.entries(labels)) {
|
|
21
|
-
for (const alias of aliases) labelsMap.set(alias, label)
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
const mapped = labelsMap.get(low)
|
|
26
|
-
if (mapped) return mapped
|
|
27
|
-
return null
|
|
28
|
-
}
|
|
29
|
-
|
|
30
1
|
// TODO: make this more strict against Symbol.toStringTag
|
|
31
2
|
// Is not very significant though, anything faking Symbol.toStringTag could as well override
|
|
32
3
|
// prototypes, which is not something we protect against
|
|
@@ -65,17 +36,3 @@ export function getBOMEncoding(input) {
|
|
|
65
36
|
if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
|
|
66
37
|
return null
|
|
67
38
|
}
|
|
68
|
-
|
|
69
|
-
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
|
|
70
|
-
|
|
71
|
-
// Unlike normalizeEncoding, case-sensitive
|
|
72
|
-
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
73
|
-
export function labelToName(label) {
|
|
74
|
-
const enc = normalizeEncoding(label)
|
|
75
|
-
if (enc === 'utf-8') return 'UTF-8' // fast path
|
|
76
|
-
if (!enc) return enc
|
|
77
|
-
if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
|
|
78
|
-
if (enc === 'big5') return 'Big5'
|
|
79
|
-
if (enc === 'shift_jis') return 'Shift_JIS'
|
|
80
|
-
return enc
|
|
81
|
-
}
|
package/fallback/encoding.js
CHANGED
|
@@ -5,17 +5,56 @@ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
|
|
|
5
5
|
import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
|
|
6
6
|
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
|
|
7
7
|
import labels from './encoding.labels.js'
|
|
8
|
-
import { fromSource, getBOMEncoding
|
|
8
|
+
import { fromSource, getBOMEncoding } from './encoding.api.js'
|
|
9
9
|
import { unfinishedBytes, mergePrefix } from './encoding.util.js'
|
|
10
10
|
|
|
11
|
-
export {
|
|
11
|
+
export { getBOMEncoding } from './encoding.api.js'
|
|
12
12
|
|
|
13
|
+
export const E_ENCODING = 'Unknown encoding'
|
|
13
14
|
const E_MULTI = "import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support"
|
|
14
15
|
const E_OPTIONS = 'The "options" argument must be of type object'
|
|
15
16
|
const replacementChar = '\uFFFD'
|
|
16
17
|
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
|
|
17
18
|
let createMultibyteDecoder, multibyteEncoder
|
|
18
19
|
|
|
20
|
+
let labelsMap
|
|
21
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
22
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
23
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
24
|
+
// Resolves an encoding label to its canonical, lowercased encoding name.
// Returns null when the label contains disallowed characters or is not a known name or alias.
export function normalizeEncoding(label) {
  // The most common labels are answered directly, before any regex or map work
  switch (label) {
    case 'utf-8':
    case 'utf8':
    case 'UTF-8':
    case 'UTF8':
      return 'utf-8'
    case 'windows-1252':
    case 'ascii':
    case 'latin1':
      return 'windows-1252'
  }

  // Labels must be ASCII: word characters, ASCII whitespace, '.', ':' and '-' only
  if (/[^\w\t\n\f\r .:-]/i.test(label)) return null

  const key = `${label}`.trim().toLowerCase()
  if (Object.hasOwn(labels, key)) return key // already a canonical name

  // The alias -> canonical name index is built once, on the first lookup that needs it
  if (!labelsMap) {
    labelsMap = new Map()
    for (const [name, aliases] of Object.entries(labels)) {
      aliases.forEach((alias) => labelsMap.set(alias, name))
    }
  }

  return labelsMap.get(key) || null
}
|
|
43
|
+
|
|
44
|
+
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
|
|
45
|
+
|
|
46
|
+
// Unlike normalizeEncoding, case-sensitive
|
|
47
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
48
|
+
// Maps an encoding label to the cased form of its encoding name (e.g. 'UTF-8', 'Big5').
// Returns whatever falsy value normalizeEncoding produced when the label is not recognized.
export function labelToName(label) {
  const normalized = normalizeEncoding(label)
  if (!normalized) return normalized
  if (normalized === 'utf-8') return 'UTF-8' // fast path

  // Names starting with one of the listed three-letter prefixes are fully uppercased
  const prefix = normalized.slice(0, 3)
  if (uppercasePrefixes.has(prefix)) return normalized.toUpperCase()

  // Two names use mixed case; every other name is returned in lowercase as-is
  switch (normalized) {
    case 'big5':
      return 'Big5'
    case 'shift_jis':
      return 'Shift_JIS'
    default:
      return normalized
  }
}
|
|
57
|
+
|
|
19
58
|
export const isMultibyte = (enc) => multibyteSet.has(enc)
|
|
20
59
|
export function setMultibyte(createDecoder, createEncoder) {
|
|
21
60
|
createMultibyteDecoder = createDecoder
|
|
@@ -4,43 +4,47 @@
|
|
|
4
4
|
// prettier-ignore
|
|
5
5
|
const labels = {
|
|
6
6
|
'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
|
|
7
|
-
|
|
8
|
-
'
|
|
9
|
-
'iso-8859-
|
|
10
|
-
'iso-8859-
|
|
11
|
-
'iso-8859-
|
|
12
|
-
'iso-8859-
|
|
13
|
-
'iso-8859-
|
|
14
|
-
'iso-8859-
|
|
7
|
+
'utf-16be': ['unicodefffe'],
|
|
8
|
+
'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
|
|
9
|
+
'iso-8859-2': ['iso-ir-101'],
|
|
10
|
+
'iso-8859-3': ['iso-ir-109'],
|
|
11
|
+
'iso-8859-4': ['iso-ir-110'],
|
|
12
|
+
'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144'],
|
|
13
|
+
'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127'],
|
|
14
|
+
'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'sun_eu_greek'],
|
|
15
|
+
'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'visual'],
|
|
15
16
|
'iso-8859-8-i': ['csiso88598i', 'logical'],
|
|
16
|
-
'iso-8859-10': ['csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'],
|
|
17
|
-
'iso-8859-13': ['iso8859-13', 'iso885913'],
|
|
18
|
-
'iso-8859-14': ['iso8859-14', 'iso885914'],
|
|
19
|
-
'iso-8859-15': ['csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
|
|
20
17
|
'iso-8859-16': [],
|
|
21
18
|
'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
|
|
22
19
|
'koi8-u': ['koi8-ru'],
|
|
23
|
-
macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
|
|
24
20
|
'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
|
|
21
|
+
ibm866: ['866', 'cp866', 'csibm866'],
|
|
25
22
|
'x-mac-cyrillic': ['x-mac-ukrainian'],
|
|
23
|
+
macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
|
|
26
24
|
gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
|
|
27
25
|
gb18030: [],
|
|
28
26
|
big5: ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
|
|
29
27
|
'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
|
|
30
|
-
'iso-2022-jp': ['csiso2022jp'],
|
|
31
28
|
shift_jis: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j', 'x-sjis'],
|
|
32
29
|
'euc-kr': ['cseuckr', 'csksc56011987', 'iso-ir-149', 'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'],
|
|
30
|
+
'iso-2022-jp': ['csiso2022jp'],
|
|
33
31
|
replacement: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
|
|
34
|
-
'utf-16be': ['unicodefffe'],
|
|
35
|
-
'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
|
|
36
32
|
'x-user-defined': [],
|
|
37
33
|
}
|
|
38
34
|
|
|
35
|
+
for (const i of [10, 13, 14, 15]) labels[`iso-8859-${i}`] = [`iso8859-${i}`, `iso8859${i}`]
|
|
36
|
+
for (const i of [2, 6, 7]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1987`)
|
|
37
|
+
for (const i of [3, 4, 5, 8]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1988`)
|
|
38
|
+
// prettier-ignore
|
|
39
|
+
for (let i = 2; i < 9; i++) labels[`iso-8859-${i}`].push(`iso8859-${i}`, `iso8859${i}`, `iso_8859-${i}`)
|
|
40
|
+
for (let i = 2; i < 5; i++) labels[`iso-8859-${i}`].push(`csisolatin${i}`, `l${i}`, `latin${i}`)
|
|
39
41
|
for (let i = 0; i < 9; i++) labels[`windows-125${i}`] = [`cp125${i}`, `x-cp125${i}`]
|
|
40
42
|
|
|
41
43
|
// prettier-ignore
|
|
42
44
|
labels['windows-1252'].push('ansi_x3.4-1968', 'ascii', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii')
|
|
43
45
|
// prettier-ignore
|
|
44
46
|
labels['windows-1254'].push('csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5')
|
|
47
|
+
labels['iso-8859-10'].push('csisolatin6', 'iso-ir-157', 'l6', 'latin6')
|
|
48
|
+
labels['iso-8859-15'].push('csisolatin9', 'iso_8859-15', 'l9')
|
|
45
49
|
|
|
46
50
|
export default labels
|
package/fallback/hex.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { nativeDecoder, nativeEncoder, decode2string
|
|
1
|
+
import { E_STRING } from './_utils.js'
|
|
2
|
+
import { nativeDecoder, nativeEncoder, decode2string } from './platform.js'
|
|
3
3
|
import { encodeAscii, decodeAscii } from './latin1.js'
|
|
4
4
|
|
|
5
5
|
let hexArray // array of 256 bytes converted to two-char hex strings
|
|
@@ -11,9 +11,8 @@ const allowed = '0123456789ABCDEFabcdef'
|
|
|
11
11
|
|
|
12
12
|
export const E_HEX = 'Input is not a hex string'
|
|
13
13
|
|
|
14
|
+
// Expects a checked Uint8Array
|
|
14
15
|
export function toHex(arr) {
|
|
15
|
-
assertUint8(arr)
|
|
16
|
-
|
|
17
16
|
if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
|
|
18
17
|
const length = arr.length // this helps Hermes
|
|
19
18
|
|
package/fallback/latin1.js
CHANGED
|
@@ -3,21 +3,21 @@ import {
|
|
|
3
3
|
nativeDecoder,
|
|
4
4
|
nativeDecoderLatin1,
|
|
5
5
|
nativeBuffer,
|
|
6
|
+
encodeCharcodes,
|
|
6
7
|
isHermes,
|
|
7
8
|
isDeno,
|
|
8
9
|
isLE,
|
|
9
|
-
|
|
10
|
-
} from './_utils.js'
|
|
10
|
+
} from './platform.js'
|
|
11
11
|
|
|
12
|
-
const
|
|
13
|
-
const
|
|
12
|
+
const atob = /* @__PURE__ */ (() => globalThis.atob)()
|
|
13
|
+
const web64 = /* @__PURE__ */ (() => Uint8Array.prototype.toBase64)()
|
|
14
14
|
|
|
15
15
|
// See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
|
|
16
16
|
// On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
|
|
17
17
|
const maxFunctionArgs = 0x20_00
|
|
18
18
|
|
|
19
19
|
// toBase64+atob path is faster on everything where fromBase64 is fast
|
|
20
|
-
const useLatin1atob = web64 && atob
|
|
20
|
+
const useLatin1atob = web64 && atob
|
|
21
21
|
|
|
22
22
|
export function asciiPrefix(arr) {
|
|
23
23
|
let p = 0 // verified ascii bytes
|
|
@@ -108,25 +108,6 @@ export const decodeAscii = nativeBuffer
|
|
|
108
108
|
|
|
109
109
|
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
|
|
110
110
|
|
|
111
|
-
export const encodeCharcodes = isHermes
|
|
112
|
-
? (str, arr) => {
|
|
113
|
-
const length = str.length
|
|
114
|
-
if (length > 64) {
|
|
115
|
-
const at = str.charCodeAt.bind(str) // faster on strings from ~64 chars on Hermes, but can be 10x slower on e.g. JSC
|
|
116
|
-
for (let i = 0; i < length; i++) arr[i] = at(i)
|
|
117
|
-
} else {
|
|
118
|
-
for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
return arr
|
|
122
|
-
}
|
|
123
|
-
: (str, arr) => {
|
|
124
|
-
const length = str.length
|
|
125
|
-
// Can be optimized with unrolling, but this is not used on non-Hermes atm
|
|
126
|
-
for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
|
|
127
|
-
return arr
|
|
128
|
-
}
|
|
129
|
-
|
|
130
111
|
export function encodeAsciiPrefix(x, s) {
|
|
131
112
|
let i = 0
|
|
132
113
|
for (const len3 = s.length - 3; i < len3; i += 4) {
|
|
@@ -147,7 +128,7 @@ export function encodeAsciiPrefix(x, s) {
|
|
|
147
128
|
export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
|
|
148
129
|
|
|
149
130
|
// Expects nativeEncoder to be present
|
|
150
|
-
const useEncodeInto = isHermes && nativeEncoder?.encodeInto
|
|
131
|
+
const useEncodeInto = /* @__PURE__ */ (() => isHermes && nativeEncoder?.encodeInto)()
|
|
151
132
|
export const encodeAscii = useEncodeInto
|
|
152
133
|
? (str, ERR) => {
|
|
153
134
|
// Much faster in Hermes
|
package/fallback/percent.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { decodeAscii, encodeLatin1 } from './latin1.js'
|
|
2
|
-
import { decode2string } from './
|
|
2
|
+
import { decode2string } from './platform.js'
|
|
3
3
|
|
|
4
4
|
const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
|
|
5
5
|
const percentMap = new Map()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { decodePartAddition as decodePart } from './platform.native.js'
|
|
2
|
+
|
|
3
|
+
export { isLE, encodeCharcodesPure as encodeCharcodes } from './platform.native.js'
|
|
4
|
+
|
|
5
|
+
export const nativeBuffer = null
|
|
6
|
+
export const isHermes = false
|
|
7
|
+
export const isDeno = false
|
|
8
|
+
export const nativeEncoder = /* @__PURE__ */ (() => new TextEncoder())()
|
|
9
|
+
export const nativeDecoder = /* @__PURE__ */ (() => new TextDecoder('utf-8', { ignoreBOM: true }))()
|
|
10
|
+
export const nativeDecoderLatin1 = /* @__PURE__ */ (() =>
|
|
11
|
+
new TextDecoder('latin1', { ignoreBOM: true }))()
|
|
12
|
+
|
|
13
|
+
// Converts arr[start..end) to a string by looking each element up in the table m.
// Ranges longer than 30_000 elements are decoded in 500-element pieces and joined once.
export function decode2string(arr, start, end, m) {
  if (!(end - start > 30_000)) return decodePart(arr, start, end, m)

  // Limit concatenation to avoid excessive GC
  // Thresholds checked on Hermes for toHex
  const pieces = []
  let from = start
  while (from < end) {
    const candidate = from + 500
    const to = candidate > end ? end : candidate
    pieces.push(decodePart(arr, from, to, m))
    from = to
  }

  const joined = pieces.join('')
  pieces.length = 0 // empty the temporary array before returning
  return joined
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
const Buffer = globalThis.Buffer
// A Buffer that exposes a truthy TYPED_ARRAY_SUPPORT is not treated as native
// (presumably a userland polyfill -- confirm against the polyfill in use)
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
export const nativeBuffer = haveNativeBuffer ? Buffer : null
export const isHermes = /* @__PURE__ */ (() => Boolean(globalThis.HermesInternal))()
export const isDeno = /* @__PURE__ */ (() => Boolean(globalThis.Deno))()
// 0x0102 stored as a single uint16: its low byte (0x02) comes first only on little-endian
export const isLE = /* @__PURE__ */ (() => {
  const probe = new Uint8Array(Uint16Array.of(0x01_02).buffer)
  return probe[0] === 0x02
})()

// We consider Node.js TextDecoder/TextEncoder native
// Still needed in platform.native.js as this is re-exported to platform.js
let isNative = (x) => {
  return x && (haveNativeBuffer || `${x}`.includes('[native code]'))
}

// If a plain arrow function passes the source-text check, the check cannot be trusted
// (e.g. XS), so it is disabled to avoid false positives
if (!haveNativeBuffer && isNative(() => {})) {
  isNative = () => false
}

export const nativeEncoder = /* @__PURE__ */ (() => {
  if (!isNative(globalThis.TextEncoder)) return null
  return new TextEncoder()
})()
export const nativeDecoder = /* @__PURE__ */ (() => {
  if (!isNative(globalThis.TextDecoder)) return null
  return new TextDecoder('utf-8', { ignoreBOM: true })
})()
|
|
17
|
+
|
|
18
|
+
// Actually windows-1252, compatible with ascii and latin1 decoding
|
|
19
|
+
// Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
|
|
20
|
+
// in 2025 due to a regression, so we call it Latin1 as it's usable only for that
|
|
21
|
+
// A TextDecoder constructed with the 'latin1' label, or null when no native decoder is
// available or the engine rejects that label.
export const nativeDecoderLatin1 = /* @__PURE__ */ (() => {
  if (!nativeDecoder) return null

  // Some barebone engines ship a TextDecoder that supports only utf-8, so probe by constructing
  try {
    return new TextDecoder('latin1', { ignoreBOM: true })
  } catch {
    return null
  }
})()
|
|
31
|
+
|
|
32
|
+
// Builds a string from a[start..end) by appending m[element] for each element in order.
// Elements are consumed four at a time, with a one-by-one loop for the remainder.
export function decodePartAddition(a, start, end, m) {
  let out = ''
  let pos = start
  const unrolledEnd = end - 3
  while (pos < unrolledEnd) {
    // Read the four elements first, then append their table entries in order
    const b0 = a[pos]
    const b1 = a[pos + 1]
    const b2 = a[pos + 2]
    const b3 = a[pos + 3]
    out += m[b0]
    out += m[b1]
    out += m[b2]
    out += m[b3]
    pos += 4
  }

  // Up to three trailing elements
  for (; pos < end; pos++) out += m[a[pos]]
  return out
}
|
|
49
|
+
|
|
50
|
+
// Decoding with templates is faster on Hermes
|
|
51
|
+
// Builds a string from a[start..end) by appending m[element] for each element in order.
// Elements are consumed sixteen at a time through one template literal, with a
// one-by-one loop for the remainder.
export function decodePartTemplates(a, start, end, m) {
  let out = ''
  let pos = start
  const unrolledEnd = end - 15
  while (pos < unrolledEnd) {
    // Read all sixteen elements before doing any table lookups
    const b0 = a[pos]
    const b1 = a[pos + 1]
    const b2 = a[pos + 2]
    const b3 = a[pos + 3]
    const b4 = a[pos + 4]
    const b5 = a[pos + 5]
    const b6 = a[pos + 6]
    const b7 = a[pos + 7]
    const b8 = a[pos + 8]
    const b9 = a[pos + 9]
    const b10 = a[pos + 10]
    const b11 = a[pos + 11]
    const b12 = a[pos + 12]
    const b13 = a[pos + 13]
    const b14 = a[pos + 14]
    const b15 = a[pos + 15]
    out += `${m[b0]}${m[b1]}${m[b2]}${m[b3]}${m[b4]}${m[b5]}${m[b6]}${m[b7]}${m[b8]}${m[b9]}${m[b10]}${m[b11]}${m[b12]}${m[b13]}${m[b14]}${m[b15]}`
    pos += 16
  }

  // Up to fifteen trailing elements
  for (; pos < end; pos++) out += m[a[pos]]
  return out
}
|
|
77
|
+
|
|
78
|
+
const decodePart = isHermes ? decodePartTemplates : decodePartAddition
|
|
79
|
+
// Converts arr[start..end) to a string by looking each element up in the table m,
// using whichever decodePart implementation was selected for this engine.
// Ranges longer than 30_000 elements are decoded in 500-element pieces and joined once.
export function decode2string(arr, start, end, m) {
  const isLong = end - start > 30_000
  if (!isLong) return decodePart(arr, start, end, m)

  // Limit concatenation to avoid excessive GC
  // Thresholds checked on Hermes for toHex
  const chunks = []
  let cursor = start
  while (cursor < end) {
    const limit = cursor + 500
    const next = limit > end ? end : limit
    chunks.push(decodePart(arr, cursor, next, m))
    cursor = next
  }

  const text = chunks.join('')
  chunks.length = 0 // empty the temporary array before returning
  return text
}
|
|
98
|
+
|
|
99
|
+
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
|
|
100
|
+
|
|
101
|
+
// Writes the UTF-16 code unit of every character of str into arr at the same index
// and returns arr. The destination is expected to be at least str.length long.
function encodeCharcodesHermes(str, arr) {
  const total = str.length
  if (!(total > 64)) {
    // Short strings: call the method directly
    for (let k = 0; k < total; k++) arr[k] = str.charCodeAt(k)
    return arr
  }

  // A bound charCodeAt is faster on Hermes from ~64 chars, but can be 10x slower on e.g. JSC
  const codeAt = str.charCodeAt.bind(str)
  for (let k = 0; k < total; k++) arr[k] = codeAt(k)
  return arr
}
|
|
112
|
+
|
|
113
|
+
// Writes the UTF-16 code unit of every character of str into arr at the same index
// and returns arr.
export function encodeCharcodesPure(str, arr) {
  const total = str.length
  // Can be optimized with unrolling, but this is not used on non-Hermes atm
  let k = 0
  while (k < total) {
    arr[k] = str.charCodeAt(k)
    k++
  }

  return arr
}
|
|
119
|
+
|
|
120
|
+
/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
|
|
121
|
+
|
|
122
|
+
export const encodeCharcodes = isHermes ? encodeCharcodesHermes : encodeCharcodesPure
|
|
@@ -1,57 +1,48 @@
|
|
|
1
1
|
// See tests/encoding/fixtures/single-byte/dump.js for generator
|
|
2
2
|
|
|
3
3
|
const r = 0xff_fd
|
|
4
|
-
const e = (x) => new Array(x).fill(1)
|
|
5
|
-
const h = (x) => new Array(x).fill(r)
|
|
6
4
|
|
|
7
5
|
/* eslint-disable unicorn/numeric-separators-style, @exodus/export-default/named */
|
|
8
6
|
|
|
9
7
|
// Common ranges
|
|
10
8
|
|
|
11
9
|
// prettier-ignore
|
|
12
|
-
const i2 = [
|
|
13
|
-
const i4a = [-75, -63, e(5), 104, -34, -67, 79, -77, 75, -73, 1]
|
|
14
|
-
const i4b = [34, -32, e(5), 73, -34, -36, 48, -46, 44, -42, 1]
|
|
15
|
-
const i7 = [721, 1, 1, -719, 721, -719, 721, e(19), r, 2, e(43), r]
|
|
16
|
-
const i8 = [e(26), r, r, 6692, 1, r]
|
|
17
|
-
const i9 = [79, -77, e(11), 84, 46, -127, e(16), 48, -46, e(11), 53, 46]
|
|
18
|
-
const iB = [3425, e(57), h(4), 5, e(28), h(4)]
|
|
19
|
-
const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
|
|
20
|
-
const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
|
|
21
|
-
const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
|
|
22
|
-
const w8 = [8072, 1, 3, 1, 5, -15, 1]
|
|
23
|
-
const w1 = [w8, -7480, 7750, -8129, 7897, -7911, -182]
|
|
24
|
-
const w3 = [w8, -8060, 8330, -8328, 8096, -8094]
|
|
25
|
-
const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
|
|
10
|
+
const i2 = [189,148,0,0,63,0,116,64,0,68,0,78,0,78,0,0,63,64,114,117,0,0,123,0,0,128,149,0,149,0,0,132,0,117,0,0,32,0,85,33,0,37,0,47,0,47,0,0,32,33,83,86,0,0,92,0,0,97,118,0,118,0,0,101,474]
|
|
26
11
|
// prettier-ignore
|
|
27
|
-
const
|
|
12
|
+
const iB = [[58,3424],[4,r],[29,3424],[4,r]]
|
|
13
|
+
const i9 = [[47], 78, [12], 83, 128, [17], 47, [12], 52, 97]
|
|
14
|
+
const w1 = [8236, 0, 8088, 0, 8090, 8097, 8090, 8090, 0, 8103]
|
|
15
|
+
const w2 = [8236, 0, 8088, 271, 8090, 8097, 8090, 8090, 574, 8103]
|
|
28
16
|
// prettier-ignore
|
|
29
|
-
const
|
|
17
|
+
const w7 = [64,0,157,[4],39,68,109,62,67,0,0,82,75,68,0,175,75,86,105,92,108,144,114,115,0,120,[3],154,104,128,143,0,158,159,0,37,78,31,36,0,0,51,44,37,0,144,44,55,74,61,77,113,83,84,0,89,[3],123,73,97,112,0,127,128]
|
|
18
|
+
const w8 = [8071, 8071, 8073, 8073, 8077, 8061, 8061]
|
|
30
19
|
// prettier-ignore
|
|
31
|
-
const k8b = [-
|
|
20
|
+
const k8b = [-22,910,879,879,899,880,880,894,876,893,[8,879],894,[4,878],864,859,884,882,861,877,881,876,873,875,846,815,815,835,816,816,830,812,829,[8,815],830,[4,814],800,795,820,818,797,813,817,812,809,811]
|
|
21
|
+
// prettier-ignore
|
|
22
|
+
const k8a = [9344,9345,9354,9357,9360,9363,9366,9373,9380,9387,9394,9461,9464,9467,9470,[4,9473],8845,9484,8580,8580,8625,8652,8652,6,8838,20,21,25,88,[3,9392],942]
|
|
32
23
|
|
|
33
24
|
// prettier-ignore
|
|
34
25
|
const maps = {
|
|
35
|
-
ibm866: [
|
|
36
|
-
'koi8-
|
|
37
|
-
'koi8-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
'windows-874': [
|
|
26
|
+
ibm866: [[48,912],[3,9441],...[29,62,122,122,109,107,120,101,106,111,109,107,31,34,65,56,39,10,69,102,102,96,89,109,105,98,81,108,102,102,97,97,84,82,75,75,98,96,13,0,123,118,125,128,111].map(x=>x+9266),[16,864],785,864,786,865,787,866,792,871,-72,8480,-67,8479,8218,-89,9378,-95],
|
|
27
|
+
'koi8-u': [...k8a,944,9391,944,944,[5,9391],996,944,[4,9391],846,848,9390,848,848,[5,9390],979,848,...k8b],
|
|
28
|
+
'koi8-r': [...k8a,[15,9391],846,[11,9390],...k8b],
|
|
29
|
+
macintosh: [68,68,69,70,77,81,86,90,88,89,90,88,89,90,91,89,90,90,91,89,90,90,91,92,90,91,92,90,94,92,93,93,8064,15,0,0,3,8061,16,56,6,0,8312,9,-4,8627,24,41,8558,0,8626,8626,-15,0,8524,8538,8535,775,8561,-17,-2,748,40,57,-1,-32,-22,8535,206,8579,8512,-28,-13,8029,-42,-11,-9,8,132,132,8003,8003,8010,8010,8004,8004,33,9459,39,159,8042,8145,8029,8029,64035,64035,8001,-42,7992,7995,8012,-35,-28,-38,-29,-33,[3,-29],-33,-27,-27,63503,-31,-24,-24,-27,60,464,485,-73,[3,479],-68,480,477,456],
|
|
30
|
+
'x-mac-cyrillic': [[32,912],8064,15,1006,0,3,8061,16,863,6,0,8312,855,934,8627,853,932,8558,0,8626,8626,930,0,987,849,844,923,845,924,845,924,844,923,920,836,-22,8535,206,8579,8512,-28,-13,8029,-42,832,911,831,910,902,8003,8003,8010,8010,8004,8004,33,8007,822,901,821,900,8250,804,883,880,[31,848],8109],
|
|
31
|
+
'windows-874': [8236,[4],8097,[11],...w8,[9],...iB],
|
|
41
32
|
}
|
|
42
33
|
|
|
43
34
|
// windows-1250 - windows-1258
|
|
44
35
|
// prettier-ignore
|
|
45
36
|
;[
|
|
46
|
-
[
|
|
47
|
-
[
|
|
48
|
-
[
|
|
49
|
-
[
|
|
50
|
-
[
|
|
51
|
-
[
|
|
52
|
-
[
|
|
53
|
-
[
|
|
54
|
-
[
|
|
37
|
+
[...w1,214,8110,206,215,239,234,0,...w8,0,8329,199,8095,191,200,224,219,0,550,566,158,0,95,[4],180,[4],204,0,0,553,143,[5],76,165,0,129,544,128,...i2],
|
|
38
|
+
[898,898,8088,976,8090,8097,8090,8090,8228,8103,895,8110,894,895,893,896,962,...w8,0,8329,959,8095,958,959,957,960,0,877,956,869,0,1003,0,0,857,0,858,[4],856,0,0,852,931,989,[3],921,8285,922,0,924,840,919,920,[64,848]],
|
|
39
|
+
[...w2,214,8110,198,0,239,0,0,...w8,580,8329,199,8095,183,0,224,217],
|
|
40
|
+
[8236,0,8088,271,8090,8097,8090,8090,0,8103,0,8110,[5],...w8,0,8329,0,8095,[5],740,740,[7],r,[4],8038,[4],720,[3],[3,720],0,720,0,[20,720],r,[44,720],r],
|
|
41
|
+
[...w2,214,8110,198,[4],...w8,580,8329,199,8095,183,0,0,217,0,...i9],
|
|
42
|
+
[...w2,0,8110,[5],...w8,580,8329,0,8095,[8],8198,[5],45,[15],61,[5],[20,1264],[5,1308],[7,r],[27,1264],r,r,7953,7953,r],
|
|
43
|
+
[8236,1533,8088,271,8090,8097,8090,8090,574,8103,1519,8110,198,1529,1546,1529,1567,...w8,1553,8329,1527,8095,183,8047,8047,1563,0,1387,[8],1556,[15],1377,[4],1376,1537,[22,1376],0,[4,1375],[4,1380],0,1379,0,[4,1378],[5],1373,1373,0,0,[4,1371],0,1370,1370,0,1369,0,1368,0,0,7953,7953,1491],
|
|
44
|
+
[...w1,0,8110,0,27,569,41,0,...w8,0,8329,0,8095,0,18,573,0,0,r,[3],r,0,0,48,0,172,[4],23,[8],...w7,474],
|
|
45
|
+
[...w2,0,8110,198,[4],...w8,580,8329,0,8095,183,0,0,217,[35],63,[8],564,[3],64,0,567,0,0,203,[7],210,549,[4],32,[8],533,[3],33,0,561,0,0,172,[7],179,8109],
|
|
55
46
|
].forEach((m, i) => {
|
|
56
47
|
maps[`windows-${i + 1250}`] = m
|
|
57
48
|
});
|
|
@@ -60,23 +51,23 @@ const maps = {
|
|
|
60
51
|
// prettier-ignore
|
|
61
52
|
;[
|
|
62
53
|
[], // Actual Latin1 / Unicode subset, non-WHATWG, which maps iso-8859-1 to windows-1252
|
|
63
|
-
[
|
|
64
|
-
[
|
|
65
|
-
[
|
|
66
|
-
[
|
|
67
|
-
[
|
|
68
|
-
[
|
|
69
|
-
[r,
|
|
70
|
-
|
|
71
|
-
[
|
|
54
|
+
[99,566,158,0,152,180,0,0,183,180,185,205,0,207,204,0,84,553,143,0,137,165,528,0,168,165,170,190,544,192,...i2],
|
|
55
|
+
[133,566,0,0,r,126,0,0,135,180,115,136,0,r,204,0,118,[4],111,0,0,120,165,100,121,0,r,189,[3],r,0,69,66,[9],r,[4],75,0,0,68,[4],143,126,[4],r,0,38,35,[9],r,[4],44,0,0,37,[4],112,95,474],
|
|
56
|
+
[99,150,179,0,131,149,0,0,183,104,119,186,0,207,0,0,84,553,164,0,116,134,528,0,168,89,104,171,141,192,140,64,[6],103,68,0,78,0,74,0,0,91,64,116,122,99,[5],153,[3],139,140,0,33,[6],72,37,0,47,0,43,0,0,60,33,85,91,68,[5],122,[3],108,109,474],
|
|
57
|
+
[[12,864],0,[66,864],8230,[12,864],-86,864,864],
|
|
58
|
+
[[3,r],0,[7,r],1376,0,[13,r],1376,[3,r],1376,r,[26,1376],[5,r],[19,1376],[13,r]],
|
|
59
|
+
[8055,8055,0,8200,8202,[4],720,[3],r,8038,[4],[3,720],0,[3,720],0,720,0,[20,720],r,[44,720],r],
|
|
60
|
+
[r,[8],45,[15],61,[4],[32,r],7992,[27,1264],r,r,7953,7953,r],
|
|
61
|
+
i9, // non-WHATWG, which maps iso-8859-9 to windows-1254
|
|
62
|
+
[99,112,127,134,131,144,0,147,103,182,187,209,0,188,155,0,84,97,112,119,116,129,0,132,88,167,172,194,8024,173,140,64,[6],103,68,0,78,0,74,[4],116,122,[4],145,0,153,[6],33,[6],72,37,0,47,0,43,[4],85,91,[4],114,0,122,[5],57],
|
|
72
63
|
iB, // non-WHATWG, which maps iso-8859-11 to windows-874
|
|
73
64
|
null, // no 12
|
|
74
|
-
[
|
|
75
|
-
[
|
|
76
|
-
[
|
|
77
|
-
[
|
|
65
|
+
[8060,[3],8057,0,0,48,0,172,[4],23,[4],8040,[3],...w7,7962],
|
|
66
|
+
[7521,7521,0,102,102,7524,0,7640,0,7640,7520,7750,0,0,201,7534,7534,110,110,7564,7564,0,7583,7625,7582,7625,7589,7735,7623,7623,7586,[16],164,[6],7571,[6],152,[17],133,[6],7540,[6],121],
|
|
67
|
+
[[3],8200,0,186,0,185,[11],201,[3],198,[3],150,150,186],
|
|
68
|
+
[99,99,158,8200,8057,186,0,185,0,366,0,205,0,204,204,0,0,90,143,201,8040,0,0,198,84,351,0,150,150,186,189,[3],63,0,65,[10],64,114,[3],123,0,131,152,[4],59,316,[4],32,0,34,[10],33,83,[3],92,0,100,121,[4],28,285],
|
|
78
69
|
].forEach((m, i) => {
|
|
79
|
-
if (m) maps[`iso-8859-${i + 1}`] = [
|
|
80
|
-
})
|
|
70
|
+
if (m) maps[`iso-8859-${i + 1}`] = [[33], ...m]
|
|
71
|
+
});
|
|
81
72
|
|
|
82
73
|
export default maps
|
package/fallback/single-byte.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
|
|
2
2
|
import encodings from './single-byte.encodings.js'
|
|
3
|
-
import { decode2string, nativeDecoder } from './
|
|
3
|
+
import { decode2string, nativeDecoder } from './platform.js'
|
|
4
4
|
|
|
5
5
|
export const E_STRICT = 'Input is not well-formed for this encoding'
|
|
6
6
|
const xUserDefined = 'x-user-defined'
|
|
@@ -17,9 +17,9 @@ export function getEncoding(encoding) {
|
|
|
17
17
|
assertEncoding(encoding)
|
|
18
18
|
if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
|
|
19
19
|
if (encoding === iso8i) encoding = 'iso-8859-8'
|
|
20
|
-
|
|
21
|
-
const
|
|
22
|
-
return
|
|
20
|
+
const enc = encodings[encoding]
|
|
21
|
+
const deltas = enc.flatMap((x) => (Array.isArray(x) ? new Array(x[0]).fill(x[1] ?? 0) : x))
|
|
22
|
+
return deltas.map((x, i) => (x === r ? x : x + 128 + i))
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
const mappers = new Map()
|