@exodus/bytes 1.11.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -35
- package/base58.js +3 -3
- package/base64.js +7 -6
- package/bech32.js +3 -3
- package/encoding-browser.browser.js +43 -17
- package/fallback/_utils.js +7 -123
- package/fallback/base32.js +3 -3
- package/fallback/base58check.js +3 -3
- package/fallback/base64.js +2 -3
- package/fallback/encoding.api.js +0 -43
- package/fallback/encoding.js +41 -2
- package/fallback/encoding.labels.js +20 -16
- package/fallback/hex.js +3 -4
- package/fallback/latin1.js +6 -6
- package/fallback/multi-byte.table.js +17 -28
- package/fallback/percent.js +1 -1
- package/fallback/platform.browser.js +31 -0
- package/fallback/platform.js +2 -0
- package/fallback/platform.native.js +97 -0
- package/fallback/single-byte.encodings.js +40 -49
- package/fallback/single-byte.js +4 -4
- package/fallback/utf16.js +69 -2
- package/fallback/utf8.auto.browser.js +2 -0
- package/fallback/utf8.auto.js +1 -0
- package/fallback/utf8.auto.native.js +1 -0
- package/fallback/utf8.js +25 -3
- package/hex.js +6 -8
- package/hex.node.js +2 -3
- package/multi-byte.js +2 -2
- package/multi-byte.node.js +3 -3
- package/package.json +32 -9
- package/single-byte.js +6 -6
- package/single-byte.node.js +4 -4
- package/utf16.browser.js +8 -0
- package/utf16.js +1 -75
- package/utf16.native.js +22 -0
- package/utf16.node.js +5 -20
- package/utf8.js +9 -28
- package/utf8.node.js +3 -4
- package/whatwg.js +6 -2
package/fallback/encoding.js
CHANGED
|
@@ -5,17 +5,56 @@ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
|
|
|
5
5
|
import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
|
|
6
6
|
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
|
|
7
7
|
import labels from './encoding.labels.js'
|
|
8
|
-
import { fromSource, getBOMEncoding
|
|
8
|
+
import { fromSource, getBOMEncoding } from './encoding.api.js'
|
|
9
9
|
import { unfinishedBytes, mergePrefix } from './encoding.util.js'
|
|
10
10
|
|
|
11
|
-
export {
|
|
11
|
+
export { getBOMEncoding } from './encoding.api.js'
|
|
12
12
|
|
|
13
|
+
export const E_ENCODING = 'Unknown encoding'
|
|
13
14
|
const E_MULTI = "import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support"
|
|
14
15
|
const E_OPTIONS = 'The "options" argument must be of type object'
|
|
15
16
|
const replacementChar = '\uFFFD'
|
|
16
17
|
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
|
|
17
18
|
let createMultibyteDecoder, multibyteEncoder
|
|
18
19
|
|
|
20
|
+
let labelsMap
|
|
21
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
22
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
23
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
24
|
+
export function normalizeEncoding(label) {
|
|
25
|
+
// fast path
|
|
26
|
+
if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
|
|
27
|
+
if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
|
|
28
|
+
// full map
|
|
29
|
+
if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
|
|
30
|
+
const low = `${label}`.trim().toLowerCase()
|
|
31
|
+
if (Object.hasOwn(labels, low)) return low
|
|
32
|
+
if (!labelsMap) {
|
|
33
|
+
labelsMap = new Map()
|
|
34
|
+
for (const [name, aliases] of Object.entries(labels)) {
|
|
35
|
+
for (const alias of aliases) labelsMap.set(alias, name)
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
const mapped = labelsMap.get(low)
|
|
40
|
+
if (mapped) return mapped
|
|
41
|
+
return null
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
|
|
45
|
+
|
|
46
|
+
// Unlike normalizeEncoding, case-sensitive
|
|
47
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
48
|
+
export function labelToName(label) {
|
|
49
|
+
const enc = normalizeEncoding(label)
|
|
50
|
+
if (enc === 'utf-8') return 'UTF-8' // fast path
|
|
51
|
+
if (!enc) return enc
|
|
52
|
+
if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
|
|
53
|
+
if (enc === 'big5') return 'Big5'
|
|
54
|
+
if (enc === 'shift_jis') return 'Shift_JIS'
|
|
55
|
+
return enc
|
|
56
|
+
}
|
|
57
|
+
|
|
19
58
|
export const isMultibyte = (enc) => multibyteSet.has(enc)
|
|
20
59
|
export function setMultibyte(createDecoder, createEncoder) {
|
|
21
60
|
createMultibyteDecoder = createDecoder
|
|
@@ -4,43 +4,47 @@
|
|
|
4
4
|
// prettier-ignore
|
|
5
5
|
const labels = {
|
|
6
6
|
'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
|
|
7
|
-
|
|
8
|
-
'
|
|
9
|
-
'iso-8859-
|
|
10
|
-
'iso-8859-
|
|
11
|
-
'iso-8859-
|
|
12
|
-
'iso-8859-
|
|
13
|
-
'iso-8859-
|
|
14
|
-
'iso-8859-
|
|
7
|
+
'utf-16be': ['unicodefffe'],
|
|
8
|
+
'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
|
|
9
|
+
'iso-8859-2': ['iso-ir-101'],
|
|
10
|
+
'iso-8859-3': ['iso-ir-109'],
|
|
11
|
+
'iso-8859-4': ['iso-ir-110'],
|
|
12
|
+
'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144'],
|
|
13
|
+
'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127'],
|
|
14
|
+
'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'sun_eu_greek'],
|
|
15
|
+
'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'visual'],
|
|
15
16
|
'iso-8859-8-i': ['csiso88598i', 'logical'],
|
|
16
|
-
'iso-8859-10': ['csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'],
|
|
17
|
-
'iso-8859-13': ['iso8859-13', 'iso885913'],
|
|
18
|
-
'iso-8859-14': ['iso8859-14', 'iso885914'],
|
|
19
|
-
'iso-8859-15': ['csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
|
|
20
17
|
'iso-8859-16': [],
|
|
21
18
|
'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
|
|
22
19
|
'koi8-u': ['koi8-ru'],
|
|
23
|
-
macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
|
|
24
20
|
'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
|
|
21
|
+
ibm866: ['866', 'cp866', 'csibm866'],
|
|
25
22
|
'x-mac-cyrillic': ['x-mac-ukrainian'],
|
|
23
|
+
macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
|
|
26
24
|
gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
|
|
27
25
|
gb18030: [],
|
|
28
26
|
big5: ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
|
|
29
27
|
'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
|
|
30
|
-
'iso-2022-jp': ['csiso2022jp'],
|
|
31
28
|
shift_jis: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j', 'x-sjis'],
|
|
32
29
|
'euc-kr': ['cseuckr', 'csksc56011987', 'iso-ir-149', 'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'],
|
|
30
|
+
'iso-2022-jp': ['csiso2022jp'],
|
|
33
31
|
replacement: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
|
|
34
|
-
'utf-16be': ['unicodefffe'],
|
|
35
|
-
'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
|
|
36
32
|
'x-user-defined': [],
|
|
37
33
|
}
|
|
38
34
|
|
|
35
|
+
for (const i of [10, 13, 14, 15]) labels[`iso-8859-${i}`] = [`iso8859-${i}`, `iso8859${i}`]
|
|
36
|
+
for (const i of [2, 6, 7]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1987`)
|
|
37
|
+
for (const i of [3, 4, 5, 8]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1988`)
|
|
38
|
+
// prettier-ignore
|
|
39
|
+
for (let i = 2; i < 9; i++) labels[`iso-8859-${i}`].push(`iso8859-${i}`, `iso8859${i}`, `iso_8859-${i}`)
|
|
40
|
+
for (let i = 2; i < 5; i++) labels[`iso-8859-${i}`].push(`csisolatin${i}`, `l${i}`, `latin${i}`)
|
|
39
41
|
for (let i = 0; i < 9; i++) labels[`windows-125${i}`] = [`cp125${i}`, `x-cp125${i}`]
|
|
40
42
|
|
|
41
43
|
// prettier-ignore
|
|
42
44
|
labels['windows-1252'].push('ansi_x3.4-1968', 'ascii', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii')
|
|
43
45
|
// prettier-ignore
|
|
44
46
|
labels['windows-1254'].push('csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5')
|
|
47
|
+
labels['iso-8859-10'].push('csisolatin6', 'iso-ir-157', 'l6', 'latin6')
|
|
48
|
+
labels['iso-8859-15'].push('csisolatin9', 'iso_8859-15', 'l9')
|
|
45
49
|
|
|
46
50
|
export default labels
|
package/fallback/hex.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { nativeDecoder, nativeEncoder, decode2string
|
|
1
|
+
import { E_STRING } from './_utils.js'
|
|
2
|
+
import { nativeDecoder, nativeEncoder, decode2string } from './platform.js'
|
|
3
3
|
import { encodeAscii, decodeAscii } from './latin1.js'
|
|
4
4
|
|
|
5
5
|
let hexArray // array of 256 bytes converted to two-char hex strings
|
|
@@ -11,9 +11,8 @@ const allowed = '0123456789ABCDEFabcdef'
|
|
|
11
11
|
|
|
12
12
|
export const E_HEX = 'Input is not a hex string'
|
|
13
13
|
|
|
14
|
+
// Expects a checked Uint8Array
|
|
14
15
|
export function toHex(arr) {
|
|
15
|
-
assertUint8(arr)
|
|
16
|
-
|
|
17
16
|
if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
|
|
18
17
|
const length = arr.length // this helps Hermes
|
|
19
18
|
|
package/fallback/latin1.js
CHANGED
|
@@ -6,18 +6,17 @@ import {
|
|
|
6
6
|
isHermes,
|
|
7
7
|
isDeno,
|
|
8
8
|
isLE,
|
|
9
|
-
|
|
10
|
-
} from './_utils.js'
|
|
9
|
+
} from './platform.js'
|
|
11
10
|
|
|
12
|
-
const
|
|
13
|
-
const
|
|
11
|
+
const atob = /* @__PURE__ */ (() => globalThis.atob)()
|
|
12
|
+
const web64 = /* @__PURE__ */ (() => Uint8Array.prototype.toBase64)()
|
|
14
13
|
|
|
15
14
|
// See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
|
|
16
15
|
// On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
|
|
17
16
|
const maxFunctionArgs = 0x20_00
|
|
18
17
|
|
|
19
18
|
// toBase64+atob path is faster on everything where fromBase64 is fast
|
|
20
|
-
const useLatin1atob = web64 && atob
|
|
19
|
+
const useLatin1atob = web64 && atob
|
|
21
20
|
|
|
22
21
|
export function asciiPrefix(arr) {
|
|
23
22
|
let p = 0 // verified ascii bytes
|
|
@@ -147,7 +146,8 @@ export function encodeAsciiPrefix(x, s) {
|
|
|
147
146
|
export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
|
|
148
147
|
|
|
149
148
|
// Expects nativeEncoder to be present
|
|
150
|
-
|
|
149
|
+
const useEncodeInto = /* @__PURE__ */ (() => isHermes && nativeEncoder?.encodeInto)()
|
|
150
|
+
export const encodeAscii = useEncodeInto
|
|
151
151
|
? (str, ERR) => {
|
|
152
152
|
// Much faster in Hermes
|
|
153
153
|
const codes = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { fromBase64url } from '@exodus/bytes/base64.js'
|
|
2
2
|
import { utf16toString } from '@exodus/bytes/utf16.js'
|
|
3
3
|
import loadEncodings from './multi-byte.encodings.cjs'
|
|
4
|
-
import { to16input } from './utf16.js'
|
|
5
4
|
|
|
6
5
|
export const sizes = {
|
|
7
6
|
jis0208: 11_104,
|
|
@@ -40,7 +39,7 @@ function loadBase64(str) {
|
|
|
40
39
|
return y
|
|
41
40
|
}
|
|
42
41
|
|
|
43
|
-
function unwrap(res, t, pos
|
|
42
|
+
function unwrap(res, t, pos) {
|
|
44
43
|
let code = 0
|
|
45
44
|
for (let i = 0; i < t.length; i++) {
|
|
46
45
|
let x = t[i]
|
|
@@ -55,35 +54,26 @@ function unwrap(res, t, pos, highMode = false) {
|
|
|
55
54
|
code += t[++i]
|
|
56
55
|
}
|
|
57
56
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
res[pos] = (c.charCodeAt(0) << 16) | c.charCodeAt(1)
|
|
65
|
-
}
|
|
57
|
+
for (let k = 0; k < x; k++, pos++, code++) {
|
|
58
|
+
if (code <= 0xff_ff) {
|
|
59
|
+
res[pos] = code
|
|
60
|
+
} else {
|
|
61
|
+
const c = String.fromCodePoint(code)
|
|
62
|
+
res[pos] = (c.charCodeAt(0) << 16) | c.charCodeAt(1)
|
|
66
63
|
}
|
|
67
|
-
} else {
|
|
68
|
-
for (let k = 0; k < x; k++, pos++, code++) res[pos] = code
|
|
69
64
|
}
|
|
70
65
|
}
|
|
71
66
|
} else if (x[0] === '$' && Object.hasOwn(indices, x)) {
|
|
72
|
-
pos = unwrap(res, indices[x], pos
|
|
73
|
-
} else
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
for (
|
|
77
|
-
|
|
67
|
+
pos = unwrap(res, indices[x], pos) // self-reference using shared chunks
|
|
68
|
+
} else {
|
|
69
|
+
let last
|
|
70
|
+
// splits by codepoints
|
|
71
|
+
for (const c of utf16toString(loadBase64(x), 'uint8-le')) {
|
|
72
|
+
last = c
|
|
78
73
|
res[pos++] = c.length === 1 ? c.charCodeAt(0) : (c.charCodeAt(0) << 16) | c.charCodeAt(1)
|
|
79
74
|
}
|
|
80
75
|
|
|
81
|
-
code =
|
|
82
|
-
} else {
|
|
83
|
-
const u16 = to16input(loadBase64(x), true) // data is little-endian
|
|
84
|
-
res.set(u16, pos)
|
|
85
|
-
pos += u16.length
|
|
86
|
-
code = u16[u16.length - 1] + 1
|
|
76
|
+
code = last.codePointAt(0) + 1
|
|
87
77
|
}
|
|
88
78
|
}
|
|
89
79
|
|
|
@@ -108,9 +98,8 @@ export function getTable(id) {
|
|
|
108
98
|
let a = -1
|
|
109
99
|
res = new Uint16Array(indices[id].map((x) => (a += x + 1)))
|
|
110
100
|
} else if (id === 'big5') {
|
|
111
|
-
|
|
112
|
-
res
|
|
113
|
-
unwrap(res, indices[id], 0, true)
|
|
101
|
+
res = new Uint32Array(sizes[id]) // single or double charcodes
|
|
102
|
+
unwrap(res, indices[id], 0)
|
|
114
103
|
// Pointer code updates are embedded into the table
|
|
115
104
|
// These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157
|
|
116
105
|
res[1133] = 0xca_03_04
|
|
@@ -120,7 +109,7 @@ export function getTable(id) {
|
|
|
120
109
|
} else {
|
|
121
110
|
if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
|
|
122
111
|
res = new Uint16Array(sizes[id])
|
|
123
|
-
unwrap(res, indices[id], 0
|
|
112
|
+
unwrap(res, indices[id], 0)
|
|
124
113
|
}
|
|
125
114
|
|
|
126
115
|
indices[id] = null // gc
|
package/fallback/percent.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { decodeAscii, encodeLatin1 } from './latin1.js'
|
|
2
|
-
import { decode2string } from './
|
|
2
|
+
import { decode2string } from './platform.js'
|
|
3
3
|
|
|
4
4
|
const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
|
|
5
5
|
const percentMap = new Map()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { decodePartAddition as decodePart } from './platform.native.js'
|
|
2
|
+
|
|
3
|
+
export const nativeBuffer = null
|
|
4
|
+
export const isHermes = false
|
|
5
|
+
export const isDeno = false
|
|
6
|
+
export const nativeEncoder = /* @__PURE__ */ (() => new TextEncoder())()
|
|
7
|
+
export const nativeDecoder = /* @__PURE__ */ (() => new TextDecoder('utf-8', { ignoreBOM: true }))()
|
|
8
|
+
export const nativeDecoderLatin1 = /* @__PURE__ */ (() =>
|
|
9
|
+
new TextDecoder('latin1', { ignoreBOM: true }))()
|
|
10
|
+
|
|
11
|
+
export { isLE } from './platform.native.js'
|
|
12
|
+
|
|
13
|
+
export function decode2string(arr, start, end, m) {
|
|
14
|
+
if (end - start > 30_000) {
|
|
15
|
+
// Limit concatenation to avoid excessive GC
|
|
16
|
+
// Thresholds checked on Hermes for toHex
|
|
17
|
+
const concat = []
|
|
18
|
+
for (let i = start; i < end; ) {
|
|
19
|
+
const step = i + 500
|
|
20
|
+
const iNext = step > end ? end : step
|
|
21
|
+
concat.push(decodePart(arr, i, iNext, m))
|
|
22
|
+
i = iNext
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
const res = concat.join('')
|
|
26
|
+
concat.length = 0
|
|
27
|
+
return res
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
return decodePart(arr, start, end, m)
|
|
31
|
+
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
const { Buffer } = globalThis
|
|
2
|
+
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
3
|
+
export const nativeBuffer = haveNativeBuffer ? Buffer : null
|
|
4
|
+
export const isHermes = /* @__PURE__ */ (() => !!globalThis.HermesInternal)()
|
|
5
|
+
export const isDeno = /* @__PURE__ */ (() => !!globalThis.Deno)()
|
|
6
|
+
export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
|
|
7
|
+
|
|
8
|
+
// We consider Node.js TextDecoder/TextEncoder native
|
|
9
|
+
// Still needed in platform.native.js as this is re-exported to platform.js
|
|
10
|
+
let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
|
|
11
|
+
if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
|
|
12
|
+
|
|
13
|
+
export const nativeEncoder = /* @__PURE__ */ (() =>
|
|
14
|
+
isNative(globalThis.TextEncoder) ? new TextEncoder() : null)()
|
|
15
|
+
export const nativeDecoder = /* @__PURE__ */ (() =>
|
|
16
|
+
isNative(globalThis.TextDecoder) ? new TextDecoder('utf-8', { ignoreBOM: true }) : null)()
|
|
17
|
+
|
|
18
|
+
// Actually windows-1252, compatible with ascii and latin1 decoding
|
|
19
|
+
// Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
|
|
20
|
+
// in 2025 due to a regression, so we call it Latin1 as it's usable only for that
|
|
21
|
+
export const nativeDecoderLatin1 = /* @__PURE__ */ (() => {
|
|
22
|
+
// Not all barebone engines with TextDecoder support something except utf-8, detect
|
|
23
|
+
if (nativeDecoder) {
|
|
24
|
+
try {
|
|
25
|
+
return new TextDecoder('latin1', { ignoreBOM: true })
|
|
26
|
+
} catch {}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
return null
|
|
30
|
+
})()
|
|
31
|
+
|
|
32
|
+
export function decodePartAddition(a, start, end, m) {
|
|
33
|
+
let o = ''
|
|
34
|
+
let i = start
|
|
35
|
+
for (const last3 = end - 3; i < last3; i += 4) {
|
|
36
|
+
const x0 = a[i]
|
|
37
|
+
const x1 = a[i + 1]
|
|
38
|
+
const x2 = a[i + 2]
|
|
39
|
+
const x3 = a[i + 3]
|
|
40
|
+
o += m[x0]
|
|
41
|
+
o += m[x1]
|
|
42
|
+
o += m[x2]
|
|
43
|
+
o += m[x3]
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
while (i < end) o += m[a[i++]]
|
|
47
|
+
return o
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Decoding with templates is faster on Hermes
|
|
51
|
+
export function decodePartTemplates(a, start, end, m) {
|
|
52
|
+
let o = ''
|
|
53
|
+
let i = start
|
|
54
|
+
for (const last15 = end - 15; i < last15; i += 16) {
|
|
55
|
+
const x0 = a[i]
|
|
56
|
+
const x1 = a[i + 1]
|
|
57
|
+
const x2 = a[i + 2]
|
|
58
|
+
const x3 = a[i + 3]
|
|
59
|
+
const x4 = a[i + 4]
|
|
60
|
+
const x5 = a[i + 5]
|
|
61
|
+
const x6 = a[i + 6]
|
|
62
|
+
const x7 = a[i + 7]
|
|
63
|
+
const x8 = a[i + 8]
|
|
64
|
+
const x9 = a[i + 9]
|
|
65
|
+
const x10 = a[i + 10]
|
|
66
|
+
const x11 = a[i + 11]
|
|
67
|
+
const x12 = a[i + 12]
|
|
68
|
+
const x13 = a[i + 13]
|
|
69
|
+
const x14 = a[i + 14]
|
|
70
|
+
const x15 = a[i + 15]
|
|
71
|
+
o += `${m[x0]}${m[x1]}${m[x2]}${m[x3]}${m[x4]}${m[x5]}${m[x6]}${m[x7]}${m[x8]}${m[x9]}${m[x10]}${m[x11]}${m[x12]}${m[x13]}${m[x14]}${m[x15]}`
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
while (i < end) o += m[a[i++]]
|
|
75
|
+
return o
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const decodePart = isHermes ? decodePartTemplates : decodePartAddition
|
|
79
|
+
export function decode2string(arr, start, end, m) {
|
|
80
|
+
if (end - start > 30_000) {
|
|
81
|
+
// Limit concatenation to avoid excessive GC
|
|
82
|
+
// Thresholds checked on Hermes for toHex
|
|
83
|
+
const concat = []
|
|
84
|
+
for (let i = start; i < end; ) {
|
|
85
|
+
const step = i + 500
|
|
86
|
+
const iNext = step > end ? end : step
|
|
87
|
+
concat.push(decodePart(arr, i, iNext, m))
|
|
88
|
+
i = iNext
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
const res = concat.join('')
|
|
92
|
+
concat.length = 0
|
|
93
|
+
return res
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
return decodePart(arr, start, end, m)
|
|
97
|
+
}
|
|
@@ -1,57 +1,48 @@
|
|
|
1
1
|
// See tests/encoding/fixtures/single-byte/dump.js for generator
|
|
2
2
|
|
|
3
3
|
const r = 0xff_fd
|
|
4
|
-
const e = (x) => new Array(x).fill(1)
|
|
5
|
-
const h = (x) => new Array(x).fill(r)
|
|
6
4
|
|
|
7
5
|
/* eslint-disable unicorn/numeric-separators-style, @exodus/export-default/named */
|
|
8
6
|
|
|
9
7
|
// Common ranges
|
|
10
8
|
|
|
11
9
|
// prettier-ignore
|
|
12
|
-
const i2 = [
|
|
13
|
-
const i4a = [-75, -63, e(5), 104, -34, -67, 79, -77, 75, -73, 1]
|
|
14
|
-
const i4b = [34, -32, e(5), 73, -34, -36, 48, -46, 44, -42, 1]
|
|
15
|
-
const i7 = [721, 1, 1, -719, 721, -719, 721, e(19), r, 2, e(43), r]
|
|
16
|
-
const i8 = [e(26), r, r, 6692, 1, r]
|
|
17
|
-
const i9 = [79, -77, e(11), 84, 46, -127, e(16), 48, -46, e(11), 53, 46]
|
|
18
|
-
const iB = [3425, e(57), h(4), 5, e(28), h(4)]
|
|
19
|
-
const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
|
|
20
|
-
const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
|
|
21
|
-
const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
|
|
22
|
-
const w8 = [8072, 1, 3, 1, 5, -15, 1]
|
|
23
|
-
const w1 = [w8, -7480, 7750, -8129, 7897, -7911, -182]
|
|
24
|
-
const w3 = [w8, -8060, 8330, -8328, 8096, -8094]
|
|
25
|
-
const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
|
|
10
|
+
const i2 = [189,148,0,0,63,0,116,64,0,68,0,78,0,78,0,0,63,64,114,117,0,0,123,0,0,128,149,0,149,0,0,132,0,117,0,0,32,0,85,33,0,37,0,47,0,47,0,0,32,33,83,86,0,0,92,0,0,97,118,0,118,0,0,101,474]
|
|
26
11
|
// prettier-ignore
|
|
27
|
-
const
|
|
12
|
+
const iB = [[58,3424],[4,r],[29,3424],[4,r]]
|
|
13
|
+
const i9 = [[47], 78, [12], 83, 128, [17], 47, [12], 52, 97]
|
|
14
|
+
const w1 = [8236, 0, 8088, 0, 8090, 8097, 8090, 8090, 0, 8103]
|
|
15
|
+
const w2 = [8236, 0, 8088, 271, 8090, 8097, 8090, 8090, 574, 8103]
|
|
28
16
|
// prettier-ignore
|
|
29
|
-
const
|
|
17
|
+
const w7 = [64,0,157,[4],39,68,109,62,67,0,0,82,75,68,0,175,75,86,105,92,108,144,114,115,0,120,[3],154,104,128,143,0,158,159,0,37,78,31,36,0,0,51,44,37,0,144,44,55,74,61,77,113,83,84,0,89,[3],123,73,97,112,0,127,128]
|
|
18
|
+
const w8 = [8071, 8071, 8073, 8073, 8077, 8061, 8061]
|
|
30
19
|
// prettier-ignore
|
|
31
|
-
const k8b = [-
|
|
20
|
+
const k8b = [-22,910,879,879,899,880,880,894,876,893,[8,879],894,[4,878],864,859,884,882,861,877,881,876,873,875,846,815,815,835,816,816,830,812,829,[8,815],830,[4,814],800,795,820,818,797,813,817,812,809,811]
|
|
21
|
+
// prettier-ignore
|
|
22
|
+
const k8a = [9344,9345,9354,9357,9360,9363,9366,9373,9380,9387,9394,9461,9464,9467,9470,[4,9473],8845,9484,8580,8580,8625,8652,8652,6,8838,20,21,25,88,[3,9392],942]
|
|
32
23
|
|
|
33
24
|
// prettier-ignore
|
|
34
25
|
const maps = {
|
|
35
|
-
ibm866: [
|
|
36
|
-
'koi8-
|
|
37
|
-
'koi8-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
'windows-874': [
|
|
26
|
+
ibm866: [[48,912],[3,9441],...[29,62,122,122,109,107,120,101,106,111,109,107,31,34,65,56,39,10,69,102,102,96,89,109,105,98,81,108,102,102,97,97,84,82,75,75,98,96,13,0,123,118,125,128,111].map(x=>x+9266),[16,864],785,864,786,865,787,866,792,871,-72,8480,-67,8479,8218,-89,9378,-95],
|
|
27
|
+
'koi8-u': [...k8a,944,9391,944,944,[5,9391],996,944,[4,9391],846,848,9390,848,848,[5,9390],979,848,...k8b],
|
|
28
|
+
'koi8-r': [...k8a,[15,9391],846,[11,9390],...k8b],
|
|
29
|
+
macintosh: [68,68,69,70,77,81,86,90,88,89,90,88,89,90,91,89,90,90,91,89,90,90,91,92,90,91,92,90,94,92,93,93,8064,15,0,0,3,8061,16,56,6,0,8312,9,-4,8627,24,41,8558,0,8626,8626,-15,0,8524,8538,8535,775,8561,-17,-2,748,40,57,-1,-32,-22,8535,206,8579,8512,-28,-13,8029,-42,-11,-9,8,132,132,8003,8003,8010,8010,8004,8004,33,9459,39,159,8042,8145,8029,8029,64035,64035,8001,-42,7992,7995,8012,-35,-28,-38,-29,-33,[3,-29],-33,-27,-27,63503,-31,-24,-24,-27,60,464,485,-73,[3,479],-68,480,477,456],
|
|
30
|
+
'x-mac-cyrillic': [[32,912],8064,15,1006,0,3,8061,16,863,6,0,8312,855,934,8627,853,932,8558,0,8626,8626,930,0,987,849,844,923,845,924,845,924,844,923,920,836,-22,8535,206,8579,8512,-28,-13,8029,-42,832,911,831,910,902,8003,8003,8010,8010,8004,8004,33,8007,822,901,821,900,8250,804,883,880,[31,848],8109],
|
|
31
|
+
'windows-874': [8236,[4],8097,[11],...w8,[9],...iB],
|
|
41
32
|
}
|
|
42
33
|
|
|
43
34
|
// windows-1250 - windows-1258
|
|
44
35
|
// prettier-ignore
|
|
45
36
|
;[
|
|
46
|
-
[
|
|
47
|
-
[
|
|
48
|
-
[
|
|
49
|
-
[
|
|
50
|
-
[
|
|
51
|
-
[
|
|
52
|
-
[
|
|
53
|
-
[
|
|
54
|
-
[
|
|
37
|
+
[...w1,214,8110,206,215,239,234,0,...w8,0,8329,199,8095,191,200,224,219,0,550,566,158,0,95,[4],180,[4],204,0,0,553,143,[5],76,165,0,129,544,128,...i2],
|
|
38
|
+
[898,898,8088,976,8090,8097,8090,8090,8228,8103,895,8110,894,895,893,896,962,...w8,0,8329,959,8095,958,959,957,960,0,877,956,869,0,1003,0,0,857,0,858,[4],856,0,0,852,931,989,[3],921,8285,922,0,924,840,919,920,[64,848]],
|
|
39
|
+
[...w2,214,8110,198,0,239,0,0,...w8,580,8329,199,8095,183,0,224,217],
|
|
40
|
+
[8236,0,8088,271,8090,8097,8090,8090,0,8103,0,8110,[5],...w8,0,8329,0,8095,[5],740,740,[7],r,[4],8038,[4],720,[3],[3,720],0,720,0,[20,720],r,[44,720],r],
|
|
41
|
+
[...w2,214,8110,198,[4],...w8,580,8329,199,8095,183,0,0,217,0,...i9],
|
|
42
|
+
[...w2,0,8110,[5],...w8,580,8329,0,8095,[8],8198,[5],45,[15],61,[5],[20,1264],[5,1308],[7,r],[27,1264],r,r,7953,7953,r],
|
|
43
|
+
[8236,1533,8088,271,8090,8097,8090,8090,574,8103,1519,8110,198,1529,1546,1529,1567,...w8,1553,8329,1527,8095,183,8047,8047,1563,0,1387,[8],1556,[15],1377,[4],1376,1537,[22,1376],0,[4,1375],[4,1380],0,1379,0,[4,1378],[5],1373,1373,0,0,[4,1371],0,1370,1370,0,1369,0,1368,0,0,7953,7953,1491],
|
|
44
|
+
[...w1,0,8110,0,27,569,41,0,...w8,0,8329,0,8095,0,18,573,0,0,r,[3],r,0,0,48,0,172,[4],23,[8],...w7,474],
|
|
45
|
+
[...w2,0,8110,198,[4],...w8,580,8329,0,8095,183,0,0,217,[35],63,[8],564,[3],64,0,567,0,0,203,[7],210,549,[4],32,[8],533,[3],33,0,561,0,0,172,[7],179,8109],
|
|
55
46
|
].forEach((m, i) => {
|
|
56
47
|
maps[`windows-${i + 1250}`] = m
|
|
57
48
|
});
|
|
@@ -60,23 +51,23 @@ const maps = {
|
|
|
60
51
|
// prettier-ignore
|
|
61
52
|
;[
|
|
62
53
|
[], // Actual Latin1 / Unicode subset, non-WHATWG, which maps iso-8859-1 to windows-1252
|
|
63
|
-
[
|
|
64
|
-
[
|
|
65
|
-
[
|
|
66
|
-
[
|
|
67
|
-
[
|
|
68
|
-
[
|
|
69
|
-
[r,
|
|
70
|
-
|
|
71
|
-
[
|
|
54
|
+
[99,566,158,0,152,180,0,0,183,180,185,205,0,207,204,0,84,553,143,0,137,165,528,0,168,165,170,190,544,192,...i2],
|
|
55
|
+
[133,566,0,0,r,126,0,0,135,180,115,136,0,r,204,0,118,[4],111,0,0,120,165,100,121,0,r,189,[3],r,0,69,66,[9],r,[4],75,0,0,68,[4],143,126,[4],r,0,38,35,[9],r,[4],44,0,0,37,[4],112,95,474],
|
|
56
|
+
[99,150,179,0,131,149,0,0,183,104,119,186,0,207,0,0,84,553,164,0,116,134,528,0,168,89,104,171,141,192,140,64,[6],103,68,0,78,0,74,0,0,91,64,116,122,99,[5],153,[3],139,140,0,33,[6],72,37,0,47,0,43,0,0,60,33,85,91,68,[5],122,[3],108,109,474],
|
|
57
|
+
[[12,864],0,[66,864],8230,[12,864],-86,864,864],
|
|
58
|
+
[[3,r],0,[7,r],1376,0,[13,r],1376,[3,r],1376,r,[26,1376],[5,r],[19,1376],[13,r]],
|
|
59
|
+
[8055,8055,0,8200,8202,[4],720,[3],r,8038,[4],[3,720],0,[3,720],0,720,0,[20,720],r,[44,720],r],
|
|
60
|
+
[r,[8],45,[15],61,[4],[32,r],7992,[27,1264],r,r,7953,7953,r],
|
|
61
|
+
i9, // non-WHATWG, which maps iso-8859-9 to windows-1254
|
|
62
|
+
[99,112,127,134,131,144,0,147,103,182,187,209,0,188,155,0,84,97,112,119,116,129,0,132,88,167,172,194,8024,173,140,64,[6],103,68,0,78,0,74,[4],116,122,[4],145,0,153,[6],33,[6],72,37,0,47,0,43,[4],85,91,[4],114,0,122,[5],57],
|
|
72
63
|
iB, // non-WHATWG, which maps iso-8859-11 to windows-874
|
|
73
64
|
null, // no 12
|
|
74
|
-
[
|
|
75
|
-
[
|
|
76
|
-
[
|
|
77
|
-
[
|
|
65
|
+
[8060,[3],8057,0,0,48,0,172,[4],23,[4],8040,[3],...w7,7962],
|
|
66
|
+
[7521,7521,0,102,102,7524,0,7640,0,7640,7520,7750,0,0,201,7534,7534,110,110,7564,7564,0,7583,7625,7582,7625,7589,7735,7623,7623,7586,[16],164,[6],7571,[6],152,[17],133,[6],7540,[6],121],
|
|
67
|
+
[[3],8200,0,186,0,185,[11],201,[3],198,[3],150,150,186],
|
|
68
|
+
[99,99,158,8200,8057,186,0,185,0,366,0,205,0,204,204,0,0,90,143,201,8040,0,0,198,84,351,0,150,150,186,189,[3],63,0,65,[10],64,114,[3],123,0,131,152,[4],59,316,[4],32,0,34,[10],33,83,[3],92,0,100,121,[4],28,285],
|
|
78
69
|
].forEach((m, i) => {
|
|
79
|
-
if (m) maps[`iso-8859-${i + 1}`] = [
|
|
80
|
-
})
|
|
70
|
+
if (m) maps[`iso-8859-${i + 1}`] = [[33], ...m]
|
|
71
|
+
});
|
|
81
72
|
|
|
82
73
|
export default maps
|
package/fallback/single-byte.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
|
|
2
2
|
import encodings from './single-byte.encodings.js'
|
|
3
|
-
import { decode2string, nativeDecoder } from './
|
|
3
|
+
import { decode2string, nativeDecoder } from './platform.js'
|
|
4
4
|
|
|
5
5
|
export const E_STRICT = 'Input is not well-formed for this encoding'
|
|
6
6
|
const xUserDefined = 'x-user-defined'
|
|
@@ -17,9 +17,9 @@ export function getEncoding(encoding) {
|
|
|
17
17
|
assertEncoding(encoding)
|
|
18
18
|
if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
|
|
19
19
|
if (encoding === iso8i) encoding = 'iso-8859-8'
|
|
20
|
-
|
|
21
|
-
const
|
|
22
|
-
return
|
|
20
|
+
const enc = encodings[encoding]
|
|
21
|
+
const deltas = enc.flatMap((x) => (Array.isArray(x) ? new Array(x[0]).fill(x[1] ?? 0) : x))
|
|
22
|
+
return deltas.map((x, i) => (x === r ? x : x + 128 + i))
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
const mappers = new Map()
|