@exodus/bytes 1.11.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,17 +5,56 @@ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
5
5
  import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
6
6
  import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
7
7
  import labels from './encoding.labels.js'
8
- import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
8
+ import { fromSource, getBOMEncoding } from './encoding.api.js'
9
9
  import { unfinishedBytes, mergePrefix } from './encoding.util.js'
10
10
 
11
- export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
11
+ export { getBOMEncoding } from './encoding.api.js'
12
12
 
13
+ export const E_ENCODING = 'Unknown encoding'
13
14
  const E_MULTI = "import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support"
14
15
  const E_OPTIONS = 'The "options" argument must be of type object'
15
16
  const replacementChar = '\uFFFD'
16
17
  const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
17
18
  let createMultibyteDecoder, multibyteEncoder
18
19
 
20
+ let labelsMap
21
+ // Warning: unlike whatwg-encoding, returns lowercased labels
22
+ // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
23
+ // https://encoding.spec.whatwg.org/#names-and-labels
24
+ export function normalizeEncoding(label) {
25
+ // fast path
26
+ if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
27
+ if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
28
+ // full map
29
+ if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
30
+ const low = `${label}`.trim().toLowerCase()
31
+ if (Object.hasOwn(labels, low)) return low
32
+ if (!labelsMap) {
33
+ labelsMap = new Map()
34
+ for (const [name, aliases] of Object.entries(labels)) {
35
+ for (const alias of aliases) labelsMap.set(alias, name)
36
+ }
37
+ }
38
+
39
+ const mapped = labelsMap.get(low)
40
+ if (mapped) return mapped
41
+ return null
42
+ }
43
+
44
+ const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
45
+
46
+ // Unlike normalizeEncoding, case-sensitive
47
+ // https://encoding.spec.whatwg.org/#names-and-labels
48
+ export function labelToName(label) {
49
+ const enc = normalizeEncoding(label)
50
+ if (enc === 'utf-8') return 'UTF-8' // fast path
51
+ if (!enc) return enc
52
+ if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
53
+ if (enc === 'big5') return 'Big5'
54
+ if (enc === 'shift_jis') return 'Shift_JIS'
55
+ return enc
56
+ }
57
+
19
58
  export const isMultibyte = (enc) => multibyteSet.has(enc)
20
59
  export function setMultibyte(createDecoder, createEncoder) {
21
60
  createMultibyteDecoder = createDecoder
@@ -4,43 +4,47 @@
4
4
  // prettier-ignore
5
5
  const labels = {
6
6
  'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
7
- ibm866: ['866', 'cp866', 'csibm866'],
8
- 'iso-8859-2': ['csisolatin2', 'iso-ir-101', 'iso8859-2', 'iso88592', 'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'],
9
- 'iso-8859-3': ['csisolatin3', 'iso-ir-109', 'iso8859-3', 'iso88593', 'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3'],
10
- 'iso-8859-4': ['csisolatin4', 'iso-ir-110', 'iso8859-4', 'iso88594', 'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4'],
11
- 'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144', 'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988'],
12
- 'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127', 'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'],
13
- 'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7', 'iso_8859-7:1987', 'sun_eu_greek'],
14
- 'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8', 'iso_8859-8:1988', 'visual'],
7
+ 'utf-16be': ['unicodefffe'],
8
+ 'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
9
+ 'iso-8859-2': ['iso-ir-101'],
10
+ 'iso-8859-3': ['iso-ir-109'],
11
+ 'iso-8859-4': ['iso-ir-110'],
12
+ 'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144'],
13
+ 'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127'],
14
+ 'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'sun_eu_greek'],
15
+ 'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'visual'],
15
16
  'iso-8859-8-i': ['csiso88598i', 'logical'],
16
- 'iso-8859-10': ['csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'],
17
- 'iso-8859-13': ['iso8859-13', 'iso885913'],
18
- 'iso-8859-14': ['iso8859-14', 'iso885914'],
19
- 'iso-8859-15': ['csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
20
17
  'iso-8859-16': [],
21
18
  'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
22
19
  'koi8-u': ['koi8-ru'],
23
- macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
24
20
  'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
21
+ ibm866: ['866', 'cp866', 'csibm866'],
25
22
  'x-mac-cyrillic': ['x-mac-ukrainian'],
23
+ macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
26
24
  gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
27
25
  gb18030: [],
28
26
  big5: ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
29
27
  'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
30
- 'iso-2022-jp': ['csiso2022jp'],
31
28
  shift_jis: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j', 'x-sjis'],
32
29
  'euc-kr': ['cseuckr', 'csksc56011987', 'iso-ir-149', 'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'],
30
+ 'iso-2022-jp': ['csiso2022jp'],
33
31
  replacement: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
34
- 'utf-16be': ['unicodefffe'],
35
- 'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
36
32
  'x-user-defined': [],
37
33
  }
38
34
 
35
+ for (const i of [10, 13, 14, 15]) labels[`iso-8859-${i}`] = [`iso8859-${i}`, `iso8859${i}`]
36
+ for (const i of [2, 6, 7]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1987`)
37
+ for (const i of [3, 4, 5, 8]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1988`)
38
+ // prettier-ignore
39
+ for (let i = 2; i < 9; i++) labels[`iso-8859-${i}`].push(`iso8859-${i}`, `iso8859${i}`, `iso_8859-${i}`)
40
+ for (let i = 2; i < 5; i++) labels[`iso-8859-${i}`].push(`csisolatin${i}`, `l${i}`, `latin${i}`)
39
41
  for (let i = 0; i < 9; i++) labels[`windows-125${i}`] = [`cp125${i}`, `x-cp125${i}`]
40
42
 
41
43
  // prettier-ignore
42
44
  labels['windows-1252'].push('ansi_x3.4-1968', 'ascii', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii')
43
45
  // prettier-ignore
44
46
  labels['windows-1254'].push('csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5')
47
+ labels['iso-8859-10'].push('csisolatin6', 'iso-ir-157', 'l6', 'latin6')
48
+ labels['iso-8859-15'].push('csisolatin9', 'iso_8859-15', 'l9')
45
49
 
46
50
  export default labels
package/fallback/hex.js CHANGED
@@ -1,5 +1,5 @@
1
- import { assertUint8 } from '../assert.js'
2
- import { nativeDecoder, nativeEncoder, decode2string, E_STRING } from './_utils.js'
1
+ import { E_STRING } from './_utils.js'
2
+ import { nativeDecoder, nativeEncoder, decode2string } from './platform.js'
3
3
  import { encodeAscii, decodeAscii } from './latin1.js'
4
4
 
5
5
  let hexArray // array of 256 bytes converted to two-char hex strings
@@ -11,9 +11,8 @@ const allowed = '0123456789ABCDEFabcdef'
11
11
 
12
12
  export const E_HEX = 'Input is not a hex string'
13
13
 
14
+ // Expects a checked Uint8Array
14
15
  export function toHex(arr) {
15
- assertUint8(arr)
16
-
17
16
  if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
18
17
  const length = arr.length // this helps Hermes
19
18
 
@@ -6,18 +6,17 @@ import {
6
6
  isHermes,
7
7
  isDeno,
8
8
  isLE,
9
- skipWeb,
10
- } from './_utils.js'
9
+ } from './platform.js'
11
10
 
12
- const { atob } = globalThis
13
- const { toBase64: web64 } = Uint8Array.prototype
11
+ const atob = /* @__PURE__ */ (() => globalThis.atob)()
12
+ const web64 = /* @__PURE__ */ (() => Uint8Array.prototype.toBase64)()
14
13
 
15
14
  // See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
16
15
  // On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
17
16
  const maxFunctionArgs = 0x20_00
18
17
 
19
18
  // toBase64+atob path is faster on everything where fromBase64 is fast
20
- const useLatin1atob = web64 && atob && !skipWeb
19
+ const useLatin1atob = web64 && atob
21
20
 
22
21
  export function asciiPrefix(arr) {
23
22
  let p = 0 // verified ascii bytes
@@ -147,7 +146,8 @@ export function encodeAsciiPrefix(x, s) {
147
146
  export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
148
147
 
149
148
  // Expects nativeEncoder to be present
150
- export const encodeAscii = isHermes
149
+ const useEncodeInto = /* @__PURE__ */ (() => isHermes && nativeEncoder?.encodeInto)()
150
+ export const encodeAscii = useEncodeInto
151
151
  ? (str, ERR) => {
152
152
  // Much faster in Hermes
153
153
  const codes = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
@@ -1,7 +1,6 @@
1
1
  import { fromBase64url } from '@exodus/bytes/base64.js'
2
2
  import { utf16toString } from '@exodus/bytes/utf16.js'
3
3
  import loadEncodings from './multi-byte.encodings.cjs'
4
- import { to16input } from './utf16.js'
5
4
 
6
5
  export const sizes = {
7
6
  jis0208: 11_104,
@@ -40,7 +39,7 @@ function loadBase64(str) {
40
39
  return y
41
40
  }
42
41
 
43
- function unwrap(res, t, pos, highMode = false) {
42
+ function unwrap(res, t, pos) {
44
43
  let code = 0
45
44
  for (let i = 0; i < t.length; i++) {
46
45
  let x = t[i]
@@ -55,35 +54,26 @@ function unwrap(res, t, pos, highMode = false) {
55
54
  code += t[++i]
56
55
  }
57
56
 
58
- if (highMode) {
59
- for (let k = 0; k < x; k++, pos++, code++) {
60
- if (code <= 0xff_ff) {
61
- res[pos] = code
62
- } else {
63
- const c = String.fromCodePoint(code)
64
- res[pos] = (c.charCodeAt(0) << 16) | c.charCodeAt(1)
65
- }
57
+ for (let k = 0; k < x; k++, pos++, code++) {
58
+ if (code <= 0xff_ff) {
59
+ res[pos] = code
60
+ } else {
61
+ const c = String.fromCodePoint(code)
62
+ res[pos] = (c.charCodeAt(0) << 16) | c.charCodeAt(1)
66
63
  }
67
- } else {
68
- for (let k = 0; k < x; k++, pos++, code++) res[pos] = code
69
64
  }
70
65
  }
71
66
  } else if (x[0] === '$' && Object.hasOwn(indices, x)) {
72
- pos = unwrap(res, indices[x], pos, highMode) // self-reference using shared chunks
73
- } else if (highMode) {
74
- const s = [...utf16toString(loadBase64(x), 'uint8-le')] // splits by codepoints
75
- let c
76
- for (let i = 0; i < s.length; ) {
77
- c = s[i++]
67
+ pos = unwrap(res, indices[x], pos) // self-reference using shared chunks
68
+ } else {
69
+ let last
70
+ // splits by codepoints
71
+ for (const c of utf16toString(loadBase64(x), 'uint8-le')) {
72
+ last = c
78
73
  res[pos++] = c.length === 1 ? c.charCodeAt(0) : (c.charCodeAt(0) << 16) | c.charCodeAt(1)
79
74
  }
80
75
 
81
- code = c.codePointAt(0) + 1
82
- } else {
83
- const u16 = to16input(loadBase64(x), true) // data is little-endian
84
- res.set(u16, pos)
85
- pos += u16.length
86
- code = u16[u16.length - 1] + 1
76
+ code = last.codePointAt(0) + 1
87
77
  }
88
78
  }
89
79
 
@@ -108,9 +98,8 @@ export function getTable(id) {
108
98
  let a = -1
109
99
  res = new Uint16Array(indices[id].map((x) => (a += x + 1)))
110
100
  } else if (id === 'big5') {
111
- if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
112
- res = new Uint32Array(sizes[id]) // array of strings or undefined
113
- unwrap(res, indices[id], 0, true)
101
+ res = new Uint32Array(sizes[id]) // single or double charcodes
102
+ unwrap(res, indices[id], 0)
114
103
  // Pointer code updates are embedded into the table
115
104
  // These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157
116
105
  res[1133] = 0xca_03_04
@@ -120,7 +109,7 @@ export function getTable(id) {
120
109
  } else {
121
110
  if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
122
111
  res = new Uint16Array(sizes[id])
123
- unwrap(res, indices[id], 0, false)
112
+ unwrap(res, indices[id], 0)
124
113
  }
125
114
 
126
115
  indices[id] = null // gc
@@ -1,5 +1,5 @@
1
1
  import { decodeAscii, encodeLatin1 } from './latin1.js'
2
- import { decode2string } from './_utils.js'
2
+ import { decode2string } from './platform.js'
3
3
 
4
4
  const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
5
5
  const percentMap = new Map()
@@ -0,0 +1,31 @@
1
+ import { decodePartAddition as decodePart } from './platform.native.js'
2
+
3
+ export const nativeBuffer = null
4
+ export const isHermes = false
5
+ export const isDeno = false
6
+ export const nativeEncoder = /* @__PURE__ */ (() => new TextEncoder())()
7
+ export const nativeDecoder = /* @__PURE__ */ (() => new TextDecoder('utf-8', { ignoreBOM: true }))()
8
+ export const nativeDecoderLatin1 = /* @__PURE__ */ (() =>
9
+ new TextDecoder('latin1', { ignoreBOM: true }))()
10
+
11
+ export { isLE } from './platform.native.js'
12
+
13
+ export function decode2string(arr, start, end, m) {
14
+ if (end - start > 30_000) {
15
+ // Limit concatenation to avoid excessive GC
16
+ // Thresholds checked on Hermes for toHex
17
+ const concat = []
18
+ for (let i = start; i < end; ) {
19
+ const step = i + 500
20
+ const iNext = step > end ? end : step
21
+ concat.push(decodePart(arr, i, iNext, m))
22
+ i = iNext
23
+ }
24
+
25
+ const res = concat.join('')
26
+ concat.length = 0
27
+ return res
28
+ }
29
+
30
+ return decodePart(arr, start, end, m)
31
+ }
@@ -0,0 +1,2 @@
1
+ // platform.native actually hosts Node.js / Deno detection too
2
+ export * from './platform.native.js'
@@ -0,0 +1,97 @@
1
+ const { Buffer } = globalThis
2
+ const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
3
+ export const nativeBuffer = haveNativeBuffer ? Buffer : null
4
+ export const isHermes = /* @__PURE__ */ (() => !!globalThis.HermesInternal)()
5
+ export const isDeno = /* @__PURE__ */ (() => !!globalThis.Deno)()
6
+ export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
7
+
8
+ // We consider Node.js TextDecoder/TextEncoder native
9
+ // Still needed in platform.native.js as this is re-exported to platform.js
10
+ let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
11
+ if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
12
+
13
+ export const nativeEncoder = /* @__PURE__ */ (() =>
14
+ isNative(globalThis.TextEncoder) ? new TextEncoder() : null)()
15
+ export const nativeDecoder = /* @__PURE__ */ (() =>
16
+ isNative(globalThis.TextDecoder) ? new TextDecoder('utf-8', { ignoreBOM: true }) : null)()
17
+
18
+ // Actually windows-1252, compatible with ascii and latin1 decoding
19
+ // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
20
+ // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
21
+ export const nativeDecoderLatin1 = /* @__PURE__ */ (() => {
22
+ // Not all barebone engines with TextDecoder support something except utf-8, detect
23
+ if (nativeDecoder) {
24
+ try {
25
+ return new TextDecoder('latin1', { ignoreBOM: true })
26
+ } catch {}
27
+ }
28
+
29
+ return null
30
+ })()
31
+
32
+ export function decodePartAddition(a, start, end, m) {
33
+ let o = ''
34
+ let i = start
35
+ for (const last3 = end - 3; i < last3; i += 4) {
36
+ const x0 = a[i]
37
+ const x1 = a[i + 1]
38
+ const x2 = a[i + 2]
39
+ const x3 = a[i + 3]
40
+ o += m[x0]
41
+ o += m[x1]
42
+ o += m[x2]
43
+ o += m[x3]
44
+ }
45
+
46
+ while (i < end) o += m[a[i++]]
47
+ return o
48
+ }
49
+
50
+ // Decoding with templates is faster on Hermes
51
+ export function decodePartTemplates(a, start, end, m) {
52
+ let o = ''
53
+ let i = start
54
+ for (const last15 = end - 15; i < last15; i += 16) {
55
+ const x0 = a[i]
56
+ const x1 = a[i + 1]
57
+ const x2 = a[i + 2]
58
+ const x3 = a[i + 3]
59
+ const x4 = a[i + 4]
60
+ const x5 = a[i + 5]
61
+ const x6 = a[i + 6]
62
+ const x7 = a[i + 7]
63
+ const x8 = a[i + 8]
64
+ const x9 = a[i + 9]
65
+ const x10 = a[i + 10]
66
+ const x11 = a[i + 11]
67
+ const x12 = a[i + 12]
68
+ const x13 = a[i + 13]
69
+ const x14 = a[i + 14]
70
+ const x15 = a[i + 15]
71
+ o += `${m[x0]}${m[x1]}${m[x2]}${m[x3]}${m[x4]}${m[x5]}${m[x6]}${m[x7]}${m[x8]}${m[x9]}${m[x10]}${m[x11]}${m[x12]}${m[x13]}${m[x14]}${m[x15]}`
72
+ }
73
+
74
+ while (i < end) o += m[a[i++]]
75
+ return o
76
+ }
77
+
78
+ const decodePart = isHermes ? decodePartTemplates : decodePartAddition
79
+ export function decode2string(arr, start, end, m) {
80
+ if (end - start > 30_000) {
81
+ // Limit concatenation to avoid excessive GC
82
+ // Thresholds checked on Hermes for toHex
83
+ const concat = []
84
+ for (let i = start; i < end; ) {
85
+ const step = i + 500
86
+ const iNext = step > end ? end : step
87
+ concat.push(decodePart(arr, i, iNext, m))
88
+ i = iNext
89
+ }
90
+
91
+ const res = concat.join('')
92
+ concat.length = 0
93
+ return res
94
+ }
95
+
96
+ return decodePart(arr, start, end, m)
97
+ }
@@ -1,57 +1,48 @@
1
1
  // See tests/encoding/fixtures/single-byte/dump.js for generator
2
2
 
3
3
  const r = 0xff_fd
4
- const e = (x) => new Array(x).fill(1)
5
- const h = (x) => new Array(x).fill(r)
6
4
 
7
5
  /* eslint-disable unicorn/numeric-separators-style, @exodus/export-default/named */
8
6
 
9
7
  // Common ranges
10
8
 
11
9
  // prettier-ignore
12
- const i2 = [-40,-147,1,64,-62,117,-51,-63,69,-67,79,-77,79,-77,1,64,2,51,4,-116,1,124,-122,1,129,22,-148,150,-148,1,133,-131,118,-116,1,33,-31,86,-51,-32,38,-36,48,-46,48,-46,1,33,2,51,4,-85,1,93,-91,1,98,22,-117,119,-117,1,102,374]
13
- const i4a = [-75, -63, e(5), 104, -34, -67, 79, -77, 75, -73, 1]
14
- const i4b = [34, -32, e(5), 73, -34, -36, 48, -46, 44, -42, 1]
15
- const i7 = [721, 1, 1, -719, 721, -719, 721, e(19), r, 2, e(43), r]
16
- const i8 = [e(26), r, r, 6692, 1, r]
17
- const i9 = [79, -77, e(11), 84, 46, -127, e(16), 48, -46, e(11), 53, 46]
18
- const iB = [3425, e(57), h(4), 5, e(28), h(4)]
19
- const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
20
- const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
21
- const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
22
- const w8 = [8072, 1, 3, 1, 5, -15, 1]
23
- const w1 = [w8, -7480, 7750, -8129, 7897, -7911, -182]
24
- const w3 = [w8, -8060, 8330, -8328, 8096, -8094]
25
- const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
10
+ const i2 = [189,148,0,0,63,0,116,64,0,68,0,78,0,78,0,0,63,64,114,117,0,0,123,0,0,128,149,0,149,0,0,132,0,117,0,0,32,0,85,33,0,37,0,47,0,47,0,0,32,33,83,86,0,0,92,0,0,97,118,0,118,0,0,101,474]
26
11
  // prettier-ignore
27
- const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
12
+ const iB = [[58,3424],[4,r],[29,3424],[4,r]]
13
+ const i9 = [[47], 78, [12], 83, 128, [17], 47, [12], 52, 97]
14
+ const w1 = [8236, 0, 8088, 0, 8090, 8097, 8090, 8090, 0, 8103]
15
+ const w2 = [8236, 0, 8088, 271, 8090, 8097, 8090, 8090, 574, 8103]
28
16
  // prettier-ignore
29
- const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
17
+ const w7 = [64,0,157,[4],39,68,109,62,67,0,0,82,75,68,0,175,75,86,105,92,108,144,114,115,0,120,[3],154,104,128,143,0,158,159,0,37,78,31,36,0,0,51,44,37,0,144,44,55,74,61,77,113,83,84,0,89,[3],123,73,97,112,0,127,128]
18
+ const w8 = [8071, 8071, 8073, 8073, 8077, 8061, 8061]
30
19
  // prettier-ignore
31
- const k8b = [-30,1,21,-18,1,15,-17,18,-13,e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
20
+ const k8b = [-22,910,879,879,899,880,880,894,876,893,[8,879],894,[4,878],864,859,884,882,861,877,881,876,873,875,846,815,815,835,816,816,830,812,829,[8,815],830,[4,814],800,795,820,818,797,813,817,812,809,811]
21
+ // prettier-ignore
22
+ const k8a = [9344,9345,9354,9357,9360,9363,9366,9373,9380,9387,9394,9461,9464,9467,9470,[4,9473],8845,9484,8580,8580,8625,8652,8652,6,8838,20,21,25,88,[3,9392],942]
32
23
 
33
24
  // prettier-ignore
34
25
  const maps = {
35
- ibm866: [913,e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
36
- 'koi8-r': [k8a,8450,e(14),-8544,8545,e(10),-9411,933,k8b,-28,k8b],
37
- 'koi8-u': [k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,k8b,-28,k8b],
38
- 'x-mac-cyrillic': [913,e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,e(30),7262],
39
- macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
40
- 'windows-874': [8237,-8235,1,1,1,8098,-8096,e(10),w8,-8060,e(8),iB],
26
+ ibm866: [[48,912],[3,9441],...[29,62,122,122,109,107,120,101,106,111,109,107,31,34,65,56,39,10,69,102,102,96,89,109,105,98,81,108,102,102,97,97,84,82,75,75,98,96,13,0,123,118,125,128,111].map(x=>x+9266),[16,864],785,864,786,865,787,866,792,871,-72,8480,-67,8479,8218,-89,9378,-95],
27
+ 'koi8-u': [...k8a,944,9391,944,944,[5,9391],996,944,[4,9391],846,848,9390,848,848,[5,9390],979,848,...k8b],
28
+ 'koi8-r': [...k8a,[15,9391],846,[11,9390],...k8b],
29
+ macintosh: [68,68,69,70,77,81,86,90,88,89,90,88,89,90,91,89,90,90,91,89,90,90,91,92,90,91,92,90,94,92,93,93,8064,15,0,0,3,8061,16,56,6,0,8312,9,-4,8627,24,41,8558,0,8626,8626,-15,0,8524,8538,8535,775,8561,-17,-2,748,40,57,-1,-32,-22,8535,206,8579,8512,-28,-13,8029,-42,-11,-9,8,132,132,8003,8003,8010,8010,8004,8004,33,9459,39,159,8042,8145,8029,8029,64035,64035,8001,-42,7992,7995,8012,-35,-28,-38,-29,-33,[3,-29],-33,-27,-27,63503,-31,-24,-24,-27,60,464,485,-73,[3,479],-68,480,477,456],
30
+ 'x-mac-cyrillic': [[32,912],8064,15,1006,0,3,8061,16,863,6,0,8312,855,934,8627,853,932,8558,0,8626,8626,930,0,987,849,844,923,845,924,845,924,844,923,920,836,-22,8535,206,8579,8512,-28,-13,8029,-42,832,911,831,910,902,8003,8003,8010,8010,8004,8004,33,8007,822,901,821,900,8250,804,883,880,[31,848],8109],
31
+ 'windows-874': [8236,[4],8097,[11],...w8,[9],...iB],
41
32
  }
42
33
 
43
34
  // windows-1250 - windows-1258
44
35
  // prettier-ignore
45
36
  ;[
46
- [w0,-7888,7897,-7903,10,25,-4,-233,w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,i2],
47
- [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,e(63)],
48
- [p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,w1,225,-6],
49
- [p1,-8089,8104,-8102,8111,-8109,1,1,1,1,w3,1,1,1,1,741,1,-739,e(6),r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,i7],
50
- [p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,w1,1,218,-216,e(47),i9],
51
- [p1,-7515,7530,-8102,8111,-8109,1,1,1,1,w8,-7480,7750,-8328,8096,-8094,e(7),8199,-8197,1,1,1,1,46,-44,e(14),62,-60,1,1,1,1,1265,e(19),45,1,1,1,1,h(7),-36,i8],
52
- [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,e(7),1557,-1555,e(14),1378,-1376,1,1,1,1377,162,-160,e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
53
- [w0,-8102,8111,-8109,28,543,-527,-40,w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,e(5),p3,347],
54
- [p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,e(34),64,-62,e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,e(6),211,340,-548,1,1,1,33,-31,e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,e(6),180,7931],
37
+ [...w1,214,8110,206,215,239,234,0,...w8,0,8329,199,8095,191,200,224,219,0,550,566,158,0,95,[4],180,[4],204,0,0,553,143,[5],76,165,0,129,544,128,...i2],
38
+ [898,898,8088,976,8090,8097,8090,8090,8228,8103,895,8110,894,895,893,896,962,...w8,0,8329,959,8095,958,959,957,960,0,877,956,869,0,1003,0,0,857,0,858,[4],856,0,0,852,931,989,[3],921,8285,922,0,924,840,919,920,[64,848]],
39
+ [...w2,214,8110,198,0,239,0,0,...w8,580,8329,199,8095,183,0,224,217],
40
+ [8236,0,8088,271,8090,8097,8090,8090,0,8103,0,8110,[5],...w8,0,8329,0,8095,[5],740,740,[7],r,[4],8038,[4],720,[3],[3,720],0,720,0,[20,720],r,[44,720],r],
41
+ [...w2,214,8110,198,[4],...w8,580,8329,199,8095,183,0,0,217,0,...i9],
42
+ [...w2,0,8110,[5],...w8,580,8329,0,8095,[8],8198,[5],45,[15],61,[5],[20,1264],[5,1308],[7,r],[27,1264],r,r,7953,7953,r],
43
+ [8236,1533,8088,271,8090,8097,8090,8090,574,8103,1519,8110,198,1529,1546,1529,1567,...w8,1553,8329,1527,8095,183,8047,8047,1563,0,1387,[8],1556,[15],1377,[4],1376,1537,[22,1376],0,[4,1375],[4,1380],0,1379,0,[4,1378],[5],1373,1373,0,0,[4,1371],0,1370,1370,0,1369,0,1368,0,0,7953,7953,1491],
44
+ [...w1,0,8110,0,27,569,41,0,...w8,0,8329,0,8095,0,18,573,0,0,r,[3],r,0,0,48,0,172,[4],23,[8],...w7,474],
45
+ [...w2,0,8110,198,[4],...w8,580,8329,0,8095,183,0,0,217,[35],63,[8],564,[3],64,0,567,0,0,203,[7],210,549,[4],32,[8],533,[3],33,0,561,0,0,172,[7],179,8109],
55
46
  ].forEach((m, i) => {
56
47
  maps[`windows-${i + 1250}`] = m
57
48
  });
@@ -60,23 +51,23 @@ const maps = {
60
51
  // prettier-ignore
61
52
  ;[
62
53
  [], // Actual Latin1 / Unicode subset, non-WHATWG, which maps iso-8859-1 to windows-1252
63
- [100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,i2],
64
- [134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
65
- [100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
66
- [865,e(11),-863,865,e(65),7367,-7365,e(11),-949,951,1],
67
- [r,r,r,4,h(7),1384,-1375,h(13),1390,r,r,r,4,r,2,e(25),h(5),6,e(18),h(13)],
68
- [8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,i7],
69
- [r,2,e(7),46,-44,e(14),62,-60,1,1,1,h(32),8025,-6727,i8],
70
- [e(47),i9], // non-WHATWG, which maps iso-8859-9 to windows-1254
71
- [100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,e(5),i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
54
+ [99,566,158,0,152,180,0,0,183,180,185,205,0,207,204,0,84,553,143,0,137,165,528,0,168,165,170,190,544,192,...i2],
55
+ [133,566,0,0,r,126,0,0,135,180,115,136,0,r,204,0,118,[4],111,0,0,120,165,100,121,0,r,189,[3],r,0,69,66,[9],r,[4],75,0,0,68,[4],143,126,[4],r,0,38,35,[9],r,[4],44,0,0,37,[4],112,95,474],
56
+ [99,150,179,0,131,149,0,0,183,104,119,186,0,207,0,0,84,553,164,0,116,134,528,0,168,89,104,171,141,192,140,64,[6],103,68,0,78,0,74,0,0,91,64,116,122,99,[5],153,[3],139,140,0,33,[6],72,37,0,47,0,43,0,0,60,33,85,91,68,[5],122,[3],108,109,474],
57
+ [[12,864],0,[66,864],8230,[12,864],-86,864,864],
58
+ [[3,r],0,[7,r],1376,0,[13,r],1376,[3,r],1376,r,[26,1376],[5,r],[19,1376],[13,r]],
59
+ [8055,8055,0,8200,8202,[4],720,[3],r,8038,[4],[3,720],0,[3,720],0,720,0,[20,720],r,[44,720],r],
60
+ [r,[8],45,[15],61,[4],[32,r],7992,[27,1264],r,r,7953,7953,r],
61
+ i9, // non-WHATWG, which maps iso-8859-9 to windows-1254
62
+ [99,112,127,134,131,144,0,147,103,182,187,209,0,188,155,0,84,97,112,119,116,129,0,132,88,167,172,194,8024,173,140,64,[6],103,68,0,78,0,74,[4],116,122,[4],145,0,153,[6],33,[6],72,37,0,47,0,43,[4],85,91,[4],114,0,122,[5],57],
72
63
  iB, // non-WHATWG, which maps iso-8859-11 to windows-874
73
64
  null, // no 12
74
- [8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,p3,7835],
75
- [7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,e(15),165,-163,e(5),7572,-7570,e(5),153,-151,e(16),134,-132,e(5),7541,-7539,e(5),122],
76
- [1,1,1,8201,-8199,187,-185,186,-184,e(10),202,-200,1,1,199,-197,1,1,151,1,37],
77
- [100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
65
+ [8060,[3],8057,0,0,48,0,172,[4],23,[4],8040,[3],...w7,7962],
66
+ [7521,7521,0,102,102,7524,0,7640,0,7640,7520,7750,0,0,201,7534,7534,110,110,7564,7564,0,7583,7625,7582,7625,7589,7735,7623,7623,7586,[16],164,[6],7571,[6],152,[17],133,[6],7540,[6],121],
67
+ [[3],8200,0,186,0,185,[11],201,[3],198,[3],150,150,186],
68
+ [99,99,158,8200,8057,186,0,185,0,366,0,205,0,204,204,0,0,90,143,201,8040,0,0,198,84,351,0,150,150,186,189,[3],63,0,65,[10],64,114,[3],123,0,131,152,[4],59,316,[4],32,0,34,[10],33,83,[3],92,0,100,121,[4],28,285],
78
69
  ].forEach((m, i) => {
79
- if (m) maps[`iso-8859-${i + 1}`] = [e(33), m]
80
- })
70
+ if (m) maps[`iso-8859-${i + 1}`] = [[33], ...m]
71
+ });
81
72
 
82
73
  export default maps
@@ -1,6 +1,6 @@
1
1
  import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
2
2
  import encodings from './single-byte.encodings.js'
3
- import { decode2string, nativeDecoder } from './_utils.js'
3
+ import { decode2string, nativeDecoder } from './platform.js'
4
4
 
5
5
  export const E_STRICT = 'Input is not well-formed for this encoding'
6
6
  const xUserDefined = 'x-user-defined'
@@ -17,9 +17,9 @@ export function getEncoding(encoding) {
17
17
  assertEncoding(encoding)
18
18
  if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
19
19
  if (encoding === iso8i) encoding = 'iso-8859-8'
20
- let prev = 127
21
- const enc = encodings[encoding].flat().flat().flat() // max depth is 3, rechecked by tests
22
- return enc.map((x) => (x === r ? x : (prev += x))) // eslint-disable-line no-return-assign
20
+ const enc = encodings[encoding]
21
+ const deltas = enc.flatMap((x) => (Array.isArray(x) ? new Array(x[0]).fill(x[1] ?? 0) : x))
22
+ return deltas.map((x, i) => (x === r ? x : x + 128 + i))
23
23
  }
24
24
 
25
25
  const mappers = new Map()