@exodus/bytes 1.12.0 → 1.14.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,32 +1,3 @@
1
- import labels from './encoding.labels.js'
2
-
3
- let labelsMap
4
-
5
- export const E_ENCODING = 'Unknown encoding'
6
-
7
- // Warning: unlike whatwg-encoding, returns lowercased labels
8
- // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
9
- // https://encoding.spec.whatwg.org/#names-and-labels
10
- export function normalizeEncoding(label) {
11
- // fast path
12
- if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
13
- if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
14
- // full map
15
- if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
16
- const low = `${label}`.trim().toLowerCase()
17
- if (Object.hasOwn(labels, low)) return low
18
- if (!labelsMap) {
19
- labelsMap = new Map()
20
- for (const [label, aliases] of Object.entries(labels)) {
21
- for (const alias of aliases) labelsMap.set(alias, label)
22
- }
23
- }
24
-
25
- const mapped = labelsMap.get(low)
26
- if (mapped) return mapped
27
- return null
28
- }
29
-
30
1
  // TODO: make this more strict against Symbol.toStringTag
31
2
  // Is not very significant though, anything faking Symbol.toStringTag could as well override
32
3
  // prototypes, which is not something we protect against
@@ -65,17 +36,3 @@ export function getBOMEncoding(input) {
65
36
  if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
66
37
  return null
67
38
  }
68
-
69
- const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
70
-
71
- // Unlike normalizeEncoding, case-sensitive
72
- // https://encoding.spec.whatwg.org/#names-and-labels
73
- export function labelToName(label) {
74
- const enc = normalizeEncoding(label)
75
- if (enc === 'utf-8') return 'UTF-8' // fast path
76
- if (!enc) return enc
77
- if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
78
- if (enc === 'big5') return 'Big5'
79
- if (enc === 'shift_jis') return 'Shift_JIS'
80
- return enc
81
- }
@@ -5,17 +5,56 @@ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
5
5
  import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
6
6
  import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
7
7
  import labels from './encoding.labels.js'
8
- import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
8
+ import { fromSource, getBOMEncoding } from './encoding.api.js'
9
9
  import { unfinishedBytes, mergePrefix } from './encoding.util.js'
10
10
 
11
- export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
11
+ export { getBOMEncoding } from './encoding.api.js'
12
12
 
13
+ export const E_ENCODING = 'Unknown encoding'
13
14
  const E_MULTI = "import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support"
14
15
  const E_OPTIONS = 'The "options" argument must be of type object'
15
16
  const replacementChar = '\uFFFD'
16
17
  const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
17
18
  let createMultibyteDecoder, multibyteEncoder
18
19
 
20
+ let labelsMap
21
+ // Warning: unlike whatwg-encoding, returns lowercased labels
22
+ // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
23
+ // https://encoding.spec.whatwg.org/#names-and-labels
24
+ export function normalizeEncoding(label) {
25
+ // fast path
26
+ if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
27
+ if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
28
+ // full map
29
+ if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
30
+ const low = `${label}`.trim().toLowerCase()
31
+ if (Object.hasOwn(labels, low)) return low
32
+ if (!labelsMap) {
33
+ labelsMap = new Map()
34
+ for (const [name, aliases] of Object.entries(labels)) {
35
+ for (const alias of aliases) labelsMap.set(alias, name)
36
+ }
37
+ }
38
+
39
+ const mapped = labelsMap.get(low)
40
+ if (mapped) return mapped
41
+ return null
42
+ }
43
+
44
+ const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
45
+
46
+ // Unlike normalizeEncoding, case-sensitive
47
+ // https://encoding.spec.whatwg.org/#names-and-labels
48
+ export function labelToName(label) {
49
+ const enc = normalizeEncoding(label)
50
+ if (enc === 'utf-8') return 'UTF-8' // fast path
51
+ if (!enc) return enc
52
+ if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
53
+ if (enc === 'big5') return 'Big5'
54
+ if (enc === 'shift_jis') return 'Shift_JIS'
55
+ return enc
56
+ }
57
+
19
58
  export const isMultibyte = (enc) => multibyteSet.has(enc)
20
59
  export function setMultibyte(createDecoder, createEncoder) {
21
60
  createMultibyteDecoder = createDecoder
@@ -4,43 +4,47 @@
4
4
  // prettier-ignore
5
5
  const labels = {
6
6
  'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
7
- ibm866: ['866', 'cp866', 'csibm866'],
8
- 'iso-8859-2': ['csisolatin2', 'iso-ir-101', 'iso8859-2', 'iso88592', 'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'],
9
- 'iso-8859-3': ['csisolatin3', 'iso-ir-109', 'iso8859-3', 'iso88593', 'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3'],
10
- 'iso-8859-4': ['csisolatin4', 'iso-ir-110', 'iso8859-4', 'iso88594', 'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4'],
11
- 'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144', 'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988'],
12
- 'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127', 'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987'],
13
- 'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7', 'iso_8859-7:1987', 'sun_eu_greek'],
14
- 'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8', 'iso_8859-8:1988', 'visual'],
7
+ 'utf-16be': ['unicodefffe'],
8
+ 'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
9
+ 'iso-8859-2': ['iso-ir-101'],
10
+ 'iso-8859-3': ['iso-ir-109'],
11
+ 'iso-8859-4': ['iso-ir-110'],
12
+ 'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144'],
13
+ 'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127'],
14
+ 'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'sun_eu_greek'],
15
+ 'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'visual'],
15
16
  'iso-8859-8-i': ['csiso88598i', 'logical'],
16
- 'iso-8859-10': ['csisolatin6', 'iso-ir-157', 'iso8859-10', 'iso885910', 'l6', 'latin6'],
17
- 'iso-8859-13': ['iso8859-13', 'iso885913'],
18
- 'iso-8859-14': ['iso8859-14', 'iso885914'],
19
- 'iso-8859-15': ['csisolatin9', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
20
17
  'iso-8859-16': [],
21
18
  'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
22
19
  'koi8-u': ['koi8-ru'],
23
- macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
24
20
  'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
21
+ ibm866: ['866', 'cp866', 'csibm866'],
25
22
  'x-mac-cyrillic': ['x-mac-ukrainian'],
23
+ macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
26
24
  gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
27
25
  gb18030: [],
28
26
  big5: ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
29
27
  'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
30
- 'iso-2022-jp': ['csiso2022jp'],
31
28
  shift_jis: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j', 'x-sjis'],
32
29
  'euc-kr': ['cseuckr', 'csksc56011987', 'iso-ir-149', 'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'],
30
+ 'iso-2022-jp': ['csiso2022jp'],
33
31
  replacement: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
34
- 'utf-16be': ['unicodefffe'],
35
- 'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
36
32
  'x-user-defined': [],
37
33
  }
38
34
 
35
+ for (const i of [10, 13, 14, 15]) labels[`iso-8859-${i}`] = [`iso8859-${i}`, `iso8859${i}`]
36
+ for (const i of [2, 6, 7]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1987`)
37
+ for (const i of [3, 4, 5, 8]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1988`)
38
+ // prettier-ignore
39
+ for (let i = 2; i < 9; i++) labels[`iso-8859-${i}`].push(`iso8859-${i}`, `iso8859${i}`, `iso_8859-${i}`)
40
+ for (let i = 2; i < 5; i++) labels[`iso-8859-${i}`].push(`csisolatin${i}`, `l${i}`, `latin${i}`)
39
41
  for (let i = 0; i < 9; i++) labels[`windows-125${i}`] = [`cp125${i}`, `x-cp125${i}`]
40
42
 
41
43
  // prettier-ignore
42
44
  labels['windows-1252'].push('ansi_x3.4-1968', 'ascii', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii')
43
45
  // prettier-ignore
44
46
  labels['windows-1254'].push('csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5')
47
+ labels['iso-8859-10'].push('csisolatin6', 'iso-ir-157', 'l6', 'latin6')
48
+ labels['iso-8859-15'].push('csisolatin9', 'iso_8859-15', 'l9')
45
49
 
46
50
  export default labels
package/fallback/hex.js CHANGED
@@ -1,5 +1,5 @@
1
- import { assertUint8 } from '../assert.js'
2
- import { nativeDecoder, nativeEncoder, decode2string, E_STRING } from './_utils.js'
1
+ import { E_STRING } from './_utils.js'
2
+ import { nativeDecoder, nativeEncoder, decode2string } from './platform.js'
3
3
  import { encodeAscii, decodeAscii } from './latin1.js'
4
4
 
5
5
  let hexArray // array of 256 bytes converted to two-char hex strings
@@ -11,9 +11,8 @@ const allowed = '0123456789ABCDEFabcdef'
11
11
 
12
12
  export const E_HEX = 'Input is not a hex string'
13
13
 
14
+ // Expects a checked Uint8Array
14
15
  export function toHex(arr) {
15
- assertUint8(arr)
16
-
17
16
  if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
18
17
  const length = arr.length // this helps Hermes
19
18
 
@@ -3,21 +3,21 @@ import {
3
3
  nativeDecoder,
4
4
  nativeDecoderLatin1,
5
5
  nativeBuffer,
6
+ encodeCharcodes,
6
7
  isHermes,
7
8
  isDeno,
8
9
  isLE,
9
- skipWeb,
10
- } from './_utils.js'
10
+ } from './platform.js'
11
11
 
12
- const { atob } = globalThis
13
- const { toBase64: web64 } = Uint8Array.prototype
12
+ const atob = /* @__PURE__ */ (() => globalThis.atob)()
13
+ const web64 = /* @__PURE__ */ (() => Uint8Array.prototype.toBase64)()
14
14
 
15
15
  // See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
16
16
  // On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
17
17
  const maxFunctionArgs = 0x20_00
18
18
 
19
19
  // toBase64+atob path is faster on everything where fromBase64 is fast
20
- const useLatin1atob = web64 && atob && !skipWeb
20
+ const useLatin1atob = web64 && atob
21
21
 
22
22
  export function asciiPrefix(arr) {
23
23
  let p = 0 // verified ascii bytes
@@ -108,25 +108,6 @@ export const decodeAscii = nativeBuffer
108
108
 
109
109
  /* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
110
110
 
111
- export const encodeCharcodes = isHermes
112
- ? (str, arr) => {
113
- const length = str.length
114
- if (length > 64) {
115
- const at = str.charCodeAt.bind(str) // faster on strings from ~64 chars on Hermes, but can be 10x slower on e.g. JSC
116
- for (let i = 0; i < length; i++) arr[i] = at(i)
117
- } else {
118
- for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
119
- }
120
-
121
- return arr
122
- }
123
- : (str, arr) => {
124
- const length = str.length
125
- // Can be optimized with unrolling, but this is not used on non-Hermes atm
126
- for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
127
- return arr
128
- }
129
-
130
111
  export function encodeAsciiPrefix(x, s) {
131
112
  let i = 0
132
113
  for (const len3 = s.length - 3; i < len3; i += 4) {
@@ -147,7 +128,7 @@ export function encodeAsciiPrefix(x, s) {
147
128
  export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
148
129
 
149
130
  // Expects nativeEncoder to be present
150
- const useEncodeInto = isHermes && nativeEncoder?.encodeInto
131
+ const useEncodeInto = /* @__PURE__ */ (() => isHermes && nativeEncoder?.encodeInto)()
151
132
  export const encodeAscii = useEncodeInto
152
133
  ? (str, ERR) => {
153
134
  // Much faster in Hermes
@@ -1,5 +1,5 @@
1
1
  import { decodeAscii, encodeLatin1 } from './latin1.js'
2
- import { decode2string } from './_utils.js'
2
+ import { decode2string } from './platform.js'
3
3
 
4
4
  const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
5
5
  const percentMap = new Map()
@@ -0,0 +1,31 @@
1
+ import { decodePartAddition as decodePart } from './platform.native.js'
2
+
3
+ export { isLE, encodeCharcodesPure as encodeCharcodes } from './platform.native.js'
4
+
5
+ export const nativeBuffer = null
6
+ export const isHermes = false
7
+ export const isDeno = false
8
+ export const nativeEncoder = /* @__PURE__ */ (() => new TextEncoder())()
9
+ export const nativeDecoder = /* @__PURE__ */ (() => new TextDecoder('utf-8', { ignoreBOM: true }))()
10
+ export const nativeDecoderLatin1 = /* @__PURE__ */ (() =>
11
+ new TextDecoder('latin1', { ignoreBOM: true }))()
12
+
13
+ export function decode2string(arr, start, end, m) {
14
+ if (end - start > 30_000) {
15
+ // Limit concatenation to avoid excessive GC
16
+ // Thresholds checked on Hermes for toHex
17
+ const concat = []
18
+ for (let i = start; i < end; ) {
19
+ const step = i + 500
20
+ const iNext = step > end ? end : step
21
+ concat.push(decodePart(arr, i, iNext, m))
22
+ i = iNext
23
+ }
24
+
25
+ const res = concat.join('')
26
+ concat.length = 0
27
+ return res
28
+ }
29
+
30
+ return decodePart(arr, start, end, m)
31
+ }
@@ -0,0 +1,2 @@
1
+ // platform.native actually hosts Node.js / Deno detection too
2
+ export * from './platform.native.js'
@@ -0,0 +1,122 @@
1
+ const { Buffer } = globalThis
2
+ const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
3
+ export const nativeBuffer = haveNativeBuffer ? Buffer : null
4
+ export const isHermes = /* @__PURE__ */ (() => !!globalThis.HermesInternal)()
5
+ export const isDeno = /* @__PURE__ */ (() => !!globalThis.Deno)()
6
+ export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
7
+
8
+ // We consider Node.js TextDecoder/TextEncoder native
9
+ // Still needed in platform.native.js as this is re-exported to platform.js
10
+ let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
11
+ if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
12
+
13
+ export const nativeEncoder = /* @__PURE__ */ (() =>
14
+ isNative(globalThis.TextEncoder) ? new TextEncoder() : null)()
15
+ export const nativeDecoder = /* @__PURE__ */ (() =>
16
+ isNative(globalThis.TextDecoder) ? new TextDecoder('utf-8', { ignoreBOM: true }) : null)()
17
+
18
+ // Actually windows-1252, compatible with ascii and latin1 decoding
19
+ // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
20
+ // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
21
+ export const nativeDecoderLatin1 = /* @__PURE__ */ (() => {
22
+ // Not all barebone engines with TextDecoder support something except utf-8, detect
23
+ if (nativeDecoder) {
24
+ try {
25
+ return new TextDecoder('latin1', { ignoreBOM: true })
26
+ } catch {}
27
+ }
28
+
29
+ return null
30
+ })()
31
+
32
+ export function decodePartAddition(a, start, end, m) {
33
+ let o = ''
34
+ let i = start
35
+ for (const last3 = end - 3; i < last3; i += 4) {
36
+ const x0 = a[i]
37
+ const x1 = a[i + 1]
38
+ const x2 = a[i + 2]
39
+ const x3 = a[i + 3]
40
+ o += m[x0]
41
+ o += m[x1]
42
+ o += m[x2]
43
+ o += m[x3]
44
+ }
45
+
46
+ while (i < end) o += m[a[i++]]
47
+ return o
48
+ }
49
+
50
+ // Decoding with templates is faster on Hermes
51
+ export function decodePartTemplates(a, start, end, m) {
52
+ let o = ''
53
+ let i = start
54
+ for (const last15 = end - 15; i < last15; i += 16) {
55
+ const x0 = a[i]
56
+ const x1 = a[i + 1]
57
+ const x2 = a[i + 2]
58
+ const x3 = a[i + 3]
59
+ const x4 = a[i + 4]
60
+ const x5 = a[i + 5]
61
+ const x6 = a[i + 6]
62
+ const x7 = a[i + 7]
63
+ const x8 = a[i + 8]
64
+ const x9 = a[i + 9]
65
+ const x10 = a[i + 10]
66
+ const x11 = a[i + 11]
67
+ const x12 = a[i + 12]
68
+ const x13 = a[i + 13]
69
+ const x14 = a[i + 14]
70
+ const x15 = a[i + 15]
71
+ o += `${m[x0]}${m[x1]}${m[x2]}${m[x3]}${m[x4]}${m[x5]}${m[x6]}${m[x7]}${m[x8]}${m[x9]}${m[x10]}${m[x11]}${m[x12]}${m[x13]}${m[x14]}${m[x15]}`
72
+ }
73
+
74
+ while (i < end) o += m[a[i++]]
75
+ return o
76
+ }
77
+
78
+ const decodePart = isHermes ? decodePartTemplates : decodePartAddition
79
+ export function decode2string(arr, start, end, m) {
80
+ if (end - start > 30_000) {
81
+ // Limit concatenation to avoid excessive GC
82
+ // Thresholds checked on Hermes for toHex
83
+ const concat = []
84
+ for (let i = start; i < end; ) {
85
+ const step = i + 500
86
+ const iNext = step > end ? end : step
87
+ concat.push(decodePart(arr, i, iNext, m))
88
+ i = iNext
89
+ }
90
+
91
+ const res = concat.join('')
92
+ concat.length = 0
93
+ return res
94
+ }
95
+
96
+ return decodePart(arr, start, end, m)
97
+ }
98
+
99
+ /* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
100
+
101
+ function encodeCharcodesHermes(str, arr) {
102
+ const length = str.length
103
+ if (length > 64) {
104
+ const at = str.charCodeAt.bind(str) // faster on strings from ~64 chars on Hermes, but can be 10x slower on e.g. JSC
105
+ for (let i = 0; i < length; i++) arr[i] = at(i)
106
+ } else {
107
+ for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
108
+ }
109
+
110
+ return arr
111
+ }
112
+
113
+ export function encodeCharcodesPure(str, arr) {
114
+ const length = str.length
115
+ // Can be optimized with unrolling, but this is not used on non-Hermes atm
116
+ for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
117
+ return arr
118
+ }
119
+
120
+ /* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
121
+
122
+ export const encodeCharcodes = isHermes ? encodeCharcodesHermes : encodeCharcodesPure
@@ -1,57 +1,48 @@
1
1
  // See tests/encoding/fixtures/single-byte/dump.js for generator
2
2
 
3
3
  const r = 0xff_fd
4
- const e = (x) => new Array(x).fill(1)
5
- const h = (x) => new Array(x).fill(r)
6
4
 
7
5
  /* eslint-disable unicorn/numeric-separators-style, @exodus/export-default/named */
8
6
 
9
7
  // Common ranges
10
8
 
11
9
  // prettier-ignore
12
- const i2 = [-40,-147,1,64,-62,117,-51,-63,69,-67,79,-77,79,-77,1,64,2,51,4,-116,1,124,-122,1,129,22,-148,150,-148,1,133,-131,118,-116,1,33,-31,86,-51,-32,38,-36,48,-46,48,-46,1,33,2,51,4,-85,1,93,-91,1,98,22,-117,119,-117,1,102,374]
13
- const i4a = [-75, -63, e(5), 104, -34, -67, 79, -77, 75, -73, 1]
14
- const i4b = [34, -32, e(5), 73, -34, -36, 48, -46, 44, -42, 1]
15
- const i7 = [721, 1, 1, -719, 721, -719, 721, e(19), r, 2, e(43), r]
16
- const i8 = [e(26), r, r, 6692, 1, r]
17
- const i9 = [79, -77, e(11), 84, 46, -127, e(16), 48, -46, e(11), 53, 46]
18
- const iB = [3425, e(57), h(4), 5, e(28), h(4)]
19
- const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
20
- const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
21
- const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
22
- const w8 = [8072, 1, 3, 1, 5, -15, 1]
23
- const w1 = [w8, -7480, 7750, -8129, 7897, -7911, -182]
24
- const w3 = [w8, -8060, 8330, -8328, 8096, -8094]
25
- const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
10
+ const i2 = [189,148,0,0,63,0,116,64,0,68,0,78,0,78,0,0,63,64,114,117,0,0,123,0,0,128,149,0,149,0,0,132,0,117,0,0,32,0,85,33,0,37,0,47,0,47,0,0,32,33,83,86,0,0,92,0,0,97,118,0,118,0,0,101,474]
26
11
  // prettier-ignore
27
- const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
12
+ const iB = [[58,3424],[4,r],[29,3424],[4,r]]
13
+ const i9 = [[47], 78, [12], 83, 128, [17], 47, [12], 52, 97]
14
+ const w1 = [8236, 0, 8088, 0, 8090, 8097, 8090, 8090, 0, 8103]
15
+ const w2 = [8236, 0, 8088, 271, 8090, 8097, 8090, 8090, 574, 8103]
28
16
  // prettier-ignore
29
- const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
17
+ const w7 = [64,0,157,[4],39,68,109,62,67,0,0,82,75,68,0,175,75,86,105,92,108,144,114,115,0,120,[3],154,104,128,143,0,158,159,0,37,78,31,36,0,0,51,44,37,0,144,44,55,74,61,77,113,83,84,0,89,[3],123,73,97,112,0,127,128]
18
+ const w8 = [8071, 8071, 8073, 8073, 8077, 8061, 8061]
30
19
  // prettier-ignore
31
- const k8b = [-30,1,21,-18,1,15,-17,18,-13,e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
20
+ const k8b = [-22,910,879,879,899,880,880,894,876,893,[8,879],894,[4,878],864,859,884,882,861,877,881,876,873,875,846,815,815,835,816,816,830,812,829,[8,815],830,[4,814],800,795,820,818,797,813,817,812,809,811]
21
+ // prettier-ignore
22
+ const k8a = [9344,9345,9354,9357,9360,9363,9366,9373,9380,9387,9394,9461,9464,9467,9470,[4,9473],8845,9484,8580,8580,8625,8652,8652,6,8838,20,21,25,88,[3,9392],942]
32
23
 
33
24
  // prettier-ignore
34
25
  const maps = {
35
- ibm866: [913,e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
36
- 'koi8-r': [k8a,8450,e(14),-8544,8545,e(10),-9411,933,k8b,-28,k8b],
37
- 'koi8-u': [k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,k8b,-28,k8b],
38
- 'x-mac-cyrillic': [913,e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,e(30),7262],
39
- macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
40
- 'windows-874': [8237,-8235,1,1,1,8098,-8096,e(10),w8,-8060,e(8),iB],
26
+ ibm866: [[48,912],[3,9441],...[29,62,122,122,109,107,120,101,106,111,109,107,31,34,65,56,39,10,69,102,102,96,89,109,105,98,81,108,102,102,97,97,84,82,75,75,98,96,13,0,123,118,125,128,111].map(x=>x+9266),[16,864],785,864,786,865,787,866,792,871,-72,8480,-67,8479,8218,-89,9378,-95],
27
+ 'koi8-u': [...k8a,944,9391,944,944,[5,9391],996,944,[4,9391],846,848,9390,848,848,[5,9390],979,848,...k8b],
28
+ 'koi8-r': [...k8a,[15,9391],846,[11,9390],...k8b],
29
+ macintosh: [68,68,69,70,77,81,86,90,88,89,90,88,89,90,91,89,90,90,91,89,90,90,91,92,90,91,92,90,94,92,93,93,8064,15,0,0,3,8061,16,56,6,0,8312,9,-4,8627,24,41,8558,0,8626,8626,-15,0,8524,8538,8535,775,8561,-17,-2,748,40,57,-1,-32,-22,8535,206,8579,8512,-28,-13,8029,-42,-11,-9,8,132,132,8003,8003,8010,8010,8004,8004,33,9459,39,159,8042,8145,8029,8029,64035,64035,8001,-42,7992,7995,8012,-35,-28,-38,-29,-33,[3,-29],-33,-27,-27,63503,-31,-24,-24,-27,60,464,485,-73,[3,479],-68,480,477,456],
30
+ 'x-mac-cyrillic': [[32,912],8064,15,1006,0,3,8061,16,863,6,0,8312,855,934,8627,853,932,8558,0,8626,8626,930,0,987,849,844,923,845,924,845,924,844,923,920,836,-22,8535,206,8579,8512,-28,-13,8029,-42,832,911,831,910,902,8003,8003,8010,8010,8004,8004,33,8007,822,901,821,900,8250,804,883,880,[31,848],8109],
31
+ 'windows-874': [8236,[4],8097,[11],...w8,[9],...iB],
41
32
  }
42
33
 
43
34
  // windows-1250 - windows-1258
44
35
  // prettier-ignore
45
36
  ;[
46
- [w0,-7888,7897,-7903,10,25,-4,-233,w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,i2],
47
- [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,e(63)],
48
- [p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,w1,225,-6],
49
- [p1,-8089,8104,-8102,8111,-8109,1,1,1,1,w3,1,1,1,1,741,1,-739,e(6),r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,i7],
50
- [p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,w1,1,218,-216,e(47),i9],
51
- [p1,-7515,7530,-8102,8111,-8109,1,1,1,1,w8,-7480,7750,-8328,8096,-8094,e(7),8199,-8197,1,1,1,1,46,-44,e(14),62,-60,1,1,1,1,1265,e(19),45,1,1,1,1,h(7),-36,i8],
52
- [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,e(7),1557,-1555,e(14),1378,-1376,1,1,1,1377,162,-160,e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
53
- [w0,-8102,8111,-8109,28,543,-527,-40,w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,e(5),p3,347],
54
- [p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,e(34),64,-62,e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,e(6),211,340,-548,1,1,1,33,-31,e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,e(6),180,7931],
37
+ [...w1,214,8110,206,215,239,234,0,...w8,0,8329,199,8095,191,200,224,219,0,550,566,158,0,95,[4],180,[4],204,0,0,553,143,[5],76,165,0,129,544,128,...i2],
38
+ [898,898,8088,976,8090,8097,8090,8090,8228,8103,895,8110,894,895,893,896,962,...w8,0,8329,959,8095,958,959,957,960,0,877,956,869,0,1003,0,0,857,0,858,[4],856,0,0,852,931,989,[3],921,8285,922,0,924,840,919,920,[64,848]],
39
+ [...w2,214,8110,198,0,239,0,0,...w8,580,8329,199,8095,183,0,224,217],
40
+ [8236,0,8088,271,8090,8097,8090,8090,0,8103,0,8110,[5],...w8,0,8329,0,8095,[5],740,740,[7],r,[4],8038,[4],720,[3],[3,720],0,720,0,[20,720],r,[44,720],r],
41
+ [...w2,214,8110,198,[4],...w8,580,8329,199,8095,183,0,0,217,0,...i9],
42
+ [...w2,0,8110,[5],...w8,580,8329,0,8095,[8],8198,[5],45,[15],61,[5],[20,1264],[5,1308],[7,r],[27,1264],r,r,7953,7953,r],
43
+ [8236,1533,8088,271,8090,8097,8090,8090,574,8103,1519,8110,198,1529,1546,1529,1567,...w8,1553,8329,1527,8095,183,8047,8047,1563,0,1387,[8],1556,[15],1377,[4],1376,1537,[22,1376],0,[4,1375],[4,1380],0,1379,0,[4,1378],[5],1373,1373,0,0,[4,1371],0,1370,1370,0,1369,0,1368,0,0,7953,7953,1491],
44
+ [...w1,0,8110,0,27,569,41,0,...w8,0,8329,0,8095,0,18,573,0,0,r,[3],r,0,0,48,0,172,[4],23,[8],...w7,474],
45
+ [...w2,0,8110,198,[4],...w8,580,8329,0,8095,183,0,0,217,[35],63,[8],564,[3],64,0,567,0,0,203,[7],210,549,[4],32,[8],533,[3],33,0,561,0,0,172,[7],179,8109],
55
46
  ].forEach((m, i) => {
56
47
  maps[`windows-${i + 1250}`] = m
57
48
  });
@@ -60,23 +51,23 @@ const maps = {
60
51
  // prettier-ignore
61
52
  ;[
62
53
  [], // Actual Latin1 / Unicode subset, non-WHATWG, which maps iso-8859-1 to windows-1252
63
- [100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,i2],
64
- [134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
65
- [100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
66
- [865,e(11),-863,865,e(65),7367,-7365,e(11),-949,951,1],
67
- [r,r,r,4,h(7),1384,-1375,h(13),1390,r,r,r,4,r,2,e(25),h(5),6,e(18),h(13)],
68
- [8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,i7],
69
- [r,2,e(7),46,-44,e(14),62,-60,1,1,1,h(32),8025,-6727,i8],
70
- [e(47),i9], // non-WHATWG, which maps iso-8859-9 to windows-1254
71
- [100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,e(5),i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
54
+ [99,566,158,0,152,180,0,0,183,180,185,205,0,207,204,0,84,553,143,0,137,165,528,0,168,165,170,190,544,192,...i2],
55
+ [133,566,0,0,r,126,0,0,135,180,115,136,0,r,204,0,118,[4],111,0,0,120,165,100,121,0,r,189,[3],r,0,69,66,[9],r,[4],75,0,0,68,[4],143,126,[4],r,0,38,35,[9],r,[4],44,0,0,37,[4],112,95,474],
56
+ [99,150,179,0,131,149,0,0,183,104,119,186,0,207,0,0,84,553,164,0,116,134,528,0,168,89,104,171,141,192,140,64,[6],103,68,0,78,0,74,0,0,91,64,116,122,99,[5],153,[3],139,140,0,33,[6],72,37,0,47,0,43,0,0,60,33,85,91,68,[5],122,[3],108,109,474],
57
+ [[12,864],0,[66,864],8230,[12,864],-86,864,864],
58
+ [[3,r],0,[7,r],1376,0,[13,r],1376,[3,r],1376,r,[26,1376],[5,r],[19,1376],[13,r]],
59
+ [8055,8055,0,8200,8202,[4],720,[3],r,8038,[4],[3,720],0,[3,720],0,720,0,[20,720],r,[44,720],r],
60
+ [r,[8],45,[15],61,[4],[32,r],7992,[27,1264],r,r,7953,7953,r],
61
+ i9, // non-WHATWG, which maps iso-8859-9 to windows-1254
62
+ [99,112,127,134,131,144,0,147,103,182,187,209,0,188,155,0,84,97,112,119,116,129,0,132,88,167,172,194,8024,173,140,64,[6],103,68,0,78,0,74,[4],116,122,[4],145,0,153,[6],33,[6],72,37,0,47,0,43,[4],85,91,[4],114,0,122,[5],57],
72
63
  iB, // non-WHATWG, which maps iso-8859-11 to windows-874
73
64
  null, // no 12
74
- [8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,p3,7835],
75
- [7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,e(15),165,-163,e(5),7572,-7570,e(5),153,-151,e(16),134,-132,e(5),7541,-7539,e(5),122],
76
- [1,1,1,8201,-8199,187,-185,186,-184,e(10),202,-200,1,1,199,-197,1,1,151,1,37],
77
- [100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
65
+ [8060,[3],8057,0,0,48,0,172,[4],23,[4],8040,[3],...w7,7962],
66
+ [7521,7521,0,102,102,7524,0,7640,0,7640,7520,7750,0,0,201,7534,7534,110,110,7564,7564,0,7583,7625,7582,7625,7589,7735,7623,7623,7586,[16],164,[6],7571,[6],152,[17],133,[6],7540,[6],121],
67
+ [[3],8200,0,186,0,185,[11],201,[3],198,[3],150,150,186],
68
+ [99,99,158,8200,8057,186,0,185,0,366,0,205,0,204,204,0,0,90,143,201,8040,0,0,198,84,351,0,150,150,186,189,[3],63,0,65,[10],64,114,[3],123,0,131,152,[4],59,316,[4],32,0,34,[10],33,83,[3],92,0,100,121,[4],28,285],
78
69
  ].forEach((m, i) => {
79
- if (m) maps[`iso-8859-${i + 1}`] = [e(33), m]
80
- })
70
+ if (m) maps[`iso-8859-${i + 1}`] = [[33], ...m]
71
+ });
81
72
 
82
73
  export default maps
@@ -1,6 +1,6 @@
1
1
  import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
2
2
  import encodings from './single-byte.encodings.js'
3
- import { decode2string, nativeDecoder } from './_utils.js'
3
+ import { decode2string, nativeDecoder } from './platform.js'
4
4
 
5
5
  export const E_STRICT = 'Input is not well-formed for this encoding'
6
6
  const xUserDefined = 'x-user-defined'
@@ -17,9 +17,9 @@ export function getEncoding(encoding) {
17
17
  assertEncoding(encoding)
18
18
  if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
19
19
  if (encoding === iso8i) encoding = 'iso-8859-8'
20
- let prev = 127
21
- const enc = encodings[encoding].flat().flat().flat() // max depth is 3, rechecked by tests
22
- return enc.map((x) => (x === r ? x : (prev += x))) // eslint-disable-line no-return-assign
20
+ const enc = encodings[encoding]
21
+ const deltas = enc.flatMap((x) => (Array.isArray(x) ? new Array(x[0]).fill(x[1] ?? 0) : x))
22
+ return deltas.map((x, i) => (x === r ? x : x + 128 + i))
23
23
  }
24
24
 
25
25
  const mappers = new Map()