@exodus/bytes 1.0.0-rc.9 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +151 -0
- package/array.d.ts +24 -0
- package/base58.js +16 -8
- package/base64.d.ts +76 -0
- package/bigint.js +14 -0
- package/encoding-lite.js +7 -0
- package/encoding.js +12 -234
- package/fallback/_utils.js +18 -6
- package/fallback/encoding.js +290 -0
- package/fallback/encoding.labels.js +10 -10
- package/fallback/multi-byte.js +2 -3
- package/fallback/multi-byte.table.js +2 -2
- package/fallback/single-byte.encodings.js +56 -40
- package/fallback/single-byte.js +17 -14
- package/hex.d.ts +22 -0
- package/package.json +24 -6
- package/single-byte.js +0 -1
- package/single-byte.node.js +5 -5
- package/utf8.d.ts +42 -0
package/fallback/_utils.js
CHANGED
|
@@ -1,22 +1,30 @@
|
|
|
1
1
|
const { Buffer, TextEncoder, TextDecoder } = globalThis
|
|
2
2
|
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
3
|
-
|
|
3
|
+
export const nativeBuffer = haveNativeBuffer ? Buffer : null
|
|
4
|
+
export const isHermes = Boolean(globalThis.HermesInternal)
|
|
5
|
+
export const isDeno = Boolean(globalThis.Deno)
|
|
6
|
+
export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
|
|
7
|
+
|
|
8
|
+
/**
 * Heuristically decides whether a global constructor/function is a native implementation.
 * Declared with `let` because it is replaced by an always-false stub on engines
 * where even plain user functions stringify like native ones.
 *
 * @param x - Candidate value (e.g. the global TextEncoder / TextDecoder); falsy means absent
 * @returns true when the candidate is considered native
 */
let isNative = (candidate) => {
  if (!candidate) return false
  // we consider Node.js TextDecoder/TextEncoder native
  if (haveNativeBuffer) return true
  const text = `${candidate}`
  // See https://github.com/facebook/hermes/pull/1855#issuecomment-3659386410
  // Static Hermes has [bytecode] for contrib, which includes TextEncoder/TextDecoder
  const markers = ['[native code]', `[bytecode]`]
  return markers.some((marker) => text.includes(marker))
}
|
|
15
|
+
|
|
4
16
|
if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
|
|
5
17
|
|
|
6
18
|
export const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
|
|
7
19
|
export const nativeDecoder = isNative(TextDecoder)
|
|
8
20
|
? new TextDecoder('utf-8', { ignoreBOM: true })
|
|
9
21
|
: null
|
|
10
|
-
export const nativeBuffer = haveNativeBuffer ? Buffer : null
|
|
11
|
-
export const isHermes = Boolean(globalThis.HermesInternal)
|
|
12
|
-
export const isDeno = Boolean(globalThis.Deno)
|
|
13
|
-
export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
|
|
14
22
|
|
|
15
23
|
// Actually windows-1252, compatible with ascii and latin1 decoding
|
|
16
24
|
// Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
|
|
17
25
|
// in 2025 due to a regression, so we call it Latin1 as it's usable only for that
|
|
18
26
|
let nativeDecoderLatin1impl = null
|
|
19
|
-
if (
|
|
27
|
+
if (nativeDecoder) {
|
|
20
28
|
  // Not all barebone engines with TextDecoder support anything other than utf-8, so detect that
|
|
21
29
|
try {
|
|
22
30
|
nativeDecoderLatin1impl = new TextDecoder('latin1', { ignoreBOM: true })
|
|
@@ -116,3 +124,7 @@ export function decode2string(arr, start, end, m) {
|
|
|
116
124
|
|
|
117
125
|
return decodePart(arr, start, end, m)
|
|
118
126
|
}
|
|
127
|
+
|
|
128
|
+
/**
 * Minimal assertion helper.
 *
 * @param condition - Value expected to be truthy
 * @param msg - Message for the thrown Error
 * @throws {Error} when `condition` is falsy
 */
export function assert(condition, msg) {
  if (condition) return
  throw new Error(msg)
}
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
// We can't return native TextDecoder if it's present, as Node.js one is broken on windows-1252 and we fix that
|
|
2
|
+
// We are also faster than Node.js built-in on both TextEncoder and TextDecoder
|
|
3
|
+
|
|
4
|
+
import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
|
|
5
|
+
import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
|
|
6
|
+
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
|
|
7
|
+
import labels from './encoding.labels.js'
|
|
8
|
+
import { unfinishedBytes } from './encoding.util.js'
|
|
9
|
+
|
|
10
|
+
const E_OPTIONS = 'The "options" argument must be of type object'
|
|
11
|
+
const E_ENCODING = 'Unknown encoding'
|
|
12
|
+
const replacementChar = '\uFFFD'
|
|
13
|
+
|
|
14
|
+
const E_MULTI =
|
|
15
|
+
'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
|
|
16
|
+
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
|
|
17
|
+
let createMultibyteDecoder
|
|
18
|
+
|
|
19
|
+
/**
 * Registers the factory used to create legacy multi-byte decoders.
 * Until this is called, decoding a multi-byte encoding throws E_MULTI.
 *
 * @param createDecoder - Factory invoked as `createDecoder(encoding, loose)`
 */
export function setMultibyteDecoder(createDecoder) {
  // Stored in module state; read lazily by TextDecoder#decode and legacyHookDecode
  createMultibyteDecoder = createDecoder
}
|
|
22
|
+
|
|
23
|
+
let labelsMap
|
|
24
|
+
|
|
25
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
26
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
27
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
28
|
+
/**
 * Resolves an encoding label to its lowercased canonical encoding name.
 *
 * @param label - Encoding label; surrounding whitespace and letter case are ignored
 * @returns The canonical name, or null when the label is not recognized
 */
export function normalizeEncoding(label) {
  // fast path for the most common exact spellings
  switch (label) {
    case 'utf-8':
    case 'utf8':
    case 'UTF-8':
    case 'UTF8':
      return 'utf-8'
    case 'windows-1252':
    case 'ascii':
    case 'latin1':
      return 'windows-1252'
  }

  // full map
  if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
  const key = `${label}`.trim().toLowerCase()
  if (Object.hasOwn(labels, key)) return key // already a canonical name

  // Build the alias -> canonical name index lazily, on the first lookup that needs it
  if (!labelsMap) {
    const pairs = Object.entries(labels).flatMap(([name, aliases]) =>
      aliases.map((alias) => [alias, name])
    )
    labelsMap = new Map(pairs)
  }

  return labelsMap.get(key) || null
}
|
|
47
|
+
|
|
48
|
+
const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
|
|
49
|
+
|
|
50
|
+
/**
 * Converts a supported binary source into a Uint8Array over the same memory (no copy).
 *
 * @param x - Uint8Array (returned as-is), ArrayBuffer, any other ArrayBufferView, or SharedArrayBuffer
 * @returns A Uint8Array covering exactly the bytes of `x`
 * @throws {TypeError} for any other argument
 */
const fromSource = (x) => {
  if (x instanceof Uint8Array) return x
  if (x instanceof ArrayBuffer) return new Uint8Array(x)
  if (ArrayBuffer.isView(x)) {
    // Other typed arrays / DataView: re-wrap only the window the view covers
    const { buffer, byteOffset, byteLength } = x
    return new Uint8Array(buffer, byteOffset, byteLength)
  }

  // SharedArrayBuffer is not defined in every environment, so probe before using it
  const { SharedArrayBuffer: Shared } = globalThis
  if (Shared && x instanceof Shared) return new Uint8Array(x)
  throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
}
|
|
57
|
+
|
|
58
|
+
/**
 * Picks the bytes -> string function for a Unicode encoding.
 *
 * @param encoding - 'utf-8', 'utf-16le' or 'utf-16be' (anything that is not 'utf-8' or 'utf-16le' is handled as big-endian utf-16)
 * @param loose - When truthy the Loose variant is used, otherwise the strict one
 * @returns A function taking a Uint8Array and returning the decoded string
 */
function unicodeDecoder(encoding, loose) {
  if (encoding !== 'utf-8') {
    const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
    if (loose) return (u) => utf16toStringLoose(u, form)
    return (u) => utf16toString(u, form)
  }

  // likely
  return loose ? utf8toStringLoose : utf8toString
}
|
|
63
|
+
|
|
64
|
+
/**
 * TextDecoder implementation following https://encoding.spec.whatwg.org/
 *
 * Exposes read-only `encoding`, `fatal` and `ignoreBOM` properties. Streaming state
 * (unfinished trailing bytes and BOM handling) is kept for utf-8 / utf-16 here;
 * multi-byte decoders keep their own state inside the decoder function.
 */
export class TextDecoder {
  #decode // lazily created bytes -> string function for this.encoding
  #unicode // true for utf-8 / utf-16le / utf-16be
  #multibyte // true for legacy multi-byte encodings
  #chunk // copy of the unfinished trailing bytes from the previous stream call, or null
  #canBOM // whether a BOM at the start of the next decoded data should be stripped

  /**
   * @param encoding - Encoding label, resolved via normalizeEncoding
   * @param options - Optional `{ fatal, ignoreBOM }`; null is treated like an empty object
   * @throws {TypeError} when options is not an object
   * @throws {RangeError} when the label is unknown or resolves to 'replacement'
   */
  constructor(encoding = 'utf-8', options = {}) {
    if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
    const enc = normalizeEncoding(encoding)
    if (!enc || enc === 'replacement') throw new RangeError(E_ENCODING)
    define(this, 'encoding', enc)
    // `typeof null` is 'object', so null reaches here; read members null-safely
    define(this, 'fatal', Boolean(options?.fatal))
    define(this, 'ignoreBOM', Boolean(options?.ignoreBOM))
    this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
    this.#multibyte = !this.#unicode && multibyteSet.has(enc)
    this.#canBOM = this.#unicode && !this.ignoreBOM
  }

  get [Symbol.toStringTag]() {
    return 'TextDecoder'
  }

  /**
   * Decodes bytes into a string.
   *
   * @param input - SharedArrayBuffer, ArrayBuffer or ArrayBufferView; undefined means no bytes
   * @param options - Optional `{ stream }`; null is treated like an empty object
   * @returns The decoded string
   * @throws {TypeError} when options is not an object or input is of an unsupported type
   */
  decode(input, options = {}) {
    if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
    const stream = Boolean(options?.stream) // null-safe, see constructor
    let u = input === undefined ? new Uint8Array() : fromSource(input)

    if (this.#unicode) {
      let prefix
      if (this.#chunk) {
        if (u.length === 0) {
          if (stream) return '' // no change
          u = this.#chunk // process as final chunk to handle errors and state changes
        } else if (u.length < 3) {
          // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
          const a = new Uint8Array(u.length + this.#chunk.length)
          a.set(this.#chunk)
          a.set(u, this.#chunk.length)
          u = a
        } else {
          // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
          const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
          t.set(this.#chunk)
          t.set(u.subarray(0, 3), this.#chunk.length)

          // Stop at the first offset where unfinished bytes reaches 0 or fits into u
          // If that doesn't happen (u too short), just concat chunk and u completely
          for (let i = 1; i <= 3; i++) {
            const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
            if (unfinished <= i) {
              // Always reachable at 3, but we still need 'unfinished' value for it
              const add = i - unfinished // 0-3
              prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
              if (add > 0) u = u.subarray(add)
              break
            }
          }
        }

        this.#chunk = null
      } else if (u.byteLength === 0) {
        if (!stream) this.#canBOM = !this.ignoreBOM
        return ''
      }

      // For non-stream utf-8 we don't have to do this as it matches utf8toStringLoose already
      // For non-stream loose utf-16 we still have to do this as this API supports uneven byteLength unlike utf16toStringLoose
      let suffix = ''
      if (stream || (!this.fatal && this.encoding !== 'utf-8')) {
        const trail = unfinishedBytes(u, u.byteLength, this.encoding)
        if (trail > 0) {
          if (stream) {
            this.#chunk = Uint8Array.from(u.subarray(-trail)) // copy
          } else {
            // non-fatal mode as already checked
            suffix = replacementChar
          }

          u = u.subarray(0, -trail)
        }
      }

      if (this.#canBOM) {
        const bom = this.#findBom(prefix ?? u)
        if (bom) {
          if (stream) this.#canBOM = false
          if (prefix) {
            prefix = prefix.subarray(bom)
          } else {
            u = u.subarray(bom)
          }
        }
      }

      if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
      try {
        const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
        if (res.length > 0 && stream) this.#canBOM = false

        if (!stream) this.#canBOM = !this.ignoreBOM
        return res
      } catch (err) {
        this.#chunk = null // reset unfinished chunk on errors
        throw err
      }

      // eslint-disable-next-line no-else-return
    } else if (this.#multibyte) {
      if (!createMultibyteDecoder) throw new Error(E_MULTI)
      if (!this.#decode) this.#decode = createMultibyteDecoder(this.encoding, !this.fatal) // can contain state!
      return this.#decode(u, stream)
    } else {
      if (!this.#decode) this.#decode = createSinglebyteDecoder(this.encoding, !this.fatal)
      return this.#decode(u)
    }
  }

  // Returns the byte length of a BOM for this.encoding at the start of u, or 0 if there is none
  #findBom(u) {
    switch (this.encoding) {
      case 'utf-8':
        return u.byteLength >= 3 && u[0] === 0xef && u[1] === 0xbb && u[2] === 0xbf ? 3 : 0
      case 'utf-16le':
        return u.byteLength >= 2 && u[0] === 0xff && u[1] === 0xfe ? 2 : 0
      case 'utf-16be':
        return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
    }

    throw new Error('Unreachable')
  }
}
|
|
195
|
+
|
|
196
|
+
/**
 * TextEncoder implementation following https://encoding.spec.whatwg.org/ (utf-8 only).
 * Exposes a read-only `encoding` property that is always 'utf-8'.
 */
export class TextEncoder {
  constructor() {
    define(this, 'encoding', 'utf-8')
  }

  get [Symbol.toStringTag]() {
    return 'TextEncoder'
  }

  /**
   * Encodes a string as utf-8 using the loose encoder.
   *
   * @param str - Value to encode; non-strings are stringified first
   * @returns A Uint8Array with byteOffset 0
   */
  encode(str = '') {
    const text = typeof str === 'string' ? str : `${str}`
    const bytes = utf8fromStringLoose(text)
    // Ensure 0-offset, to match new Uint8Array (per spec), which is non-pooled
    if (bytes.byteOffset !== 0) return bytes.slice(0)
    return bytes
  }

  /**
   * Encodes as much of `str` as fits into `target` without splitting a utf-8 sequence.
   *
   * @param str - Value to encode; non-strings are stringified first
   * @param target - Destination Uint8Array
   * @returns `{ read, written }`: utf-16 code units consumed and bytes written
   * @throws {TypeError} when target is not a Uint8Array
   */
  encodeInto(str, target) {
    let text = typeof str === 'string' ? str : `${str}`
    if (!(target instanceof Uint8Array)) throw new TypeError('Target must be an Uint8Array')
    // Until https://github.com/whatwg/encoding/issues/324 is resolved
    if (target.buffer.detached) return { read: 0, written: 0 }

    const capacity = target.length
    // Every code unit encodes to at least one byte, so anything past `capacity` units can't fit
    if (text.length > capacity) text = text.slice(0, capacity)
    let bytes = utf8fromStringLoose(text)
    let read
    if (bytes.length <= capacity) {
      // Everything fits
      read = text.length
    } else if (bytes.length === text.length) {
      // ascii can be truncated; this branch is only entered when bytes exceed capacity
      bytes = bytes.subarray(0, capacity)
      read = bytes.length
    } else {
      bytes = bytes.subarray(0, capacity)
      // Drop a partially written trailing sequence, if any
      const dangling = unfinishedBytes(bytes, bytes.length, 'utf-8')
      if (dangling > 0) bytes = bytes.subarray(0, bytes.length - dangling)

      // We can do this because loose str -> u8 -> str preserves length, unlike loose u8 -> str -> u8
      // Each unpaired surrogate (1 charcode) is replaced with a single charcode
      read = utf8toStringLoose(bytes).length // FIXME: Converting back is very inefficient
    }

    try {
      target.set(bytes)
    } catch {
      // likely detached but no .detached property support
      return { read: 0, written: 0 }
    }

    return { read, written: bytes.length }
  }
}
|
|
244
|
+
|
|
245
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
246
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
247
|
+
/**
 * Sniffs a byte order mark at the start of the input.
 *
 * @param input - SharedArrayBuffer, ArrayBuffer or ArrayBufferView
 * @returns 'utf-8', 'utf-16le' or 'utf-16be' when the matching BOM is present, otherwise null
 * @throws {TypeError} when input is of an unsupported type
 */
export function getBOMEncoding(input) {
  const bytes = fromSource(input) // asserts
  const { length } = bytes
  // EF BB BF
  if (length >= 3 && bytes[0] === 0xef && bytes[1] === 0xbb && bytes[2] === 0xbf) return 'utf-8'
  if (length >= 2) {
    const first = bytes[0]
    const second = bytes[1]
    if (first === 0xff && second === 0xfe) return 'utf-16le' // FF FE
    if (first === 0xfe && second === 0xff) return 'utf-16be' // FE FF
  }

  return null
}
|
|
255
|
+
|
|
256
|
+
// https://encoding.spec.whatwg.org/#decode
|
|
257
|
+
// Warning: encoding sniffed from BOM takes preference over the supplied one
|
|
258
|
+
// Warning: lossy, performs replacement, no option of throwing
|
|
259
|
+
// Completely ignores encoding and even skips validation when BOM is found
|
|
260
|
+
// Unlike TextDecoder public API, additionally supports 'replacement' encoding
|
|
261
|
+
/**
 * Lossy one-shot decode where a BOM, if present, overrides the supplied encoding.
 *
 * @param input - SharedArrayBuffer, ArrayBuffer or ArrayBufferView
 * @param fallbackEncoding - Encoding label used when no BOM is found (default 'utf-8')
 * @returns The decoded string; malformed input is replaced rather than thrown on
 * @throws {TypeError} when input is of an unsupported type
 * @throws {RangeError} when no BOM is found and the fallback label is unknown
 * @throws {Error} for a multi-byte encoding when no multi-byte decoder factory is registered
 */
export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
  const source = fromSource(input)
  const bomEncoding = getBOMEncoding(source)
  // Skip the BOM bytes themselves: 3 for utf-8, 2 for either utf-16 flavor
  const body = bomEncoding ? source.subarray(bomEncoding === 'utf-8' ? 3 : 2) : source
  const enc = bomEncoding ?? normalizeEncoding(fallbackEncoding) // "the byte order mark is more authoritative than anything else"

  switch (enc) {
    case 'utf-8':
      return utf8toStringLoose(body)
    case 'utf-16le':
    case 'utf-16be': {
      const form = enc === 'utf-16le' ? 'uint8-le' : 'uint8-be'
      // A dangling odd byte is cut off and reported as one replacement char at the end
      const odd = body.byteLength % 2 !== 0
      const even = odd ? body.subarray(0, -1) : body
      return utf16toStringLoose(even, form) + (odd ? replacementChar : '')
    }
  }

  if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)

  if (multibyteSet.has(enc)) {
    if (!createMultibyteDecoder) throw new Error(E_MULTI)
    const decodeMultibyte = createMultibyteDecoder(enc, true)
    return decodeMultibyte(body)
  }

  // https://encoding.spec.whatwg.org/#replacement-decoder
  // On non-streaming non-fatal case, it just replaces any non-empty input with a single replacement char
  if (enc === 'replacement') return input.byteLength > 0 ? replacementChar : ''

  const decodeSinglebyte = createSinglebyteDecoder(enc, true)
  return decodeSinglebyte(body)
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
/* eslint-disable @exodus/export-default/named */
|
|
4
4
|
// prettier-ignore
|
|
5
|
-
|
|
5
|
+
const labels = {
|
|
6
6
|
'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
|
|
7
7
|
ibm866: ['866', 'cp866', 'csibm866'],
|
|
8
8
|
'iso-8859-2': ['csisolatin2', 'iso-ir-101', 'iso8859-2', 'iso88592', 'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2'],
|
|
@@ -22,15 +22,6 @@ export default {
|
|
|
22
22
|
'koi8-u': ['koi8-ru'],
|
|
23
23
|
macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
|
|
24
24
|
'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
|
|
25
|
-
'windows-1250': ['cp1250', 'x-cp1250'],
|
|
26
|
-
'windows-1251': ['cp1251', 'x-cp1251'],
|
|
27
|
-
'windows-1252': ['ansi_x3.4-1968', 'ascii', 'cp1252', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii', 'x-cp1252'],
|
|
28
|
-
'windows-1253': ['cp1253', 'x-cp1253'],
|
|
29
|
-
'windows-1254': ['cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5', 'x-cp1254'],
|
|
30
|
-
'windows-1255': ['cp1255', 'x-cp1255'],
|
|
31
|
-
'windows-1256': ['cp1256', 'x-cp1256'],
|
|
32
|
-
'windows-1257': ['cp1257', 'x-cp1257'],
|
|
33
|
-
'windows-1258': ['cp1258', 'x-cp1258'],
|
|
34
25
|
'x-mac-cyrillic': ['x-mac-ukrainian'],
|
|
35
26
|
gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
|
|
36
27
|
gb18030: [],
|
|
@@ -44,3 +35,12 @@ export default {
|
|
|
44
35
|
'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
|
|
45
36
|
'x-user-defined': [],
|
|
46
37
|
}
|
|
38
|
+
|
|
39
|
+
for (let i = 0; i < 9; i++) labels[`windows-125${i}`] = [`cp125${i}`, `x-cp125${i}`]
|
|
40
|
+
|
|
41
|
+
// prettier-ignore
|
|
42
|
+
labels['windows-1252'].push('ansi_x3.4-1968', 'ascii', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii')
|
|
43
|
+
// prettier-ignore
|
|
44
|
+
labels['windows-1254'].push('csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5')
|
|
45
|
+
|
|
46
|
+
export default labels
|
package/fallback/multi-byte.js
CHANGED
|
@@ -319,7 +319,6 @@ const mappers = {
|
|
|
319
319
|
}
|
|
320
320
|
|
|
321
321
|
export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path
|
|
322
|
-
export const multibyteSupported = (enc) => Object.hasOwn(mappers, enc) || enc === 'big5'
|
|
323
322
|
|
|
324
323
|
export function multibyteDecoder(enc, loose = false) {
|
|
325
324
|
if (enc === 'big5') return big5decoder(loose)
|
|
@@ -337,7 +336,7 @@ export function multibyteDecoder(enc, loose = false) {
|
|
|
337
336
|
// Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
|
|
338
337
|
      // iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
|
|
339
338
|
if (!stream) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
|
|
340
|
-
throw new
|
|
339
|
+
throw new TypeError(E_STRICT)
|
|
341
340
|
}
|
|
342
341
|
|
|
343
342
|
let res = ''
|
|
@@ -400,7 +399,7 @@ function big5decoder(loose) {
|
|
|
400
399
|
: () => {
|
|
401
400
|
pushback.length = 0 // the queue is cleared on returning an error
|
|
402
401
|
// Lead is always already cleared before throwing
|
|
403
|
-
throw new
|
|
402
|
+
throw new TypeError(E_STRICT)
|
|
404
403
|
}
|
|
405
404
|
|
|
406
405
|
let res = ''
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { fromBase64url } from '@exodus/bytes/base64.js'
|
|
2
|
-
import { utf16toString } from '@exodus/bytes/utf16.js'
|
|
1
|
+
import { fromBase64url } from '@exodus/bytes/base64.js'
|
|
2
|
+
import { utf16toString } from '@exodus/bytes/utf16.js'
|
|
3
3
|
import loadEncodings from './multi-byte.encodings.cjs'
|
|
4
4
|
import { to16input } from './utf16.js'
|
|
5
5
|
|
|
@@ -1,45 +1,61 @@
|
|
|
1
|
-
// See tests/fixtures/
|
|
1
|
+
// See tests/encoding/fixtures/single-byte/dump.js for generator
|
|
2
2
|
|
|
3
|
-
const
|
|
4
|
-
|
|
5
|
-
const
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
const
|
|
13
|
-
|
|
3
|
+
const r = 0xff_fd
|
|
4
|
+
const e = (x) => new Array(x).fill(1)
|
|
5
|
+
const h = (x) => new Array(x).fill(r)
|
|
6
|
+
|
|
7
|
+
/* eslint-disable unicorn/numeric-separators-style, @exodus/export-default/named */
|
|
8
|
+
|
|
9
|
+
// Common ranges
|
|
10
|
+
|
|
11
|
+
// prettier-ignore
|
|
12
|
+
const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
|
|
13
|
+
// prettier-ignore
|
|
14
|
+
const k8b = [-30,1,21,-18,1,15,-17,18,-13,...e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
|
|
15
|
+
const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
|
|
16
|
+
const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
|
|
17
|
+
// prettier-ignore
|
|
18
|
+
const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,...p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,...p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
|
|
19
|
+
const i0 = e(33)
|
|
20
|
+
// prettier-ignore
|
|
21
|
+
const i2 = [-40,-147,1,64,-62,117,-51,-63,69,-67,79,-77,79,-77,1,64,2,51,4,-116,1,124,-122,1,129,22,-148,150,-148,1,133,-131,118,-116,1,33,-31,86,-51,-32,38,-36,48,-46,48,-46,1,33,2,51,4,-85,1,93,-91,1,98,22,-117,119,-117,1,102,374]
|
|
22
|
+
const i4a = [-75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1]
|
|
23
|
+
const i4b = [34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1]
|
|
24
|
+
const i7 = [721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r]
|
|
25
|
+
const i8 = [...e(26), r, r, 6692, 1, r]
|
|
26
|
+
const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
|
|
27
|
+
const w8 = [8072, 1, 3, 1, 5, -15, 1]
|
|
28
|
+
const w1 = [...w8, -7480, 7750, -8129, 7897, -7911, -182]
|
|
29
|
+
const w3 = [...w8, -8060, 8330, -8328, 8096, -8094]
|
|
30
|
+
const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
|
|
14
31
|
|
|
15
|
-
/* eslint-disable @exodus/export-default/named */
|
|
16
32
|
// prettier-ignore
|
|
17
33
|
export default {
|
|
18
|
-
ibm866:
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
macintosh:
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
34
|
+
ibm866: [913,...e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,...e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
|
|
35
|
+
'iso-8859-10': [...i0,100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,...i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,...e(5),...i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
|
|
36
|
+
'iso-8859-13': [...i0,8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,...p3,7835],
|
|
37
|
+
'iso-8859-14': [...i0,7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,...e(15),165,-163,...e(5),7572,-7570,...e(5),153,-151,...e(16),134,-132,...e(5),7541,-7539,...e(5),122],
|
|
38
|
+
'iso-8859-15': [...i0,1,1,1,8201,-8199,187,-185,186,-184,...e(10),202,-200,1,1,199,-197,1,1,151,1,37],
|
|
39
|
+
'iso-8859-16': [...i0,100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,...e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,...e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
|
|
40
|
+
'iso-8859-2': [...i0,100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,...i2],
|
|
41
|
+
'iso-8859-3': [...i0,134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,...e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,...e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
|
|
42
|
+
'iso-8859-4': [...i0,100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,...i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,...i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
|
|
43
|
+
'iso-8859-5': [...i0,865,...e(11),-863,865,...e(65),7367,-7365,...e(11),-949,951,1],
|
|
44
|
+
'iso-8859-6': [...i0,r,r,r,4,...h(7),1384,-1375,...h(13),1390,r,r,r,4,r,2,...e(25),r,r,r,r,r,6,...e(18),...h(13)],
|
|
45
|
+
'iso-8859-7': [...i0,8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,...i7],
|
|
46
|
+
'iso-8859-8': [...i0,r,2,...e(7),46,-44,...e(14),62,-60,1,1,1,...h(32),8025,-6727,...i8],
|
|
47
|
+
'koi8-r': [...k8a,8450,...e(14),-8544,8545,...e(10),-9411,933,...k8b,-28,...k8b],
|
|
48
|
+
'koi8-u': [...k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,...k8b,-28,...k8b],
|
|
49
|
+
macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,...m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
|
|
50
|
+
'windows-1250': [...w0,-7888,7897,-7903,10,25,-4,-233,...w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,...i2],
|
|
51
|
+
'windows-1251': [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,...e(63)],
|
|
52
|
+
'windows-1252': [...p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,...w1,225,-6],
|
|
53
|
+
'windows-1253': [...p1,-8089,8104,-8102,8111,-8109,1,1,1,1,...w3,1,1,1,1,741,1,-739,1,1,1,1,1,1,r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,...i7],
|
|
54
|
+
'windows-1254': [...p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,...w1,1,218,-216,...e(47),79,-77,...e(11),84,46,-127,...e(16),48,-46,...e(11),53,46],
|
|
55
|
+
'windows-1255': [...p1,-7515,7530,-8102,8111,-8109,1,1,1,1,...w8,-7480,7750,-8328,8096,-8094,...e(7),8199,-8197,1,1,1,1,46,-44,...e(14),62,-60,1,1,1,1,1265,...e(19),45,1,1,1,1,...h(7),-36,...i8],
|
|
56
|
+
'windows-1256': [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,...e(7),1557,-1555,...e(14),1378,-1376,1,1,1,1377,162,-160,...e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
|
|
57
|
+
'windows-1257': [...w0,-8102,8111,-8109,28,543,-527,-40,...w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,...e(5),...p3,347],
|
|
58
|
+
'windows-1258': [...p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,...w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,...e(34),64,-62,...e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,1,1,1,1,1,1,211,340,-548,1,1,1,33,-31,...e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,1,1,1,1,1,1,180,7931],
|
|
59
|
+
'windows-874': [8237,-8235,1,1,1,8098,-8096,...e(10),...w8,-8060,...e(8),3425,...e(57),r,r,r,r,5,...e(28),r,r,r,r],
|
|
60
|
+
'x-mac-cyrillic': [913,...e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,...m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,...e(30),7262]
|
|
45
61
|
}
|
package/fallback/single-byte.js
CHANGED
|
@@ -4,19 +4,21 @@ import { decode2string } from './_utils.js'
|
|
|
4
4
|
|
|
5
5
|
export const E_STRICT = 'Input is not well-formed for this encoding'
|
|
6
6
|
const xUserDefined = 'x-user-defined'
|
|
7
|
+
const iso8i = 'iso-8859-8-i'
|
|
7
8
|
|
|
8
9
|
export const assertEncoding = (encoding) => {
|
|
9
|
-
if (Object.hasOwn(encodings, encoding) || encoding === xUserDefined) return
|
|
10
|
+
if (Object.hasOwn(encodings, encoding) || encoding === xUserDefined || encoding === iso8i) return
|
|
10
11
|
throw new RangeError('Unsupported encoding')
|
|
11
12
|
}
|
|
12
13
|
|
|
14
|
+
const r = 0xff_fd
|
|
15
|
+
|
|
13
16
|
function getEncoding(encoding) {
|
|
14
17
|
assertEncoding(encoding)
|
|
15
|
-
if (encoding === xUserDefined) {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
return encodings[encoding]
|
|
18
|
+
if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
|
|
19
|
+
if (encoding === iso8i) encoding = 'iso-8859-8'
|
|
20
|
+
let prev = 127
|
|
21
|
+
return encodings[encoding].map((x) => (x === r ? x : (prev += x))) // eslint-disable-line no-return-assign
|
|
20
22
|
}
|
|
21
23
|
|
|
22
24
|
const mappers = new Map()
|
|
@@ -28,13 +30,13 @@ export function encodingMapper(encoding) {
|
|
|
28
30
|
const cached = mappers.get(encoding)
|
|
29
31
|
if (cached) return cached
|
|
30
32
|
|
|
31
|
-
const
|
|
33
|
+
const codes = getEncoding(encoding)
|
|
34
|
+
const incomplete = codes.includes(0xff_fd)
|
|
32
35
|
let map
|
|
33
36
|
const mapper = (arr, start = 0) => {
|
|
34
37
|
if (!map) {
|
|
35
|
-
map = Uint16Array
|
|
36
|
-
|
|
37
|
-
map.set(Uint16Array.from(strings.map((x) => x.charCodeAt(0))), 128)
|
|
38
|
+
map = new Uint16Array(256).map((_, i) => i) // Unicode subset
|
|
39
|
+
map.set(Uint16Array.from(codes), 128)
|
|
38
40
|
}
|
|
39
41
|
|
|
40
42
|
const o = Uint16Array.from(start === 0 ? arr : arr.subarray(start)) // copy to modify in-place, also those are 16-bit now
|
|
@@ -63,12 +65,13 @@ export function encodingDecoder(encoding) {
|
|
|
63
65
|
if (cached) return cached
|
|
64
66
|
|
|
65
67
|
let strings
|
|
66
|
-
const
|
|
68
|
+
const codes = getEncoding(encoding)
|
|
69
|
+
const incomplete = codes.includes(0xff_fd)
|
|
67
70
|
const decoder = (arr, loose = false) => {
|
|
68
71
|
if (!strings) {
|
|
69
|
-
const
|
|
70
|
-
|
|
71
|
-
|
|
72
|
+
const allCodes = Array.from({ length: 128 }, (_, i) => i).concat(codes)
|
|
73
|
+
while (allCodes.length < 256) allCodes.push(allCodes.length)
|
|
74
|
+
strings = allCodes.map((c) => String.fromCharCode(c))
|
|
72
75
|
}
|
|
73
76
|
|
|
74
77
|
const prefix = decodeLatin1(arr, 0, asciiPrefix(arr))
|
package/hex.d.ts
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
|
|
3
|
+
import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Encodes a Uint8Array to a lowercase hex string
|
|
7
|
+
* @param arr - The input bytes
|
|
8
|
+
* @returns The hex encoded string
|
|
9
|
+
*/
|
|
10
|
+
export function toHex(arr: Uint8ArrayBuffer): string;
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Decodes a hex string to bytes
|
|
14
|
+
* Unlike Buffer.from(), throws on invalid input
|
|
15
|
+
* @param str - The hex encoded string (case-insensitive)
|
|
16
|
+
* @param format - Output format (default: 'uint8')
|
|
17
|
+
* @returns The decoded bytes
|
|
18
|
+
*/
|
|
19
|
+
export function fromHex(str: string, format?: 'uint8'): Uint8ArrayBuffer;
|
|
20
|
+
export function fromHex(str: string, format: 'buffer'): Buffer;
|
|
21
|
+
export function fromHex(str: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
|
|
22
|
+
|