@exodus/bytes 1.0.0-rc.9 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,30 @@
1
1
  const { Buffer, TextEncoder, TextDecoder } = globalThis
2
2
  const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
3
- let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
3
+ export const nativeBuffer = haveNativeBuffer ? Buffer : null
4
+ export const isHermes = Boolean(globalThis.HermesInternal)
5
+ export const isDeno = Boolean(globalThis.Deno)
6
+ export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
7
+
8
+ let isNative = (x) => { // Heuristic: is `x` an engine-provided implementation? Declared with `let` because it is swapped for a constant-false stub below when the probe misfires
9
+ if (!x) return false // a missing global is never native
10
+ if (haveNativeBuffer) return true // we consider Node.js TextDecoder/TextEncoder native
11
+ const s = `${x}` // source text of the function/class as reported by the engine
12
+ // See https://github.com/facebook/hermes/pull/1855#issuecomment-3659386410
13
+ return s.includes('[native code]') || s.includes(`[bytecode]`) // Static Hermes has [bytecode] for contrib, which includes TextEncoder/TextDecoder
14
+ }
15
+
4
16
  if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
5
17
 
6
18
  export const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
7
19
  export const nativeDecoder = isNative(TextDecoder)
8
20
  ? new TextDecoder('utf-8', { ignoreBOM: true })
9
21
  : null
10
- export const nativeBuffer = haveNativeBuffer ? Buffer : null
11
- export const isHermes = Boolean(globalThis.HermesInternal)
12
- export const isDeno = Boolean(globalThis.Deno)
13
- export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
14
22
 
15
23
  // Actually windows-1252, compatible with ascii and latin1 decoding
16
24
  // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
17
25
  // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
18
26
  let nativeDecoderLatin1impl = null
19
- if (isNative(TextDecoder)) {
27
+ if (nativeDecoder) {
20
28
  // Not all barebone engines with TextDecoder support something except utf-8, detect
21
29
  try {
22
30
  nativeDecoderLatin1impl = new TextDecoder('latin1', { ignoreBOM: true })
@@ -116,3 +124,7 @@ export function decode2string(arr, start, end, m) {
116
124
 
117
125
  return decodePart(arr, start, end, m)
118
126
  }
127
+
128
+ export function assert(condition, msg) { // Minimal assertion helper: returns undefined when `condition` is truthy
129
+ if (!condition) throw new Error(msg) // falsy condition -> plain Error carrying the caller-supplied message
130
+ }
@@ -0,0 +1,290 @@
1
+ // We can't return native TextDecoder if it's present, as Node.js one is broken on windows-1252 and we fix that
2
+ // We are also faster than Node.js built-in on both TextEncoder and TextDecoder
3
+
4
+ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
5
+ import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
6
+ import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
7
+ import labels from './encoding.labels.js'
8
+ import { unfinishedBytes } from './encoding.util.js'
9
+
10
+ const E_OPTIONS = 'The "options" argument must be of type object' // TypeError message used by the TextDecoder constructor and decode()
11
+ const E_ENCODING = 'Unknown encoding' // RangeError message for labels that cannot be resolved
12
+ const replacementChar = '\uFFFD' // U+FFFD, emitted in place of malformed or truncated input in loose mode
13
+
14
+ const E_MULTI = // Error message used while no multi-byte decoder factory has been registered
15
+ 'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
16
+ const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
17
+ let createMultibyteDecoder // factory installed through setMultibyteDecoder(); undefined until then
18
+
19
+ export function setMultibyteDecoder(createDecoder) { // Registers the factory used for the encodings listed in multibyteSet
20
+ createMultibyteDecoder = createDecoder // kept at module level; TextDecoder#decode and legacyHookDecode throw E_MULTI while it is unset
21
+ }
22
+
23
+ let labelsMap // alias -> canonical label lookup, built lazily on the first slow-path call of normalizeEncoding
24
+
25
+ // Resolves an encoding label to its canonical name, or returns null when the label is not recognized
26
+ // Warning: unlike whatwg-encoding, returns lowercased labels; labels are case-insensitive and that's how the TextDecoder encoding getter normalizes them
27
+ // https://encoding.spec.whatwg.org/#names-and-labels
28
+ export function normalizeEncoding(label) {
29
+ // fast path: exact matches for the most common labels, skipping the regex / trim / lowercase work
30
+ if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
31
+ if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
32
+ // full map: validate characters, normalize, then try canonical names before aliases
33
+ if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace); test() stringifies non-string labels
34
+ const low = `${label}`.trim().toLowerCase()
35
+ if (Object.hasOwn(labels, low)) return low // already a canonical name (own key of the labels table)
36
+ if (!labelsMap) {
37
+ labelsMap = new Map()
38
+ for (const [label, aliases] of Object.entries(labels)) { // invert canonical name -> aliases into alias -> canonical name, once
39
+ for (const alias of aliases) labelsMap.set(alias, label)
40
+ }
41
+ }
42
+
43
+ const mapped = labelsMap.get(low)
44
+ if (mapped) return mapped
45
+ return null // unknown label
46
+ }
47
+
48
+ const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false }) // read-only data property; returns obj. Other attributes keep defineProperty's defaults
49
+
50
+ const fromSource = (x) => { // Converts any accepted binary input into a Uint8Array over the same memory (no copy); throws TypeError otherwise
51
+ if (x instanceof Uint8Array) return x // returned as is, not wrapped
52
+ if (x instanceof ArrayBuffer) return new Uint8Array(x)
53
+ if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength) // other typed arrays / DataView: same byte window
54
+ if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return new Uint8Array(x) // guarded: SharedArrayBuffer may not exist in this environment
55
+ throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
56
+ }
57
+
58
+ function unicodeDecoder(encoding, loose) { // Picks the bytes -> string function for 'utf-8' / 'utf-16le' / 'utf-16be'; `loose` selects the *Loose variant
59
+ if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
60
+ const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be' // anything that is not utf-16le is treated as big-endian here
61
+ return loose ? (u) => utf16toStringLoose(u, form) : (u) => utf16toString(u, form)
62
+ }
63
+
64
+ export class TextDecoder {
65
+ #decode
66
+ #unicode
67
+ #multibyte
68
+ #chunk
69
+ #canBOM
70
+
71
+ constructor(encoding = 'utf-8', options = {}) {
72
+ if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
73
+ const enc = normalizeEncoding(encoding)
74
+ if (!enc || enc === 'replacement') throw new RangeError(E_ENCODING)
75
+ define(this, 'encoding', enc)
76
+ define(this, 'fatal', Boolean(options.fatal))
77
+ define(this, 'ignoreBOM', Boolean(options.ignoreBOM))
78
+ this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
79
+ this.#multibyte = !this.#unicode && multibyteSet.has(enc)
80
+ this.#canBOM = this.#unicode && !this.ignoreBOM
81
+ }
82
+
83
+ get [Symbol.toStringTag]() {
84
+ return 'TextDecoder'
85
+ }
86
+
87
+ decode(input, options = {}) {
88
+ if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
89
+ const stream = Boolean(options.stream)
90
+ let u = input === undefined ? new Uint8Array() : fromSource(input)
91
+
92
+ if (this.#unicode) {
93
+ let prefix
94
+ if (this.#chunk) {
95
+ if (u.length === 0) {
96
+ if (stream) return '' // no change
97
+ u = this.#chunk // process as final chunk to handle errors and state changes
98
+ } else if (u.length < 3) {
99
+ // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
100
+ const a = new Uint8Array(u.length + this.#chunk.length)
101
+ a.set(this.#chunk)
102
+ a.set(u, this.#chunk.length)
103
+ u = a
104
+ } else {
105
+ // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
106
+ const t = new Uint8Array(this.#chunk.length + 3) // We have 1-3 bytes and need 1-3 more bytes
107
+ t.set(this.#chunk)
108
+ t.set(u.subarray(0, 3), this.#chunk.length)
109
+
110
+ // Stop at the first offset where unfinished bytes reaches 0 or fits into u
111
+ // If that doesn't happen (u too short), just concat chunk and u completely
112
+ for (let i = 1; i <= 3; i++) {
113
+ const unfinished = unfinishedBytes(t, this.#chunk.length + i, this.encoding) // 0-3
114
+ if (unfinished <= i) {
115
+ // Always reachable at 3, but we still need 'unfinished' value for it
116
+ const add = i - unfinished // 0-3
117
+ prefix = add > 0 ? t.subarray(0, this.#chunk.length + add) : this.#chunk
118
+ if (add > 0) u = u.subarray(add)
119
+ break
120
+ }
121
+ }
122
+ }
123
+
124
+ this.#chunk = null
125
+ } else if (u.byteLength === 0) {
126
+ if (!stream) this.#canBOM = !this.ignoreBOM
127
+ return ''
128
+ }
129
+
130
+ // For non-stream utf-8 we don't have to do this as it matches utf8toStringLoose already
131
+ // For non-stream loose utf-16 we still have to do this as this API supports uneven byteLength unlike utf16toStringLoose
132
+ let suffix = ''
133
+ if (stream || (!this.fatal && this.encoding !== 'utf-8')) {
134
+ const trail = unfinishedBytes(u, u.byteLength, this.encoding)
135
+ if (trail > 0) {
136
+ if (stream) {
137
+ this.#chunk = Uint8Array.from(u.subarray(-trail)) // copy
138
+ } else {
139
+ // non-fatal mode as already checked
140
+ suffix = replacementChar
141
+ }
142
+
143
+ u = u.subarray(0, -trail)
144
+ }
145
+ }
146
+
147
+ if (this.#canBOM) {
148
+ const bom = this.#findBom(prefix ?? u)
149
+ if (bom) {
150
+ if (stream) this.#canBOM = false
151
+ if (prefix) {
152
+ prefix = prefix.subarray(bom)
153
+ } else {
154
+ u = u.subarray(bom)
155
+ }
156
+ }
157
+ }
158
+
159
+ if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
160
+ try {
161
+ const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
162
+ if (res.length > 0 && stream) this.#canBOM = false
163
+
164
+ if (!stream) this.#canBOM = !this.ignoreBOM
165
+ return res
166
+ } catch (err) {
167
+ this.#chunk = null // reset unfinished chunk on errors
168
+ throw err
169
+ }
170
+
171
+ // eslint-disable-next-line no-else-return
172
+ } else if (this.#multibyte) {
173
+ if (!createMultibyteDecoder) throw new Error(E_MULTI)
174
+ if (!this.#decode) this.#decode = createMultibyteDecoder(this.encoding, !this.fatal) // can contain state!
175
+ return this.#decode(u, stream)
176
+ } else {
177
+ if (!this.#decode) this.#decode = createSinglebyteDecoder(this.encoding, !this.fatal)
178
+ return this.#decode(u)
179
+ }
180
+ }
181
+
182
+ #findBom(u) {
183
+ switch (this.encoding) {
184
+ case 'utf-8':
185
+ return u.byteLength >= 3 && u[0] === 0xef && u[1] === 0xbb && u[2] === 0xbf ? 3 : 0
186
+ case 'utf-16le':
187
+ return u.byteLength >= 2 && u[0] === 0xff && u[1] === 0xfe ? 2 : 0
188
+ case 'utf-16be':
189
+ return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
190
+ }
191
+
192
+ throw new Error('Unreachable')
193
+ }
194
+ }
195
+
196
+ export class TextEncoder { // TextEncoder-compatible class (utf-8 only) backed by utf8fromStringLoose
197
+ constructor() {
198
+ define(this, 'encoding', 'utf-8') // read-only property
199
+ }
200
+
201
+ get [Symbol.toStringTag]() {
202
+ return 'TextEncoder'
203
+ }
204
+
205
+ encode(str = '') { // returns the utf-8 bytes of `str`; non-string input is stringified first
206
+ if (typeof str !== 'string') str = `${str}`
207
+ const res = utf8fromStringLoose(str)
208
+ return res.byteOffset === 0 ? res : res.slice(0) // Ensure 0-offset, to match new Uint8Array (per spec), which is non-pooled
209
+ }
210
+
211
+ encodeInto(str, target) { // writes as much of `str` as fits into `target`; returns { read: UTF-16 code units consumed, written: bytes stored }
212
+ if (typeof str !== 'string') str = `${str}`
213
+ if (!(target instanceof Uint8Array)) throw new TypeError('Target must be an Uint8Array')
214
+ if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved
215
+
216
+ const tlen = target.length
217
+ if (tlen < str.length) str = str.slice(0, tlen) // each code unit needs at least one output byte, so anything past tlen units cannot fit
218
+ let u8 = utf8fromStringLoose(str)
219
+ let read
220
+ if (tlen >= u8.length) { // everything fits
221
+ read = str.length
222
+ } else if (u8.length === str.length) { // one byte per code unit
223
+ if (u8.length > tlen) u8 = u8.subarray(0, tlen) // ascii can be truncated
224
+ read = u8.length
225
+ } else {
226
+ u8 = u8.subarray(0, tlen)
227
+ const unfinished = unfinishedBytes(u8, u8.length, 'utf-8')
228
+ if (unfinished > 0) u8 = u8.subarray(0, u8.length - unfinished) // never write a partial multi-byte sequence
229
+
230
+ // We can do this because loose str -> u8 -> str preserves length, unlike loose u8 -> str -> u8
231
+ // Each unpaired surrogate (1 charcode) is replaced with a single charcode
232
+ read = utf8toStringLoose(u8).length // FIXME: Converting back is very inefficient
233
+ }
234
+
235
+ try {
236
+ target.set(u8)
237
+ } catch {
238
+ return { read: 0, written: 0 } // see above, likely detached but no .detached property support
239
+ }
240
+
241
+ return { read, written: u8.length }
242
+ }
243
+ }
244
+
245
+ // Returns 'utf-8', 'utf-16le' or 'utf-16be' when the input starts with the matching byte order mark, otherwise null
246
+ // Warning: unlike whatwg-encoding, returns lowercased labels; labels are case-insensitive and that's how the TextDecoder encoding getter normalizes them
247
+ export function getBOMEncoding(input) {
248
+ const u8 = fromSource(input) // asserts
249
+ if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
250
+ if (u8.length < 2) return null // too short for a 2-byte utf-16 BOM
251
+ if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
252
+ if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
253
+ return null
254
+ }
255
+
256
+ // https://encoding.spec.whatwg.org/#decode
257
+ // Warning: encoding sniffed from BOM takes preference over the supplied one
258
+ // Warning: lossy, performs replacement, no option of throwing on malformed input
259
+ // Expects normalized (lower-case) encoding as input. Completely ignores it and even skips validation when BOM is found
260
+ // Unlike TextDecoder public API, additionally supports 'replacement' encoding
261
+ export function legacyHookDecode(input, fallbackEncoding) {
262
+ let u8 = fromSource(input)
263
+ const bomEncoding = getBOMEncoding(u8)
264
+ if (bomEncoding) u8 = u8.subarray(bomEncoding === 'utf-8' ? 3 : 2) // strip the BOM: 3 bytes for utf-8, 2 for utf-16
265
+ const enc = bomEncoding ?? fallbackEncoding ?? 'utf-8' // "the byte order mark is more authoritative than anything else"
266
+
267
+ if (enc === 'utf-8') return utf8toStringLoose(u8)
268
+ if (enc === 'utf-16le' || enc === 'utf-16be') {
269
+ let suffix = ''
270
+ if (u8.byteLength % 2 !== 0) { // odd length: the dangling last byte becomes one replacement char
271
+ suffix = replacementChar
272
+ u8 = u8.subarray(0, -1)
273
+ }
274
+
275
+ return utf16toStringLoose(u8, enc === 'utf-16le' ? 'uint8-le' : 'uint8-be') + suffix
276
+ }
277
+
278
+ if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING) // only canonical names are accepted here, aliases are not resolved
279
+
280
+ if (multibyteSet.has(enc)) {
281
+ if (!createMultibyteDecoder) throw new Error(E_MULTI)
282
+ return createMultibyteDecoder(enc, true)(u8) // fresh decoder per call, created with loose = true
283
+ }
284
+
285
+ // https://encoding.spec.whatwg.org/#replacement-decoder
286
+ // On non-streaming non-fatal case, it just replaces any non-empty input with a single replacement char
287
+ if (enc === 'replacement') return input.byteLength > 0 ? replacementChar : '' // no BOM was found on this path, so input and u8 cover the same bytes
288
+
289
+ return createSinglebyteDecoder(enc, true)(u8)
290
+ }
@@ -319,7 +319,6 @@ const mappers = {
319
319
  }
320
320
 
321
321
  export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path
322
- export const multibyteSupported = (enc) => Object.hasOwn(mappers, enc) || enc === 'big5'
323
322
 
324
323
  export function multibyteDecoder(enc, loose = false) {
325
324
  if (enc === 'big5') return big5decoder(loose)
@@ -337,7 +336,7 @@ export function multibyteDecoder(enc, loose = false) {
337
336
  // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
338
337
  // iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
339
338
  if (!stream) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
340
- throw new Error(E_STRICT)
339
+ throw new TypeError(E_STRICT)
341
340
  }
342
341
 
343
342
  let res = ''
@@ -400,7 +399,7 @@ function big5decoder(loose) {
400
399
  : () => {
401
400
  pushback.length = 0 // the queue is cleared on returning an error
402
401
  // Lead is always already cleared before throwing
403
- throw new Error(E_STRICT)
402
+ throw new TypeError(E_STRICT)
404
403
  }
405
404
 
406
405
  let res = ''
@@ -1,5 +1,5 @@
1
- import { fromBase64url } from '@exodus/bytes/base64.js' // eslint-disable-line @exodus/import/no-unresolved
2
- import { utf16toString } from '@exodus/bytes/utf16.js' // eslint-disable-line @exodus/import/no-unresolved
1
+ import { fromBase64url } from '@exodus/bytes/base64.js'
2
+ import { utf16toString } from '@exodus/bytes/utf16.js'
3
3
  import loadEncodings from './multi-byte.encodings.cjs'
4
4
  import { to16input } from './utf16.js'
5
5
 
@@ -1,45 +1,61 @@
1
- // See tests/fixtures/encodings/single-byte/dump.js for generator
1
+ // See tests/encoding/fixtures/single-byte/dump.js for generator
2
2
 
3
- const c =
4
- '\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0'
5
- const k8a = '─│┌┐└┘├┤┬┴┼▀▄█▌▐░▒▓⌠■∙√≈≤≥\xA0⌡°²·÷═║╒ё'
6
- const k8b = '©юабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ'
7
- const i2 = 'żŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙'
8
- const ch = 'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
9
- const cl1 = 'абвгдежзийклмнопрстуфхцчшщъыьэю'
10
- const i8 = 'אבגדהוזחטיךכלםמןנסעףפץצקרשת'
11
- const p = '€\x81‚ƒ„…†‡'
12
- const s = 'µ¶·ø¹ŗ»¼½¾æĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻŠŃŅÓŌÕÖ×ŲŁŚŪÜŻŽßąįāćäåęēčéźėģķīļšńņóōõö÷ųłśūüżž'
13
- const f = (n) => '\uFFFD'.repeat(n)
3
+ const r = 0xff_fd
4
+ const e = (x) => new Array(x).fill(1)
5
+ const h = (x) => new Array(x).fill(r)
6
+
7
+ /* eslint-disable unicorn/numeric-separators-style, @exodus/export-default/named */
8
+
9
+ // Common ranges
10
+
11
+ // prettier-ignore
12
+ const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
13
+ // prettier-ignore
14
+ const k8b = [-30,1,21,-18,1,15,-17,18,-13,...e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
15
+ const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
16
+ const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
17
+ // prettier-ignore
18
+ const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,...p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,...p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
19
+ const i0 = e(33)
20
+ // prettier-ignore
21
+ const i2 = [-40,-147,1,64,-62,117,-51,-63,69,-67,79,-77,79,-77,1,64,2,51,4,-116,1,124,-122,1,129,22,-148,150,-148,1,133,-131,118,-116,1,33,-31,86,-51,-32,38,-36,48,-46,48,-46,1,33,2,51,4,-85,1,93,-91,1,98,22,-117,119,-117,1,102,374]
22
+ const i4a = [-75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1]
23
+ const i4b = [34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1]
24
+ const i7 = [721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r]
25
+ const i8 = [...e(26), r, r, 6692, 1, r]
26
+ const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
27
+ const w8 = [8072, 1, 3, 1, 5, -15, 1]
28
+ const w1 = [...w8, -7480, 7750, -8129, 7897, -7911, -182]
29
+ const w3 = [...w8, -8060, 8330, -8328, 8096, -8094]
30
+ const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
14
31
 
15
- /* eslint-disable @exodus/export-default/named */
16
32
  // prettier-ignore
17
33
  export default {
18
- ibm866: ch + "абвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№¤■\xA0",
19
- "iso-8859-10": c + "ĄĒĢĪĨͧĻĐŠŦŽ\xADŪŊ°ąēģīĩķ·ļđšŧž―ūŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎÏÐŅŌÓÔÕÖŨØŲÚÛÜÝÞßāáâãäåæįčéęëėíîïðņōóôõöũøųúûüýþĸ",
20
- "iso-8859-13": c + "”¢£¤„¦§Ø©Ŗ«¬\xAD®Æ°±²³“" + s + "’",
21
- "iso-8859-14": c + "Ḃḃ£ĊċḊ§Ẁ©ẂḋỲ\xAD®ŸḞḟĠġṀṁ¶ṖẁṗẃṠỳẄẅṡÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏŴÑÒÓÔÕÖṪØÙÚÛÜÝŶßàáâãäåæçèéêëìíîïŵñòóôõöṫøùúûüýŷ",
22
- "iso-8859-15": c + "¡¢£€¥Š§š©ª«¬\xAD®¯°±²³Žµ¶·ž¹º»ŒœŸ",
23
- "iso-8859-16": c + "ĄąŁ€„Чš©Ș«Ź\xADźŻ°±ČłŽ”¶·žčș»ŒœŸżÀÁÂĂÄĆÆÇÈÉÊËÌÍÎÏĐŃÒÓÔŐÖŚŰÙÚÛÜĘȚßàáâăäćæçèéêëìíîïđńòóôőöśűùúûüęț",
24
- "iso-8859-2": c + "Ą˘Ł¤ĽŚ§¨ŠŞŤŹ\xADŽŻ°ą˛ł´ľśˇ¸šşťź˝ž" + i2,
25
- "iso-8859-3": c + "Ħ˘£¤\uFFFDĤ§¨İŞĞĴ\xAD\uFFFDݰħ²³´µĥ·¸ışğĵ½\uFFFDżÀÁÂ\uFFFDÄĊĈÇÈÉÊËÌÍÎÏ\uFFFDÑÒÓÔĠÖ×ĜÙÚÛÜŬŜßàáâ\uFFFDäċĉçèéêëìíîï\uFFFDñòóôġö÷ĝùúûüŭŝ˙",
26
- "iso-8859-4": c + "ĄĸŖ¤Ĩϧ¨ŠĒĢŦ\xADޝ°ą˛ŗ´ĩšēģŧŊžŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪĐŅŌĶÔÕÖרŲÚÛÜŨŪßāáâãäåæįčéęëėíîīđņōķôõö÷øųúûüũū˙",
27
- "iso-8859-5": c + "ЁЂЃЄЅІЇЈЉЊЋЌ\xADЎЏ" + ch + cl1 + "я№ёђѓєѕіїјљњћќ§ўџ",
28
- "iso-8859-6": c + f(3) + "¤" + f(7) + "،\xAD" + f(13) + "؛" + f(3) + "؟\uFFFDءآأؤإئابةتثجحخدذرزسشصضطظعغ" + f(5) + "ـفقكلمنهوىي\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652" + f(13),
29
- "iso-8859-7": c + "‘’£€₯¦§¨©ͺ«¬\xAD\uFFFD―°±²³΄΅Ά·ΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\uFFFDΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ\uFFFD",
30
- "iso-8859-8": c + "\uFFFD¢£¤¥¦§¨©×«¬\xAD®¯°±²³´µ¶·¸¹÷»¼½¾" + f(32) + "‗" + i8 + f(2) + "\u200E\u200F\uFFFD",
31
- "koi8-r": k8a + "╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡Ё╢╣╤╥╦╧╨╩╪╫╬" + k8b,
32
- "koi8-u": k8a + "є╔ії╗╘╙╚╛ґў╞╟╠╡ЁЄ╣ІЇ╦╧╨╩╪ҐЎ" + k8b,
33
- macintosh: "ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûü†°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»…\xA0ÀÃÕŒœ–—“”‘’÷◊ÿŸ⁄€‹›fifl‡·‚„‰ÂÊÁËÈÍÎÏÌÓÔ\uF8FFÒÚÛÙıˆ˜¯˘˙˚¸˝˛ˇ",
34
- "windows-1250": "€\x81‚\x83„…†‡\x88‰Š‹ŚŤŽŹ\x90‘’“”•–—\x98™š›śťžź\xA0ˇ˘Ł¤Ą¦§¨©Ş«¬\xAD®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľ" + i2,
35
- "windows-1251": "ЂЃ‚ѓ„…†‡€‰Љ‹ЊЌЋЏђ‘’“”•–—\x98™љ›њќћџ\xA0ЎўЈ¤Ґ¦§Ё©Є«¬\xAD®Ї°±Ііґµ¶·ё№є»јЅѕї" + ch + cl1 + 'я',
36
- "windows-1252": p + "ˆ‰Š‹Œ\x8DŽ\x8F\x90‘’“”•–—˜™š›œ\x9DžŸ",
37
- "windows-1253": p + "\x88‰\x8A‹\x8C\x8D\x8E\x8F\x90‘’“”•–—\x98™\x9A›\x9C\x9D\x9E\x9F\xA0΅Ά£¤¥¦§¨©\uFFFD«¬\xAD®―°±²³΄µ¶·ΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\uFFFDΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ\uFFFD",
38
- "windows-1254": p + "ˆ‰Š‹Œ\x8D\x8E\x8F\x90‘’“”•–—˜™š›œ\x9D\x9EŸ\xA0¡¢£¤¥¦§¨©ª«¬\xAD®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖרÙÚÛÜİŞßàáâãäåæçèéêëìíîïğñòóôõö÷øùúûüış",
39
- "windows-1255": p + "ˆ‰\x8A‹\x8C\x8D\x8E\x8F\x90‘’“”•–—˜™\x9A›\x9C\x9D\x9E\x9F\xA0¡¢£₪¥¦§¨©×«¬\xAD®¯°±²³´µ¶·¸¹÷»¼½¾¿\u05B0\u05B1\u05B2\u05B3\u05B4\u05B5\u05B6\u05B7\u05B8\u05B9\u05BA\u05BB\u05BC\u05BD־\u05BF׀\u05C1\u05C2׃װױײ׳״" + f(7) + i8 + f(2) + "\u200E\u200F\uFFFD",
40
- "windows-1256": "€پ‚ƒ„…†‡ˆ‰ٹ‹Œچژڈگ‘’“”•–—ک™ڑ›œ\u200C\u200Dں\xA0،¢£¤¥¦§¨©ھ«¬\xAD®¯°±²³´µ¶·¸¹؛»¼½¾؟ہءآأؤإئابةتثجحخدذرزسشصض×طظعغـفقكàلâمنهوçèéêëىيîï\u064B\u064C\u064D\u064Eô\u064F\u0650÷\u0651ù\u0652ûü\u200E\u200Fے",
41
- "windows-1257": "€\x81‚\x83„…†‡\x88‰\x8A‹\x8C¨ˇ¸\x90‘’“”•–—\x98™\x9A›\x9C¯˛\x9F\xA0\uFFFD¢£¤\uFFFD¦§Ø©Ŗ«¬\xAD®Æ°±²³´" + s + "˙",
42
- "windows-1258": p + "ˆ‰\x8A‹Œ\x8D\x8E\x8F\x90‘’“”•–—˜™\x9A›œ\x9D\x9EŸ\xA0¡¢£¤¥¦§¨©ª«¬\xAD®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂĂÄÅÆÇÈÉÊË\u0300ÍÎÏĐÑ\u0309ÓÔƠÖרÙÚÛÜƯ\u0303ßàáâăäåæçèéêë\u0301íîïđñ\u0323óôơö÷øùúûüư₫",
43
- "windows-874": "€\x81\x82\x83\x84…\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90‘’“”•–—\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะ\u0E31าำ\u0E34\u0E35\u0E36\u0E37\u0E38\u0E39\u0E3A" + f(4) + "฿เแโใไๅๆ\u0E47\u0E48\u0E49\u0E4A\u0E4B\u0E4C\u0E4D\u0E4E๏๐๑๒๓๔๕๖๗๘๙๚๛" + f(4),
44
- "x-mac-cyrillic": ch + "†°Ґ£§•¶І®©™Ђђ≠Ѓѓ∞±≤≥іµґЈЄєЇїЉљЊњјЅ¬√ƒ≈∆«»…\xA0ЋћЌќѕ–—“”‘’÷„ЎўЏџ№Ёёя" + cl1 + "€"
34
+ ibm866: [913,...e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,...e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
35
+ 'iso-8859-10': [...i0,100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,...i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,...e(5),...i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
36
+ 'iso-8859-13': [...i0,8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,...p3,7835],
37
+ 'iso-8859-14': [...i0,7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,...e(15),165,-163,...e(5),7572,-7570,...e(5),153,-151,...e(16),134,-132,...e(5),7541,-7539,...e(5),122],
38
+ 'iso-8859-15': [...i0,1,1,1,8201,-8199,187,-185,186,-184,...e(10),202,-200,1,1,199,-197,1,1,151,1,37],
39
+ 'iso-8859-16': [...i0,100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,...e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,...e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
40
+ 'iso-8859-2': [...i0,100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,...i2],
41
+ 'iso-8859-3': [...i0,134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,...e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,...e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
42
+ 'iso-8859-4': [...i0,100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,...i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,...i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
43
+ 'iso-8859-5': [...i0,865,...e(11),-863,865,...e(65),7367,-7365,...e(11),-949,951,1],
44
+ 'iso-8859-6': [...i0,r,r,r,4,...h(7),1384,-1375,...h(13),1390,r,r,r,4,r,2,...e(25),r,r,r,r,r,6,...e(18),...h(13)],
45
+ 'iso-8859-7': [...i0,8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,...i7],
46
+ 'iso-8859-8': [...i0,r,2,...e(7),46,-44,...e(14),62,-60,1,1,1,...h(32),8025,-6727,...i8],
47
+ 'koi8-r': [...k8a,8450,...e(14),-8544,8545,...e(10),-9411,933,...k8b,-28,...k8b],
48
+ 'koi8-u': [...k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,...k8b,-28,...k8b],
49
+ macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,...m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
50
+ 'windows-1250': [...w0,-7888,7897,-7903,10,25,-4,-233,...w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,...i2],
51
+ 'windows-1251': [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,...e(63)],
52
+ 'windows-1252': [...p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,...w1,225,-6],
53
+ 'windows-1253': [...p1,-8089,8104,-8102,8111,-8109,1,1,1,1,...w3,1,1,1,1,741,1,-739,1,1,1,1,1,1,r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,...i7],
54
+ 'windows-1254': [...p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,...w1,1,218,-216,...e(47),79,-77,...e(11),84,46,-127,...e(16),48,-46,...e(11),53,46],
55
+ 'windows-1255': [...p1,-7515,7530,-8102,8111,-8109,1,1,1,1,...w8,-7480,7750,-8328,8096,-8094,...e(7),8199,-8197,1,1,1,1,46,-44,...e(14),62,-60,1,1,1,1,1265,...e(19),45,1,1,1,1,...h(7),-36,...i8],
56
+ 'windows-1256': [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,...e(7),1557,-1555,...e(14),1378,-1376,1,1,1,1377,162,-160,...e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
57
+ 'windows-1257': [...w0,-8102,8111,-8109,28,543,-527,-40,...w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,...e(5),...p3,347],
58
+ 'windows-1258': [...p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,...w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,...e(34),64,-62,...e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,1,1,1,1,1,1,211,340,-548,1,1,1,33,-31,...e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,1,1,1,1,1,1,180,7931],
59
+ 'windows-874': [8237,-8235,1,1,1,8098,-8096,...e(10),...w8,-8060,...e(8),3425,...e(57),r,r,r,r,5,...e(28),r,r,r,r],
60
+ 'x-mac-cyrillic': [913,...e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,...m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,...e(30),7262]
45
61
  }
@@ -4,19 +4,21 @@ import { decode2string } from './_utils.js'
4
4
 
5
5
  export const E_STRICT = 'Input is not well-formed for this encoding'
6
6
  const xUserDefined = 'x-user-defined'
7
+ const iso8i = 'iso-8859-8-i'
7
8
 
8
9
  export const assertEncoding = (encoding) => {
9
- if (Object.hasOwn(encodings, encoding) || encoding === xUserDefined) return
10
+ if (Object.hasOwn(encodings, encoding) || encoding === xUserDefined || encoding === iso8i) return
10
11
  throw new RangeError('Unsupported encoding')
11
12
  }
12
13
 
14
+ const r = 0xff_fd
15
+
13
16
  function getEncoding(encoding) {
14
17
  assertEncoding(encoding)
15
- if (encoding === xUserDefined) {
16
- return Array.from({ length: 128 }, (_, i) => String.fromCharCode(0xf7_80 + i)).join('')
17
- }
18
-
19
- return encodings[encoding]
18
+ if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
19
+ if (encoding === iso8i) encoding = 'iso-8859-8'
20
+ let prev = 127
21
+ return encodings[encoding].map((x) => (x === r ? x : (prev += x))) // eslint-disable-line no-return-assign
20
22
  }
21
23
 
22
24
  const mappers = new Map()
@@ -28,13 +30,13 @@ export function encodingMapper(encoding) {
28
30
  const cached = mappers.get(encoding)
29
31
  if (cached) return cached
30
32
 
31
- const incomplete = getEncoding(encoding).includes('\uFFFD')
33
+ const codes = getEncoding(encoding)
34
+ const incomplete = codes.includes(0xff_fd)
32
35
  let map
33
36
  const mapper = (arr, start = 0) => {
34
37
  if (!map) {
35
- map = Uint16Array.from({ length: 256 }, (_, i) => i) // Unicode subset
36
- const strings = getEncoding(encoding).split('')
37
- map.set(Uint16Array.from(strings.map((x) => x.charCodeAt(0))), 128)
38
+ map = new Uint16Array(256).map((_, i) => i) // Unicode subset
39
+ map.set(Uint16Array.from(codes), 128)
38
40
  }
39
41
 
40
42
  const o = Uint16Array.from(start === 0 ? arr : arr.subarray(start)) // copy to modify in-place, also those are 16-bit now
@@ -63,12 +65,13 @@ export function encodingDecoder(encoding) {
63
65
  if (cached) return cached
64
66
 
65
67
  let strings
66
- const incomplete = getEncoding(encoding).includes('\uFFFD')
68
+ const codes = getEncoding(encoding)
69
+ const incomplete = codes.includes(0xff_fd)
67
70
  const decoder = (arr, loose = false) => {
68
71
  if (!strings) {
69
- const part = getEncoding(encoding).split('')
70
- strings = Array.from({ length: 128 }, (_, i) => String.fromCharCode(i)).concat(part)
71
- while (strings.length < 256) strings.push(String.fromCharCode(strings.length))
72
+ const allCodes = Array.from({ length: 128 }, (_, i) => i).concat(codes)
73
+ while (allCodes.length < 256) allCodes.push(allCodes.length)
74
+ strings = allCodes.map((c) => String.fromCharCode(c))
72
75
  }
73
76
 
74
77
  const prefix = decodeLatin1(arr, 0, asciiPrefix(arr))
package/hex.d.ts ADDED
@@ -0,0 +1,22 @@
1
+ /// <reference types="node" />
2
+
3
+ import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
4
+
5
+ /**
6
+ * Encodes a Uint8Array to a lowercase hex string
7
+ * @param arr - The input bytes
8
+ * @returns The hex encoded string
9
+ */
10
+ export function toHex(arr: Uint8ArrayBuffer): string;
11
+
12
+ /**
13
+ * Decodes a hex string to bytes
14
+ * Unlike Buffer.from(), throws on invalid input
15
+ * @param str - The hex encoded string (case-insensitive)
16
+ * @param format - Output format: 'uint8' (default) or 'buffer'
17
+ * @returns The decoded bytes: a Uint8ArrayBuffer for 'uint8', a Buffer for 'buffer'
18
+ */
19
+ export function fromHex(str: string, format?: 'uint8'): Uint8ArrayBuffer;
20
+ export function fromHex(str: string, format: 'buffer'): Buffer;
21
+ export function fromHex(str: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
22
+