@exodus/bytes 1.0.0-rc.8 → 1.0.0-rc.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,449 @@
1
+ import { asciiPrefix, decodeLatin1 } from './latin1.js'
2
+ import { getTable } from './multi-byte.table.js'
3
+
4
+ export const E_STRICT = 'Input is not well-formed for this encoding'
5
+
6
+ // TODO: optimize
7
+
8
+ // If the decoder is not cleared properly, state can be preserved between non-streaming calls!
9
+ // See comment about fatal stream
10
+
11
+ // All except iso-2022-jp are ASCII supersets
12
+ // When adding something that is not an ASCII superset, adjust the ASCII fast path
13
// Value found in the numeric index tables for pointers without a mapping; handled like a missing entry
const REP = 0xff_fd

/**
 * Factories of per-encoding decoder state machines (legacy multi-byte decoders of the Encoding Standard).
 *
 * Every factory returns a fresh `{ bytes, eof, pushback }` object:
 *  - `bytes(b)` consumes one byte and returns a code point (>= 0), `-1` when the byte was consumed
 *    without producing output, or `-2` when an error has to be reported;
 *  - `eof()` signals end of input: it returns `null` when nothing is pending, otherwise `-1` / `-2` as above;
 *  - `pushback` is a stack of bytes that must be fed to `bytes` again before any new input
 *    (the consumer pops it, so the last pushed byte is re-read first).
 */
const mappers = {
  // https://encoding.spec.whatwg.org/#euc-kr-decoder
  'euc-kr': () => {
    const euc = getTable('euc-kr')
    let lead = 0 // pending lead byte, 0 when there is none

    const pushback = []
    const bytes = (b) => {
      if (lead) {
        const cp = b >= 0x41 && b <= 0xfe ? euc[(lead - 0x81) * 190 + b - 0x41] : undefined
        lead = 0
        if (cp !== undefined && cp !== REP) return cp
        if (b < 128) pushback.push(b) // an ASCII byte after a bad lead is re-read on its own
        return -2
      }

      if (b < 128) return b
      if (b < 0x81 || b === 0xff) return -2
      lead = b
      return -1
    }

    const eof = () => {
      if (!lead) return null
      lead = 0
      return -2
    }

    return { bytes, eof, pushback }
  },
  // https://encoding.spec.whatwg.org/#euc-jp-decoder
  'euc-jp': () => {
    const jis0208 = getTable('jis0208')
    const jis0212 = getTable('jis0212')
    let j12 = false // set once a 0x8f prefix was seen: the next pair is looked up in jis0212
    let lead = 0

    const pushback = []
    const bytes = (b) => {
      if (lead === 0x8e && b >= 0xa1 && b <= 0xdf) {
        lead = 0
        return 0xfe_c0 + b
      }

      if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) {
        j12 = true
        lead = b
        return -1
      }

      if (lead) {
        let cp
        if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
          cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
        }

        lead = 0
        j12 = false
        if (cp !== undefined && cp !== REP) return cp
        if (b < 128) pushback.push(b)
        return -2
      }

      if (b < 128) return b
      if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) return -2
      lead = b
      return -1
    }

    // eslint-disable-next-line sonarjs/no-identical-functions
    const eof = () => {
      if (!lead) return null
      lead = 0
      return -2
    }

    return { bytes, eof, pushback }
  },
  // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
  // Per-letter of the spec, don't shortcut on state changes on EOF. Some code is regrouped but preserving the logic
  'iso-2022-jp': () => {
    const jis0208 = getTable('jis0208')
    const EOF = -1 // pseudo-byte passed to bytes() by eof()
    // States: 1 ASCII, 2 Roman, 3 Katakana, 4 leading byte, 5 trailing byte, 6 escape start, 7 escape
    let dState = 1 // current decoder state
    let oState = 1 // state to return to after an escape sequence
    let lead = 0
    let out = false // true right after an escape sequence switched state without any output in between

    const pushback = []
    const bytes = (b) => {
      if (dState < 5) {
        if (b === EOF) return null
        if (b === 0x1b) {
          dState = 6 // escape start
          return -1
        }
      }

      switch (dState) {
        case 1:
        case 2:
          // ASCII, Roman (common)
          out = false
          if (dState === 2) {
            if (b === 0x5c) return 0xa5
            if (b === 0x7e) return 0x20_3e
          }

          if (b <= 0x7f && b !== 0x0e && b !== 0x0f) return b
          return -2
        case 3:
          // Katakana
          out = false
          if (b >= 0x21 && b <= 0x5f) return 0xff_40 + b
          return -2
        case 4:
          // Leading byte
          out = false
          if (b >= 0x21 && b <= 0x7e) {
            lead = b
            dState = 5
            return -1
          }

          return -2
        case 5:
          // Trailing byte
          out = false
          if (b === 0x1b) {
            dState = 6 // escape start
            return -2
          }

          dState = 4
          if (b >= 0x21 && b <= 0x7e) {
            const cp = jis0208[(lead - 0x21) * 94 + b - 0x21]
            return cp !== undefined && cp !== REP ? cp : -2
          }

          return -2
        case 6:
          // Escape start
          if (b === 0x24 || b === 0x28) {
            lead = b
            dState = 7
            return -1
          }

          out = false
          dState = oState
          if (b !== EOF) pushback.push(b)
          return -2
        case 7: {
          // Escape
          const l = lead
          lead = 0
          let s // state selected by the escape sequence, undefined when the sequence is not recognized
          if (l === 0x28) {
            // eslint-disable-next-line unicorn/prefer-switch
            if (b === 0x42) {
              s = 1
            } else if (b === 0x4a) {
              s = 2
            } else if (b === 0x49) {
              s = 3
            }
          } else if (l === 0x24 && (b === 0x40 || b === 0x42)) {
            s = 4
          }

          if (s) {
            dState = oState = s
            const output = out
            out = true
            return output ? -2 : -1 // two state switches in a row are reported as an error
          }

          out = false
          dState = oState
          // Re-read the escape's second byte first, then the current byte (pushback is popped)
          if (b !== EOF) pushback.push(b)
          pushback.push(l)
          return -2
        }
      }
    }

    const eof = () => bytes(EOF)

    return { bytes, eof, pushback }
  },
  // https://encoding.spec.whatwg.org/#shift_jis-decoder
  shift_jis: () => {
    const jis0208 = getTable('jis0208')
    let lead = 0

    const pushback = []
    const bytes = (b) => {
      if (lead) {
        const l = lead
        lead = 0
        if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
          const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
          if (p >= 8836 && p <= 10_715) return 0xe0_00 - 8836 + p // 16-bit
          const cp = jis0208[p]
          if (cp !== undefined && cp !== REP) return cp
        }

        if (b < 128) pushback.push(b)
        return -2
      }

      if (b <= 0x80) return b // 0x80 is allowed
      if (b >= 0xa1 && b <= 0xdf) return 0xff_61 - 0xa1 + b
      if (b < 0x81 || (b > 0x9f && b < 0xe0) || b > 0xfc) return -2
      lead = b
      return -1
    }

    // eslint-disable-next-line sonarjs/no-identical-functions
    const eof = () => {
      if (!lead) return null
      lead = 0 // this clears state completely on EOF
      return -2
    }

    return { bytes, eof, pushback }
  },
  // https://encoding.spec.whatwg.org/#gbk-decoder
  gbk: () => mappers.gb18030(), // 10.1.1. GBK’s decoder is gb18030’s decoder
  // https://encoding.spec.whatwg.org/#gb18030-decoder
  gb18030: () => {
    const gb18030 = getTable('gb18030')
    const gb18030r = getTable('gb18030-ranges')
    let g1 = 0, g2 = 0, g3 = 0 // prettier-ignore
    // Maps a four-byte sequence pointer to a code point through the ranges table; undefined when out of range
    const index = (p) => {
      if ((p > 39_419 && p < 189_000) || p > 1_237_575) return
      if (p === 7457) return 0xe7_c7
      let a = 0, b = 0 // prettier-ignore
      // Find the last range entry whose pointer does not exceed p
      for (const [c, d] of gb18030r) {
        if (c > p) break
        a = c
        b = d
      }

      return b + p - a
    }

    const pushback = []
    const bytes = (b) => {
      if (g3) {
        if (b < 0x30 || b > 0x39) {
          pushback.push(b, g3, g2) // popped back as g2, g3, b
          g1 = g2 = g3 = 0
          return -2
        }

        const cp = index((g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30)
        g1 = g2 = g3 = 0
        if (cp !== undefined) return cp // Can validly return replacement
        return -2
      }

      if (g2) {
        if (b >= 0x81 && b <= 0xfe) {
          g3 = b
          return -1
        }

        pushback.push(b, g2) // popped back as g2, b
        g1 = g2 = 0
        return -2
      }

      if (g1) {
        if (b >= 0x30 && b <= 0x39) {
          g2 = b
          return -1
        }

        let cp
        if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
          cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
        }

        g1 = 0
        if (cp !== undefined && cp !== REP) return cp
        if (b < 128) pushback.push(b)
        return -2
      }

      if (b < 128) return b
      if (b === 0x80) return 0x20_ac
      if (b === 0xff) return -2
      g1 = b
      return -1
    }

    const eof = () => {
      if (!g1 && !g2 && !g3) return null
      g1 = g2 = g3 = 0
      return -2
    }

    return { bytes, eof, pushback }
  },
}
320
+
321
// iso-2022-jp is the single supported encoding that is not an ASCII superset; all others can use the ASCII fast path
export const isAsciiSuperset = (enc) => {
  return enc !== 'iso-2022-jp'
}
322
// big5 has a dedicated decoder and is not listed in `mappers`, so it is checked separately
export const multibyteSupported = (enc) => {
  if (Object.hasOwn(mappers, enc)) return true
  return enc === 'big5'
}
323
+
324
/**
 * Creates a decoder function for a legacy multi-byte encoding.
 *
 * @param {string} enc - encoding name; 'big5' or a key of `mappers`
 * @param {boolean} [loose=false] - when true, malformed input yields U+FFFD instead of throwing
 * @returns {(arr: Uint8Array, stream?: boolean) => string} decoder; with `stream` set, pending state is
 *   kept for the next call instead of being flushed
 * @throws {RangeError} when the encoding is not supported
 */
export function multibyteDecoder(enc, loose = false) {
  if (enc === 'big5') return big5decoder(loose)
  if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')

  // Input is assumed to be typechecked already
  const asciiSuperset = isAsciiSuperset(enc)
  let mapper // live decoder state machine; empty means the next call starts from a fresh decoder

  return (arr, stream = false) => {
    const fail = () => {
      if (loose) return '\uFFFD'
      mapper.pushback.length = 0 // the queue is cleared on returning an error
      // Per spec the decoder state seems to survive a fatal error in stream mode, so it is only dropped otherwise.
      // big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk all clear state before throwing unless EOF, so not affected;
      // iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
      if (!stream) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
      throw new Error(E_STRICT)
    }

    const total = arr.length
    let out = ''
    if (asciiSuperset && !mapper) {
      // Fast path: no pending state, so a leading ASCII run can be copied as-is
      out = decodeLatin1(arr, 0, asciiPrefix(arr))
      if (out.length === arr.length) return out // ascii
    }

    if (!mapper) mapper = mappers[enc]()
    const { bytes, eof, pushback } = mapper
    let pos = out.length

    // Phase 1: feed all input bytes (and anything pushed back), no EOF handling yet
    while (pos < total || pushback.length > 0) {
      const unit = bytes(pushback.length > 0 ? pushback.pop() : arr[pos++])
      if (unit === -2) {
        out += fail()
      } else if (unit >= 0) {
        out += String.fromCodePoint(unit) // gb18030 returns codepoints above 0xFFFF from ranges
      }
    }

    // Phase 2: signal EOF. Handling EOF can push bytes back, so the loop keeps draining until EOF is clean
    // TODO: only some encodings need this, most can be optimized
    if (!stream) {
      while (pos <= total || pushback.length > 0) {
        const atEnd = pos === total && pushback.length === 0
        let unit
        if (atEnd) {
          unit = eof()
          if (unit === null) break // clean exit
        } else {
          unit = bytes(pushback.length > 0 ? pushback.pop() : arr[pos++])
        }

        if (unit === -1) continue // consuming
        out += unit === -2 ? fail() : String.fromCodePoint(unit) // gb18030 returns codepoints above 0xFFFF from ranges
      }

      // Chrome and WebKit fail on this, we don't: completely destroy the old decoder instance when finished streaming
      // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
      // > Set this’s do not flush to options["stream"]
      mapper = null
    }

    return out
  }
}
389
+
390
+ // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
391
+ // We store that as strings
392
/**
 * Creates a big5 decoder function. Table entries are strings, as one pointer can map to several code points.
 *
 * @param {boolean} loose - when true, malformed input yields U+FFFD instead of throwing
 * @returns {(arr: Uint8Array, stream?: boolean) => string} decoder; with `stream` set, a pending lead byte
 *   is kept for the next call
 */
function big5decoder(loose) {
  // Input is assumed to be typechecked already
  const pushback = []
  let table // index table, fetched on the first input that is not pure ASCII
  let pending = 0 // lead byte waiting for its trail byte, 0 when there is none

  return (arr, stream = false) => {
    const fail = () => {
      if (loose) return '\uFFFD'
      pushback.length = 0 // the queue is cleared on returning an error
      // The pending lead byte is always already cleared before throwing
      throw new Error(E_STRICT)
    }

    const total = arr.length
    let out = ''
    if (!pending) {
      out = decodeLatin1(arr, 0, asciiPrefix(arr))
      if (out.length === arr.length) return out // ascii
    }

    if (!table) table = getTable('big5')
    let pos = out.length
    while (pos < total || pushback.length > 0) {
      const b = pushback.length > 0 ? pushback.pop() : arr[pos++]

      if (!pending) {
        if (b < 128) {
          out += String.fromCharCode(b)
        } else if (b < 0x81 || b === 0xff) {
          out += fail()
        } else {
          pending = b
        }

        continue
      }

      const first = pending
      pending = 0
      const validTrail = (b >= 0x40 && b <= 0x7e) || (b >= 0xa1 && b !== 0xff)
      const str = validTrail ? table[(first - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] : undefined
      if (str) {
        out += str // strings
        continue
      }

      out += fail()
      if (b < 128) pushback.push(b) // an ASCII byte after a bad lead is re-read on its own
    }

    if (!stream) {
      // Destroy decoder state
      pushback.length = 0
      if (pending) {
        pending = 0
        out += fail()
      }
    }

    return out
  }
}
@@ -0,0 +1,114 @@
1
+ import { fromBase64url } from '@exodus/bytes/base64.js' // eslint-disable-line @exodus/import/no-unresolved
2
+ import { utf16toString } from '@exodus/bytes/utf16.js' // eslint-disable-line @exodus/import/no-unresolved
3
+ import loadEncodings from './multi-byte.encodings.cjs'
4
+ import { to16input } from './utf16.js'
5
+
6
// Number of entries getTable() allocates for each index table
export const sizes = {
  jis0208: 11104,
  jis0212: 7211,
  'euc-kr': 23750,
  gb18030: 23940,
  big5: 19782,
}
13
+
14
+ // This is huge. It's _much_ smaller than https://npmjs.com/text-encoding though
15
+ // Exactly as mapped by the index table
16
+ // 0,x - hole of x empty elements
17
+ // n,c - continuous [c, ...] of length n
18
+ // $.. - references to common chunks
19
+ // -{x} - same as 1,{x}
20
+
21
+ // See tests/multi-byte.test.js to verify that this data decodes exactly into the encoding spec tables
22
+
23
+ let indices
24
+ const tables = new Map()
25
+ /* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
26
+
27
/**
 * Decodes a base64url chunk of table data into interleaved bytes.
 *
 * The first half of the decoded input drives the even output bytes (each step adds the input byte plus one),
 * the second half drives the odd output bytes (each step adds the input byte); both wrap at 8 bits.
 *
 * @param {string} str - base64url-encoded chunk
 * @returns {Uint8Array} bytes of the same length as the decoded input
 */
function loadBase64(str) {
  const packed = fromBase64url(str)
  const total = packed.length
  const half = total >> 1
  const out = new Uint8Array(total)
  let even = -1
  let odd = 0
  for (let src = 0, dst = 0; dst < total; src++, dst += 2) {
    even = (even + packed[src] + 1) & 0xff
    odd = (odd + packed[half + src]) & 0xff
    out[dst] = even
    out[dst + 1] = odd
  }

  return out
}
42
+
43
/**
 * Expands a packed table description `t` into `res`, starting at index `pos`.
 *
 * Items of `t`:
 *  - `0, x` skips x entries;
 *  - `n, c` (n > 0) adds c to the running code and writes n consecutive codes;
 *  - a negative number `-x` adds x to the running code and writes a single entry;
 *  - a string that starts with `$` and is a key of `indices` expands that shared chunk in place;
 *  - any other string is base64url data holding little-endian 16-bit units.
 *
 * @param {Array|Uint16Array} res - destination table
 * @param {Array} t - packed description
 * @param {number} pos - first index to write
 * @param {boolean} [stringMode=false] - store one-code-point strings instead of numbers
 * @returns {number} index following the last processed entry
 */
function unwrap(res, t, pos, stringMode = false) {
  let code = 0
  let at = 0
  while (at < t.length) {
    const item = t[at++]

    if (typeof item !== 'number') {
      if (item[0] === '$' && Object.hasOwn(indices, item)) {
        pos = unwrap(res, indices[item], pos, stringMode) // self-reference using shared chunks
      } else if (stringMode) {
        const chars = [...utf16toString(loadBase64(item), 'uint8-le')] // splits by codepoints
        for (const ch of chars) res[pos++] = ch // TODO: splice?
        code = chars[chars.length - 1].codePointAt(0) + 1
      } else {
        const u16 = to16input(loadBase64(item), true) // data is little-endian
        res.set(u16, pos)
        pos += u16.length
        code = u16[u16.length - 1] + 1
      }

      continue
    }

    if (item === 0) {
      pos += t[at++] // hole
      continue
    }

    let count = item
    if (item < 0) {
      code -= item
      count = 1
    } else {
      code += t[at++]
    }

    for (let n = 0; n < count; n++) {
      res[pos++] = stringMode ? String.fromCodePoint(code) : code
      code++
    }
  }

  return pos
}
80
+
81
/**
 * Returns the decoded index table for `id`, building and caching it on first use.
 *
 * @param {string} id - table name; names ending in '-ranges' produce an array of [pointer, code point] pairs,
 *   'big5' produces an array of strings, everything else a Uint16Array pre-filled with 0xFFFD
 * @returns {Array|Uint16Array} the table
 * @throws {Error} when the table is unknown or its packed source was already consumed
 */
export function getTable(id) {
  const ready = tables.get(id)
  if (ready) return ready

  if (!indices) indices = loadEncodings() // lazy-load
  if (!Object.hasOwn(indices, id)) throw new Error('Unknown encoding')
  const packed = indices[id]
  if (!packed) throw new Error('Table already used (likely incorrect bundler dedupe)')

  const sizeOf = () => {
    if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
    return sizes[id]
  }

  let table
  if (id.endsWith('-ranges')) {
    // Both columns are stored as deltas from the previous pair
    table = []
    let pointer = 0
    let codePoint = 0
    // destroying, we remove it later anyway
    while (packed.length > 0) {
      pointer += packed.shift()
      codePoint += packed.shift()
      table.push([pointer, codePoint])
    }
  } else if (id === 'big5') {
    table = new Array(sizeOf()) // array of strings or undefined
    unwrap(table, packed, 0, true)
    // Pointer code updates are embedded into the table
    table[1133] = '\xCA\u0304'
    table[1135] = '\xCA\u030C'
    table[1164] = '\xEA\u0304'
    table[1166] = '\xEA\u030C'
  } else {
    table = new Uint16Array(sizeOf())
    table.fill(0xff_fd)
    unwrap(table, packed, 0, false)
  }

  indices[id] = null // gc
  tables.set(id, table)
  return table
}
@@ -0,0 +1,45 @@
1
+ // See tests/fixtures/encodings/single-byte/dump.js for generator
2
+
3
// Shared fragments reused by several tables below.
// Every table lists the characters for bytes 0x80 and up; bytes past the end of a table map to themselves.
// U+FFFD marks a byte without a mapping.

// Identity mapping for 0x80..0xA0
const c =
  '\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0'
const k8a = '─│┌┐└┘├┤┬┴┼▀▄█▌▐░▒▓⌠■∙√≈≤≥\xA0⌡°²·÷═║╒ё'
const k8b = '©юабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ'
const i2 = 'żŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙'
const ch = 'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
const cl1 = 'абвгдежзийклмнопрстуфхцчшщъыьэю'
const i8 = 'אבגדהוזחטיךכלםמןנסעףפץצקרשת'
const p = '€\x81‚ƒ„…†‡'
const s = 'µ¶·ø¹ŗ»¼½¾æĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻŠŃŅÓŌÕÖ×ŲŁŚŪÜŻŽßąįāćäåęēčéźėģķīļšńņóōõö÷ųłśūüżž'
// n unmapped bytes in a row
const f = (n) => '\uFFFD'.repeat(n)

// Positions that are easy to damage in transit are written as escapes:
//  - iso-8859-10 0xA6-0xA7, iso-8859-16 0xA6-0xA7, iso-8859-3 0xAF-0xB0
//  - iso-8859-4 0xA6-0xA7, 0xAE-0xAF, 0xB6-0xB8 and 0xD7-0xD8
//  - windows-1254 / windows-1258 0xD7-0xD8, macintosh 0xDE-0xDF (the fi / fl ligatures)

/* eslint-disable @exodus/export-default/named */
// prettier-ignore
const encodings = {
  ibm866: ch + "абвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀рстуфхцчшщъыьэюяЁёЄєЇїЎў°∙·√№¤■\xA0",
  "iso-8859-10": c + "ĄĒĢĪĨ\u0136\xA7ĻĐŠŦŽ\xADŪŊ°ąēģīĩķ·ļđšŧž―ūŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎÏÐŅŌÓÔÕÖŨØŲÚÛÜÝÞßāáâãäåæįčéęëėíîïðņōóôõöũøųúûüýþĸ",
  "iso-8859-13": c + "”¢£¤„¦§Ø©Ŗ«¬\xAD®Æ°±²³“" + s + "’",
  "iso-8859-14": c + "Ḃḃ£ĊċḊ§Ẁ©ẂḋỲ\xAD®ŸḞḟĠġṀṁ¶ṖẁṗẃṠỳẄẅṡÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏŴÑÒÓÔÕÖṪØÙÚÛÜÝŶßàáâãäåæçèéêëìíîïŵñòóôõöṫøùúûüýŷ",
  "iso-8859-15": c + "¡¢£€¥Š§š©ª«¬\xAD®¯°±²³Žµ¶·ž¹º»ŒœŸ",
  "iso-8859-16": c + "ĄąŁ€„\u0160\xA7š©Ș«Ź\xADźŻ°±ČłŽ”¶·žčș»ŒœŸżÀÁÂĂÄĆÆÇÈÉÊËÌÍÎÏĐŃÒÓÔŐÖŚŰÙÚÛÜĘȚßàáâăäćæçèéêëìíîïđńòóôőöśűùúûüęț",
  "iso-8859-2": c + "Ą˘Ł¤ĽŚ§¨ŠŞŤŹ\xADŽŻ°ą˛ł´ľśˇ¸šşťź˝ž" + i2,
  "iso-8859-3": c + "Ħ˘£¤\uFFFDĤ§¨İŞĞĴ\xAD\uFFFD\u017B\xB0ħ²³´µĥ·¸ışğĵ½\uFFFDżÀÁÂ\uFFFDÄĊĈÇÈÉÊËÌÍÎÏ\uFFFDÑÒÓÔĠÖ×ĜÙÚÛÜŬŜßàáâ\uFFFDäċĉçèéêëìíîï\uFFFDñòóôġö÷ĝùúûüŭŝ˙",
  "iso-8859-4": c + "ĄĸŖ¤Ĩ\u013B\xA7¨ŠĒĢŦ\xAD\u017D\xAF°ą˛ŗ´ĩ\u013C\u02C7\xB8šēģŧŊžŋĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪĐŅŌĶÔÕÖ\xD7\xD8ŲÚÛÜŨŪßāáâãäåæįčéęëėíîīđņōķôõö÷øųúûüũū˙",
  "iso-8859-5": c + "ЁЂЃЄЅІЇЈЉЊЋЌ\xADЎЏ" + ch + cl1 + "я№ёђѓєѕіїјљњћќ§ўџ",
  "iso-8859-6": c + f(3) + "¤" + f(7) + "،\xAD" + f(13) + "؛" + f(3) + "؟\uFFFDءآأؤإئابةتثجحخدذرزسشصضطظعغ" + f(5) + "ـفقكلمنهوىي\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652" + f(13),
  "iso-8859-7": c + "‘’£€₯¦§¨©ͺ«¬\xAD\uFFFD―°±²³΄΅Ά·ΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\uFFFDΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ\uFFFD",
  "iso-8859-8": c + "\uFFFD¢£¤¥¦§¨©×«¬\xAD®¯°±²³´µ¶·¸¹÷»¼½¾" + f(32) + "‗" + i8 + f(2) + "\u200E\u200F\uFFFD",
  "koi8-r": k8a + "╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡Ё╢╣╤╥╦╧╨╩╪╫╬" + k8b,
  "koi8-u": k8a + "є╔ії╗╘╙╚╛ґў╞╟╠╡ЁЄ╣ІЇ╦╧╨╩╪ҐЎ" + k8b,
  macintosh: "ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûü†°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»…\xA0ÀÃÕŒœ–—“”‘’÷◊ÿŸ⁄€‹›\uFB01\uFB02‡·‚„‰ÂÊÁËÈÍÎÏÌÓÔ\uF8FFÒÚÛÙıˆ˜¯˘˙˚¸˝˛ˇ",
  "windows-1250": "€\x81‚\x83„…†‡\x88‰Š‹ŚŤŽŹ\x90‘’“”•–—\x98™š›śťžź\xA0ˇ˘Ł¤Ą¦§¨©Ş«¬\xAD®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľ" + i2,
  "windows-1251": "ЂЃ‚ѓ„…†‡€‰Љ‹ЊЌЋЏђ‘’“”•–—\x98™љ›њќћџ\xA0ЎўЈ¤Ґ¦§Ё©Є«¬\xAD®Ї°±Ііґµ¶·ё№є»јЅѕї" + ch + cl1 + 'я',
  "windows-1252": p + "ˆ‰Š‹Œ\x8DŽ\x8F\x90‘’“”•–—˜™š›œ\x9DžŸ",
  "windows-1253": p + "\x88‰\x8A‹\x8C\x8D\x8E\x8F\x90‘’“”•–—\x98™\x9A›\x9C\x9D\x9E\x9F\xA0΅Ά£¤¥¦§¨©\uFFFD«¬\xAD®―°±²³΄µ¶·ΈΉΊ»Ό½ΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\uFFFDΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ\uFFFD",
  "windows-1254": p + "ˆ‰Š‹Œ\x8D\x8E\x8F\x90‘’“”•–—˜™š›œ\x9D\x9EŸ\xA0¡¢£¤¥¦§¨©ª«¬\xAD®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖ\xD7\xD8ÙÚÛÜİŞßàáâãäåæçèéêëìíîïğñòóôõö÷øùúûüış",
  "windows-1255": p + "ˆ‰\x8A‹\x8C\x8D\x8E\x8F\x90‘’“”•–—˜™\x9A›\x9C\x9D\x9E\x9F\xA0¡¢£₪¥¦§¨©×«¬\xAD®¯°±²³´µ¶·¸¹÷»¼½¾¿\u05B0\u05B1\u05B2\u05B3\u05B4\u05B5\u05B6\u05B7\u05B8\u05B9\u05BA\u05BB\u05BC\u05BD־\u05BF׀\u05C1\u05C2׃װױײ׳״" + f(7) + i8 + f(2) + "\u200E\u200F\uFFFD",
  "windows-1256": "€پ‚ƒ„…†‡ˆ‰ٹ‹Œچژڈگ‘’“”•–—ک™ڑ›œ\u200C\u200Dں\xA0،¢£¤¥¦§¨©ھ«¬\xAD®¯°±²³´µ¶·¸¹؛»¼½¾؟ہءآأؤإئابةتثجحخدذرزسشصض×طظعغـفقكàلâمنهوçèéêëىيîï\u064B\u064C\u064D\u064Eô\u064F\u0650÷\u0651ù\u0652ûü\u200E\u200Fے",
  "windows-1257": "€\x81‚\x83„…†‡\x88‰\x8A‹\x8C¨ˇ¸\x90‘’“”•–—\x98™\x9A›\x9C¯˛\x9F\xA0\uFFFD¢£¤\uFFFD¦§Ø©Ŗ«¬\xAD®Æ°±²³´" + s + "˙",
  "windows-1258": p + "ˆ‰\x8A‹Œ\x8D\x8E\x8F\x90‘’“”•–—˜™\x9A›œ\x9D\x9EŸ\xA0¡¢£¤¥¦§¨©ª«¬\xAD®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂĂÄÅÆÇÈÉÊË\u0300ÍÎÏĐÑ\u0309ÓÔƠÖ\xD7\xD8ÙÚÛÜƯ\u0303ßàáâăäåæçèéêë\u0301íîïđñ\u0323óôơö÷øùúûüư₫",
  "windows-874": "€\x81\x82\x83\x84…\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90‘’“”•–—\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะ\u0E31าำ\u0E34\u0E35\u0E36\u0E37\u0E38\u0E39\u0E3A" + f(4) + "฿เแโใไๅๆ\u0E47\u0E48\u0E49\u0E4A\u0E4B\u0E4C\u0E4D\u0E4E๏๐๑๒๓๔๕๖๗๘๙๚๛" + f(4),
  "x-mac-cyrillic": ch + "†°Ґ£§•¶І®©™Ђђ≠Ѓѓ∞±≤≥іµґЈЄєЇїЉљЊњјЅ¬√ƒ≈∆«»…\xA0ЋћЌќѕ–—“”‘’÷„ЎўЏџ№Ёёя" + cl1 + "€"
}

export default encodings
@@ -0,0 +1,83 @@
1
+ import { asciiPrefix, decodeLatin1 } from './latin1.js'
2
+ import encodings from './single-byte.encodings.js'
3
+ import { decode2string } from './_utils.js'
4
+
5
+ export const E_STRICT = 'Input is not well-formed for this encoding'
6
+ const xUserDefined = 'x-user-defined'
7
+
8
/**
 * Verifies that `encoding` is a supported single-byte encoding name.
 *
 * @param {string} encoding
 * @throws {RangeError} when the name is neither a key of the encodings table nor 'x-user-defined'
 */
export const assertEncoding = (encoding) => {
  const supported = encoding === xUserDefined || Object.hasOwn(encodings, encoding)
  if (!supported) throw new RangeError('Unsupported encoding')
}
12
+
13
/**
 * Returns the string of characters that an encoding assigns to bytes 0x80 and up.
 *
 * @param {string} encoding - supported single-byte encoding name
 * @returns {string} table string; for 'x-user-defined' it is generated: U+F780..U+F7FF
 * @throws {RangeError} when the encoding is not supported
 */
function getEncoding(encoding) {
  assertEncoding(encoding)
  if (encoding !== xUserDefined) return encodings[encoding]

  let table = ''
  for (let i = 0; i < 128; i++) table += String.fromCharCode(0xf7_80 + i)
  return table
}
21
+
22
+ const mappers = new Map()
23
+ const decoders = new Map()
24
+
25
+ // Used only on Node.js, no reason to optimize for anything else
26
+ // E.g. avoiding .from and filling zero-initialized arr manually is faster on Hermes, but we avoid this codepath on Hermes completely
27
/**
 * Returns the (cached) byte-to-UTF-16 mapper for a single-byte encoding.
 *
 * The same `{ mapper, incomplete }` object is returned on every call for a given encoding, including the first.
 *
 * @param {string} encoding - supported single-byte encoding name
 * @returns {{ mapper: (arr: Uint8Array, start?: number) => Uint16Array, incomplete: boolean }}
 *   `mapper` returns a new Uint16Array with the mapped code units of `arr` from `start` on;
 *   `incomplete` is true when the encoding has bytes without a mapping (they map to U+FFFD)
 * @throws {RangeError} when the encoding is not supported
 */
export function encodingMapper(encoding) {
  const cached = mappers.get(encoding)
  if (cached) return cached

  const incomplete = getEncoding(encoding).includes('\uFFFD')
  let map // 256-entry lookup table, built on first use
  const mapper = (arr, start = 0) => {
    if (!map) {
      map = Uint16Array.from({ length: 256 }, (_, i) => i) // Unicode subset
      const strings = getEncoding(encoding).split('')
      // Tables can be shorter than 128 entries; the remaining bytes keep the identity mapping
      map.set(Uint16Array.from(strings.map((x) => x.charCodeAt(0))), 128)
    }

    const o = Uint16Array.from(start === 0 ? arr : arr.subarray(start)) // copy to modify in-place, also those are 16-bit now
    let i = 0
    // Unrolled by 8, the tail is handled below
    for (const end7 = o.length - 7; i < end7; i += 8) {
      o[i] = map[o[i]]
      o[i + 1] = map[o[i + 1]]
      o[i + 2] = map[o[i + 2]]
      o[i + 3] = map[o[i + 3]]
      o[i + 4] = map[o[i + 4]]
      o[i + 5] = map[o[i + 5]]
      o[i + 6] = map[o[i + 6]]
      o[i + 7] = map[o[i + 7]]
    }

    for (const end = o.length; i < end; i++) o[i] = map[o[i]]
    return o
  }

  // Cache and return one and the same object, so repeated calls are identity-equal
  const entry = { mapper, incomplete }
  mappers.set(encoding, entry)
  return entry
}
60
+
61
/**
 * Returns the (cached) string decoder for a single-byte encoding.
 *
 * @param {string} encoding - supported single-byte encoding name
 * @returns {(arr: Uint8Array, loose?: boolean) => string} decoder; unless `loose` is set, it throws a
 *   TypeError when the input contains a byte the encoding does not map
 * @throws {RangeError} when the encoding is not supported
 */
export function encodingDecoder(encoding) {
  const known = decoders.get(encoding)
  if (known) return known

  const incomplete = getEncoding(encoding).includes('\uFFFD')
  let strings // 256 one-character strings indexed by byte value, built on first use

  const decoder = (arr, loose = false) => {
    if (!strings) {
      const table = []
      for (let i = 0; i < 128; i++) table.push(String.fromCharCode(i))
      for (const unit of getEncoding(encoding).split('')) table.push(unit)
      // Tables can be shorter than 128 entries; the remaining bytes map to themselves
      for (let i = table.length; i < 256; i++) table.push(String.fromCharCode(i))
      strings = table
    }

    const head = decodeLatin1(arr, 0, asciiPrefix(arr))
    if (head.length === arr.length) return head

    const tail = decode2string(arr, head.length, arr.length, strings)
    const malformed = incomplete && tail.includes('\uFFFD')
    if (malformed && !loose) throw new TypeError(E_STRICT)
    return head + tail
  }

  decoders.set(encoding, decoder)
  return decoder
}