@exodus/bytes 1.0.0-rc.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +286 -4
- package/array.d.ts +24 -0
- package/base58.js +16 -8
- package/base64.d.ts +76 -0
- package/bigint.js +14 -0
- package/encoding-lite.js +7 -0
- package/encoding.js +12 -0
- package/fallback/_utils.js +100 -10
- package/fallback/encoding.js +290 -0
- package/fallback/encoding.labels.js +46 -0
- package/fallback/encoding.util.js +34 -0
- package/fallback/hex.js +2 -70
- package/fallback/latin1.js +2 -1
- package/fallback/multi-byte.encodings.cjs +1 -0
- package/fallback/multi-byte.encodings.json +545 -0
- package/fallback/multi-byte.js +448 -0
- package/fallback/multi-byte.table.js +114 -0
- package/fallback/single-byte.encodings.js +61 -0
- package/fallback/single-byte.js +86 -0
- package/fallback/utf16.js +180 -0
- package/hex.d.ts +22 -0
- package/hex.node.js +2 -0
- package/multi-byte.js +13 -0
- package/multi-byte.node.js +25 -0
- package/package.json +62 -13
- package/single-byte.js +54 -0
- package/single-byte.node.js +62 -0
- package/utf16.js +73 -0
- package/utf16.node.js +79 -0
- package/utf8.d.ts +42 -0
- package/utf8.js +7 -9
- package/utf8.node.js +8 -5
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
import { asciiPrefix, decodeLatin1 } from './latin1.js'
|
|
2
|
+
import { getTable } from './multi-byte.table.js'
|
|
3
|
+
|
|
4
|
+
export const E_STRICT = 'Input is not well-formed for this encoding'
|
|
5
|
+
|
|
6
|
+
// TODO: optimize
|
|
7
|
+
|
|
8
|
+
// If the decoder is not cleared properly, state can be preserved between non-streaming calls!
|
|
9
|
+
// See comment about fatal stream
|
|
10
|
+
|
|
11
|
+
// All except iso-2022-jp are ASCII supersets
|
|
12
|
+
// When adding something that is not an ASCII superset, adjust the ASCII fast path
|
|
13
|
+
// U+FFFD. The mappers below treat a table lookup that yields REP the same as a missing entry,
// i.e. as a decode error (getTable pre-fills its Uint16Array tables with 0xff_fd)
const REP = 0xff_fd
|
|
14
|
+
const mappers = {
|
|
15
|
+
// https://encoding.spec.whatwg.org/#euc-kr-decoder
|
|
16
|
+
'euc-kr': () => {
|
|
17
|
+
const euc = getTable('euc-kr')
|
|
18
|
+
let lead = 0
|
|
19
|
+
|
|
20
|
+
const pushback = []
|
|
21
|
+
const bytes = (b) => {
|
|
22
|
+
if (lead) {
|
|
23
|
+
const cp = b >= 0x41 && b <= 0xfe ? euc[(lead - 0x81) * 190 + b - 0x41] : undefined
|
|
24
|
+
lead = 0
|
|
25
|
+
if (cp !== undefined && cp !== REP) return cp
|
|
26
|
+
if (b < 128) pushback.push(b)
|
|
27
|
+
return -2
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
if (b < 128) return b
|
|
31
|
+
if (b < 0x81 || b === 0xff) return -2
|
|
32
|
+
lead = b
|
|
33
|
+
return -1
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
const eof = () => {
|
|
37
|
+
if (!lead) return null
|
|
38
|
+
lead = 0
|
|
39
|
+
return -2
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
return { bytes, eof, pushback }
|
|
43
|
+
},
|
|
44
|
+
// https://encoding.spec.whatwg.org/#euc-jp-decoder
|
|
45
|
+
'euc-jp': () => {
|
|
46
|
+
const jis0208 = getTable('jis0208')
|
|
47
|
+
const jis0212 = getTable('jis0212')
|
|
48
|
+
let j12 = false
|
|
49
|
+
let lead = 0
|
|
50
|
+
|
|
51
|
+
const pushback = []
|
|
52
|
+
const bytes = (b) => {
|
|
53
|
+
if (lead === 0x8e && b >= 0xa1 && b <= 0xdf) {
|
|
54
|
+
lead = 0
|
|
55
|
+
return 0xfe_c0 + b
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) {
|
|
59
|
+
j12 = true
|
|
60
|
+
lead = b
|
|
61
|
+
return -1
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
if (lead) {
|
|
65
|
+
let cp
|
|
66
|
+
if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
|
|
67
|
+
cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
lead = 0
|
|
71
|
+
j12 = false
|
|
72
|
+
if (cp !== undefined && cp !== REP) return cp
|
|
73
|
+
if (b < 128) pushback.push(b)
|
|
74
|
+
return -2
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
if (b < 128) return b
|
|
78
|
+
if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) return -2
|
|
79
|
+
lead = b
|
|
80
|
+
return -1
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// eslint-disable-next-line sonarjs/no-identical-functions
|
|
84
|
+
const eof = () => {
|
|
85
|
+
if (!lead) return null
|
|
86
|
+
lead = 0
|
|
87
|
+
return -2
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
return { bytes, eof, pushback }
|
|
91
|
+
},
|
|
92
|
+
// https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
|
|
93
|
+
// Per-letter of the spec, don't shortcut on state changes on EOF. Some code is regrouped but preserving the logic
|
|
94
|
+
'iso-2022-jp': () => {
|
|
95
|
+
const jis0208 = getTable('jis0208')
|
|
96
|
+
const EOF = -1
|
|
97
|
+
let dState = 1
|
|
98
|
+
let oState = 1
|
|
99
|
+
let lead = 0
|
|
100
|
+
let out = false
|
|
101
|
+
|
|
102
|
+
const pushback = []
|
|
103
|
+
const bytes = (b) => {
|
|
104
|
+
if (dState < 5) {
|
|
105
|
+
if (b === EOF) return null
|
|
106
|
+
if (b === 0x1b) {
|
|
107
|
+
dState = 6 // escape start
|
|
108
|
+
return -1
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
switch (dState) {
|
|
113
|
+
case 1:
|
|
114
|
+
case 2:
|
|
115
|
+
// ASCII, Roman (common)
|
|
116
|
+
out = false
|
|
117
|
+
if (dState === 2) {
|
|
118
|
+
if (b === 0x5c) return 0xa5
|
|
119
|
+
if (b === 0x7e) return 0x20_3e
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
if (b <= 0x7f && b !== 0x0e && b !== 0x0f) return b
|
|
123
|
+
return -2
|
|
124
|
+
case 3:
|
|
125
|
+
// Katakana
|
|
126
|
+
out = false
|
|
127
|
+
if (b >= 0x21 && b <= 0x5f) return 0xff_40 + b
|
|
128
|
+
return -2
|
|
129
|
+
case 4:
|
|
130
|
+
// Leading byte
|
|
131
|
+
out = false
|
|
132
|
+
if ((b >= 0x21) & (b <= 0x7e)) {
|
|
133
|
+
lead = b
|
|
134
|
+
dState = 5
|
|
135
|
+
return -1
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
return -2
|
|
139
|
+
case 5:
|
|
140
|
+
// Trailing byte
|
|
141
|
+
out = false
|
|
142
|
+
if (b === 0x1b) {
|
|
143
|
+
dState = 6 // escape start
|
|
144
|
+
return -2
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
dState = 4
|
|
148
|
+
if (b >= 0x21 && b <= 0x7e) {
|
|
149
|
+
const cp = jis0208[(lead - 0x21) * 94 + b - 0x21]
|
|
150
|
+
return cp !== undefined && cp !== REP ? cp : -2
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
return -2
|
|
154
|
+
case 6:
|
|
155
|
+
// Escape start
|
|
156
|
+
if (b === 0x24 || b === 0x28) {
|
|
157
|
+
lead = b
|
|
158
|
+
dState = 7
|
|
159
|
+
return -1
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
out = false
|
|
163
|
+
dState = oState
|
|
164
|
+
if (b !== EOF) pushback.push(b)
|
|
165
|
+
return -2
|
|
166
|
+
case 7: {
|
|
167
|
+
// Escape
|
|
168
|
+
const l = lead
|
|
169
|
+
lead = 0
|
|
170
|
+
let s
|
|
171
|
+
if (l === 0x28) {
|
|
172
|
+
// eslint-disable-next-line unicorn/prefer-switch
|
|
173
|
+
if (b === 0x42) {
|
|
174
|
+
s = 1
|
|
175
|
+
} else if (b === 0x4a) {
|
|
176
|
+
s = 2
|
|
177
|
+
} else if (b === 0x49) {
|
|
178
|
+
s = 3
|
|
179
|
+
}
|
|
180
|
+
} else if (l === 0x24 && (b === 0x40 || b === 0x42)) {
|
|
181
|
+
s = 4
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
if (s) {
|
|
185
|
+
dState = oState = s
|
|
186
|
+
const output = out
|
|
187
|
+
out = true
|
|
188
|
+
return output ? -2 : -1
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
out = false
|
|
192
|
+
dState = oState
|
|
193
|
+
if (b !== EOF) pushback.push(b)
|
|
194
|
+
pushback.push(l)
|
|
195
|
+
return -2
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
const eof = () => bytes(EOF)
|
|
201
|
+
|
|
202
|
+
return { bytes, eof, pushback }
|
|
203
|
+
},
|
|
204
|
+
// https://encoding.spec.whatwg.org/#shift_jis-decoder
|
|
205
|
+
shift_jis: () => {
|
|
206
|
+
const jis0208 = getTable('jis0208')
|
|
207
|
+
let lead = 0
|
|
208
|
+
|
|
209
|
+
const pushback = []
|
|
210
|
+
const bytes = (b) => {
|
|
211
|
+
if (lead) {
|
|
212
|
+
const l = lead
|
|
213
|
+
lead = 0
|
|
214
|
+
if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
|
|
215
|
+
const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
|
|
216
|
+
if (p >= 8836 && p <= 10_715) return 0xe0_00 - 8836 + p // 16-bit
|
|
217
|
+
const cp = jis0208[p]
|
|
218
|
+
if (cp !== undefined && cp !== REP) return cp
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
if (b < 128) pushback.push(b)
|
|
222
|
+
return -2
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
if (b <= 0x80) return b // 0x80 is allowed
|
|
226
|
+
if (b >= 0xa1 && b <= 0xdf) return 0xff_61 - 0xa1 + b
|
|
227
|
+
if (b < 0x81 || (b > 0x9f && b < 0xe0) || b > 0xfc) return -2
|
|
228
|
+
lead = b
|
|
229
|
+
return -1
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
// eslint-disable-next-line sonarjs/no-identical-functions
|
|
233
|
+
const eof = () => {
|
|
234
|
+
if (!lead) return null
|
|
235
|
+
lead = 0 // this clears state completely on EOF
|
|
236
|
+
return -2
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
return { bytes, eof, pushback }
|
|
240
|
+
},
|
|
241
|
+
// https://encoding.spec.whatwg.org/#gbk-decoder
|
|
242
|
+
// Decoding gbk creates a fresh gb18030 mapper; there is no separate gbk state machine
gbk: () => mappers.gb18030(), // 10.1.1. GBK’s decoder is gb18030’s decoder
|
|
243
|
+
// https://encoding.spec.whatwg.org/#gb18030-decoder
|
|
244
|
+
gb18030: () => {
|
|
245
|
+
const gb18030 = getTable('gb18030')
|
|
246
|
+
const gb18030r = getTable('gb18030-ranges')
|
|
247
|
+
let g1 = 0, g2 = 0, g3 = 0 // prettier-ignore
|
|
248
|
+
const index = (p) => {
|
|
249
|
+
if ((p > 39_419 && p < 189_000) || p > 1_237_575) return
|
|
250
|
+
if (p === 7457) return 0xe7_c7
|
|
251
|
+
let a = 0, b = 0 // prettier-ignore
|
|
252
|
+
for (const [c, d] of gb18030r) {
|
|
253
|
+
if (c > p) break
|
|
254
|
+
a = c
|
|
255
|
+
b = d
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
return b + p - a
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
const pushback = []
|
|
262
|
+
const bytes = (b) => {
|
|
263
|
+
if (g3) {
|
|
264
|
+
if (b < 0x30 || b > 0x39) {
|
|
265
|
+
pushback.push(b, g3, g2)
|
|
266
|
+
g1 = g2 = g3 = 0
|
|
267
|
+
return -2
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
const cp = index((g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30)
|
|
271
|
+
g1 = g2 = g3 = 0
|
|
272
|
+
if (cp !== undefined) return cp // Can validly return replacement
|
|
273
|
+
return -2
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
if (g2) {
|
|
277
|
+
if (b >= 0x81 && b <= 0xfe) {
|
|
278
|
+
g3 = b
|
|
279
|
+
return -1
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
pushback.push(b, g2)
|
|
283
|
+
g1 = g2 = 0
|
|
284
|
+
return -2
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
if (g1) {
|
|
288
|
+
if (b >= 0x30 && b <= 0x39) {
|
|
289
|
+
g2 = b
|
|
290
|
+
return -1
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
let cp
|
|
294
|
+
if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
|
|
295
|
+
cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
g1 = 0
|
|
299
|
+
if (cp !== undefined && cp !== REP) return cp
|
|
300
|
+
if (b < 128) pushback.push(b)
|
|
301
|
+
return -2
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
if (b < 128) return b
|
|
305
|
+
if (b === 0x80) return 0x20_ac
|
|
306
|
+
if (b === 0xff) return -2
|
|
307
|
+
g1 = b
|
|
308
|
+
return -1
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
const eof = () => {
|
|
312
|
+
if (!g1 && !g2 && !g3) return null
|
|
313
|
+
g1 = g2 = g3 = 0
|
|
314
|
+
return -2
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
return { bytes, eof, pushback }
|
|
318
|
+
},
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
/**
 * Tells whether the ASCII fast path may be used for an encoding.
 * @param {string} enc - encoding name
 * @returns {boolean} false only for 'iso-2022-jp'; every other name is reported as an ASCII superset
 */
export const isAsciiSuperset = (enc) => {
  const isStateful = enc === 'iso-2022-jp'
  return !isStateful
}
|
|
322
|
+
|
|
323
|
+
/**
 * Builds a decoder function for one of the multi-byte encodings.
 * Input is assumed to be typechecked already.
 * @param {string} enc - 'big5' or one of the keys of `mappers`
 * @param {boolean} [loose=false] - when true, malformed input yields U+FFFD instead of throwing
 * @returns {(arr: Uint8Array, stream?: boolean) => string} decoder; with `stream` set, partial
 *   sequence state is kept for the next call
 * @throws {RangeError} when the encoding is not supported
 */
export function multibyteDecoder(enc, loose = false) {
  if (enc === 'big5') return big5decoder(loose)
  if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')

  const fastAscii = isAsciiSuperset(enc)
  let state // live mapper instance; survives between calls only while streaming

  return (arr, stream = false) => {
    // Produces the replacement character in loose mode, throws TypeError(E_STRICT) otherwise
    const fail = () => {
      if (loose) return '\uFFFD'
      state.pushback.length = 0 // the queue is cleared on returning an error
      // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
      // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
      // iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
      if (!stream) state = null // destroy state, effectively the same as 'do not flush' = false, but early
      throw new TypeError(E_STRICT)
    }

    const total = arr.length
    let text = ''
    // The ASCII prefix shortcut is only valid when no sequence is pending from a previous chunk
    if (fastAscii && !state) {
      text = decodeLatin1(arr, 0, asciiPrefix(arr))
      if (text.length === total) return text // ascii
    }

    state ??= mappers[enc]()
    const { bytes, eof, pushback } = state
    let pos = text.length
    // Pushed-back bytes take priority over fresh input
    const nextByte = () => (pushback.length > 0 ? pushback.pop() : arr[pos++])

    // First, dump everything until EOF
    // Same as the full loop, but without EOF handling
    while (pos < total || pushback.length > 0) {
      const result = bytes(nextByte())
      if (result >= 0) {
        text += String.fromCodePoint(result) // gb18030 returns codepoints above 0xFFFF from ranges
      } else if (result === -2) {
        text += fail()
      }
    }

    if (!stream) {
      // Then, dump EOF. This needs the same loop as the characters can be pushed back
      // TODO: only some encodings need this, most can be optimized
      while (pos <= total || pushback.length > 0) {
        const atEnd = pos === total && pushback.length === 0
        const result = atEnd ? eof() : bytes(nextByte())
        if (atEnd && result === null) break // clean exit
        if (result === -1) continue // consuming
        text += result === -2 ? fail() : String.fromCodePoint(result)
      }

      // Chrome and WebKit fail on this, we don't: completely destroy the old decoder instance when finished streaming
      // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
      // > Set this’s do not flush to options["stream"]
      state = null
    }

    return text
  }
}
|
|
388
|
+
|
|
389
|
+
// The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
|
|
390
|
+
// We store that as strings
|
|
391
|
+
/**
 * Builds the big5 decoder. Table entries are strings, so results are appended directly.
 * Input is assumed to be typechecked already.
 * @param {boolean} loose - when true, malformed input yields U+FFFD instead of throwing
 * @returns {(arr: Uint8Array, stream?: boolean) => string} decoder; with `stream` set, a pending
 *   lead byte is kept for the next call
 */
function big5decoder(loose) {
  const queue = [] // bytes to decode again before reading more input
  let table // loaded on first non-ASCII input
  let pending = 0 // lead byte waiting for its trail byte, 0 when there is none

  return (arr, stream = false) => {
    // Produces the replacement character in loose mode, throws TypeError(E_STRICT) otherwise
    const fail = () => {
      if (loose) return '\uFFFD'
      queue.length = 0 // the queue is cleared on returning an error
      // Lead is always already cleared before throwing
      throw new TypeError(E_STRICT)
    }

    const total = arr.length
    let text = ''
    // The ASCII prefix shortcut is only valid when no lead byte is pending from a previous chunk
    if (!pending) {
      text = decodeLatin1(arr, 0, asciiPrefix(arr))
      if (text.length === total) return text // ascii
    }

    table ??= getTable('big5')
    let pos = text.length
    while (pos < total || queue.length > 0) {
      const byte = queue.length > 0 ? queue.pop() : arr[pos++]

      if (!pending) {
        if (byte < 128) {
          text += String.fromCharCode(byte)
        } else if (byte < 0x81 || byte === 0xff) {
          text += fail()
        } else {
          pending = byte
        }

        continue
      }

      const first = pending
      pending = 0
      let chunk
      if ((byte >= 0x40 && byte <= 0x7e) || (byte >= 0xa1 && byte !== 0xff)) {
        chunk = table[(first - 0x81) * 157 + byte - (byte < 0x7f ? 0x40 : 0x62)]
      }

      if (chunk) {
        text += chunk // strings
        continue
      }

      text += fail()
      // An ASCII byte that follows a bad lead is queued so it gets decoded on its own
      if (byte < 128) queue.push(byte)
    }

    if (!stream) {
      // Destroy decoder state
      queue.length = 0
      if (pending) {
        pending = 0
        text += fail()
      }
    }

    return text
  }
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { fromBase64url } from '@exodus/bytes/base64.js'
|
|
2
|
+
import { utf16toString } from '@exodus/bytes/utf16.js'
|
|
3
|
+
import loadEncodings from './multi-byte.encodings.cjs'
|
|
4
|
+
import { to16input } from './utf16.js'
|
|
5
|
+
|
|
6
|
+
// Number of slots getTable() allocates for each index table, keyed by table id
export const sizes = {
  'jis0208': 11_104,
  'jis0212': 7211,
  'euc-kr': 23_750,
  'gb18030': 23_940,
  'big5': 19_782,
}
|
|
13
|
+
|
|
14
|
+
// This is huge. It's _much_ smaller than https://npmjs.com/text-encoding though
|
|
15
|
+
// Exactly as mapped by the index table
|
|
16
|
+
// 0,x - hole of x empty elements
|
|
17
|
+
// n,c - continuous [c, ...] of length n
|
|
18
|
+
// $.. - references to common chunks
|
|
19
|
+
// -{x} - same as 1,{x}
|
|
20
|
+
|
|
21
|
+
// See tests/multi-byte.test.js to verify that this data decodes exactly into the encoding spec tables
|
|
22
|
+
|
|
23
|
+
let indices
|
|
24
|
+
const tables = new Map()
|
|
25
|
+
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
|
|
26
|
+
|
|
27
|
+
/**
 * Decodes a base64url chunk and undoes its byte-level delta packing.
 * The first half of the decoded data drives the even output bytes (running sum, plus one per step),
 * the second half drives the odd output bytes (plain running sum); both wrap at 8 bits.
 * @param {string} str - base64url-encoded chunk
 * @returns {Uint8Array} interleaved bytes, same length as the decoded input
 */
function loadBase64(str) {
  const packed = fromBase64url(str)
  const size = packed.length
  const half = size >> 1
  const out = new Uint8Array(size)
  let even = -1
  let odd = 0
  for (let j = 0; 2 * j < size; j++) {
    even = (even + packed[j] + 1) & 0xff
    odd = (odd + packed[half + j]) & 0xff
    out[2 * j] = even
    out[2 * j + 1] = odd
  }

  return out
}
|
|
42
|
+
|
|
43
|
+
/**
 * Expands a compressed table description into `res`, starting at `pos`.
 * Items are read left to right: `0, x` skips x slots; `n, c` writes n consecutive codes after
 * advancing the running code by c; a negative number -x advances the running code by x and writes
 * one entry; a '$…' string naming a key of `indices` is expanded recursively; any other string is
 * a base64url chunk of little-endian 16-bit data copied in as is.
 * @param {Array|Uint16Array} res - destination table, written in place
 * @param {Array<number|string>} t - compressed description
 * @param {number} pos - first destination index to write
 * @param {boolean} [stringMode=false] - store one-code-point strings instead of numbers
 * @returns {number} destination index just past the last slot covered
 */
function unwrap(res, t, pos, stringMode = false) {
  let cursor = pos
  let code = 0 // running code; each nested call starts its own from 0
  for (let idx = 0; idx < t.length; idx++) {
    const item = t[idx]

    if (typeof item !== 'number') {
      if (item[0] === '$' && Object.hasOwn(indices, item)) {
        cursor = unwrap(res, indices[item], cursor, stringMode) // self-reference using shared chunks
      } else if (stringMode) {
        const chars = [...utf16toString(loadBase64(item), 'uint8-le')] // splits by codepoints
        for (const ch of chars) res[cursor++] = ch // TODO: splice?
        code = chars[chars.length - 1].codePointAt(0) + 1
      } else {
        const units = to16input(loadBase64(item), true) // data is little-endian
        res.set(units, cursor)
        cursor += units.length
        code = units[units.length - 1] + 1
      }

      continue
    }

    if (item === 0) {
      cursor += t[++idx] // hole
      continue
    }

    let count = item
    if (item < 0) {
      code -= item
      count = 1
    } else {
      code += t[++idx]
    }

    const end = cursor + count
    while (cursor < end) {
      res[cursor] = stringMode ? String.fromCodePoint(code) : code
      cursor++
      code++
    }
  }

  return cursor
}
|
|
80
|
+
|
|
81
|
+
/**
 * Returns the decoded index table for `id`, building and caching it on first use.
 * The raw description is dropped after decoding, so every table can be built only once.
 * @param {string} id - table id; ids ending in '-ranges' produce an array of pairs, 'big5' an
 *   array of strings, anything else a Uint16Array pre-filled with 0xff_fd
 * @returns {Array|Uint16Array} the decoded table
 * @throws {Error} when the id is unknown or its raw data has already been consumed
 */
export function getTable(id) {
  const hit = tables.get(id)
  if (hit) return hit

  indices ??= loadEncodings() // lazy-load
  if (!Object.hasOwn(indices, id)) throw new Error('Unknown encoding')
  const raw = indices[id]
  if (!raw) throw new Error('Table already used (likely incorrect bundler dedupe)')

  let table
  if (id.endsWith('-ranges')) {
    // Stored as flat delta pairs; drain the source (we remove it later anyway) and
    // rebuild absolute [first, second] pairs by accumulating each column
    const deltas = raw.splice(0)
    table = []
    let first = 0
    let second = 0
    for (let k = 0; k < deltas.length; k += 2) {
      first += deltas[k]
      second += deltas[k + 1]
      table.push([first, second])
    }
  } else {
    if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
    if (id === 'big5') {
      table = new Array(sizes[id]) // array of strings or undefined
      unwrap(table, raw, 0, true)
      // Pointer code updates are embedded into the table
      table[1133] = '\xCA\u0304'
      table[1135] = '\xCA\u030C'
      table[1164] = '\xEA\u0304'
      table[1166] = '\xEA\u030C'
    } else {
      table = new Uint16Array(sizes[id])
      table.fill(0xff_fd)
      unwrap(table, raw, 0, false)
    }
  }

  indices[id] = null // gc
  tables.set(id, table)
  return table
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// See tests/encoding/fixtures/single-byte/dump.js for generator
|
|
2
|
+
|
|
3
|
+
// 0xFFFD (U+FFFD REPLACEMENT CHARACTER) placed directly into the tables below.
// NOTE(review): presumably marks bytes that have no mapping — confirm against the single-byte decoder
const r = 0xff_fd
|
|
4
|
+
// Builds a run of x ones, spread into the tables below to keep them short
const e = (x) => {
  const run = new Array(x)
  return run.fill(1)
}
|
|
5
|
+
// Builds a run of x copies of r, spread into the tables below to keep them short
const h = (x) => {
  const run = new Array(x)
  return run.fill(r)
}
|
|
6
|
+
|
|
7
|
+
/* eslint-disable unicorn/numeric-separators-style, @exodus/export-default/named */
|
|
8
|
+
|
|
9
|
+
// Common ranges
|
|
10
|
+
|
|
11
|
+
// prettier-ignore
|
|
12
|
+
const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
|
|
13
|
+
// prettier-ignore
|
|
14
|
+
const k8b = [-30,1,21,-18,1,15,-17,18,-13,...e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
|
|
15
|
+
const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
|
|
16
|
+
const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
|
|
17
|
+
// prettier-ignore
|
|
18
|
+
const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,...p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,...p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
|
|
19
|
+
const i0 = e(33)
|
|
20
|
+
// prettier-ignore
|
|
21
|
+
const i2 = [-40,-147,1,64,-62,117,-51,-63,69,-67,79,-77,79,-77,1,64,2,51,4,-116,1,124,-122,1,129,22,-148,150,-148,1,133,-131,118,-116,1,33,-31,86,-51,-32,38,-36,48,-46,48,-46,1,33,2,51,4,-85,1,93,-91,1,98,22,-117,119,-117,1,102,374]
|
|
22
|
+
const i4a = [-75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1]
|
|
23
|
+
const i4b = [34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1]
|
|
24
|
+
const i7 = [721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r]
|
|
25
|
+
const i8 = [...e(26), r, r, 6692, 1, r]
|
|
26
|
+
const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
|
|
27
|
+
const w8 = [8072, 1, 3, 1, 5, -15, 1]
|
|
28
|
+
const w1 = [...w8, -7480, 7750, -8129, 7897, -7911, -182]
|
|
29
|
+
const w3 = [...w8, -8060, 8330, -8328, 8096, -8094]
|
|
30
|
+
const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
|
|
31
|
+
|
|
32
|
+
// prettier-ignore
|
|
33
|
+
export default {
|
|
34
|
+
ibm866: [913,...e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,...e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
|
|
35
|
+
'iso-8859-10': [...i0,100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,...i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,...e(5),...i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
|
|
36
|
+
'iso-8859-13': [...i0,8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,...p3,7835],
|
|
37
|
+
'iso-8859-14': [...i0,7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,...e(15),165,-163,...e(5),7572,-7570,...e(5),153,-151,...e(16),134,-132,...e(5),7541,-7539,...e(5),122],
|
|
38
|
+
'iso-8859-15': [...i0,1,1,1,8201,-8199,187,-185,186,-184,...e(10),202,-200,1,1,199,-197,1,1,151,1,37],
|
|
39
|
+
'iso-8859-16': [...i0,100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,...e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,...e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
|
|
40
|
+
'iso-8859-2': [...i0,100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,...i2],
|
|
41
|
+
'iso-8859-3': [...i0,134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,...e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,...e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
|
|
42
|
+
'iso-8859-4': [...i0,100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,...i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,...i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
|
|
43
|
+
'iso-8859-5': [...i0,865,...e(11),-863,865,...e(65),7367,-7365,...e(11),-949,951,1],
|
|
44
|
+
'iso-8859-6': [...i0,r,r,r,4,...h(7),1384,-1375,...h(13),1390,r,r,r,4,r,2,...e(25),r,r,r,r,r,6,...e(18),...h(13)],
|
|
45
|
+
'iso-8859-7': [...i0,8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,...i7],
|
|
46
|
+
'iso-8859-8': [...i0,r,2,...e(7),46,-44,...e(14),62,-60,1,1,1,...h(32),8025,-6727,...i8],
|
|
47
|
+
'koi8-r': [...k8a,8450,...e(14),-8544,8545,...e(10),-9411,933,...k8b,-28,...k8b],
|
|
48
|
+
'koi8-u': [...k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,...k8b,-28,...k8b],
|
|
49
|
+
macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,...m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
|
|
50
|
+
'windows-1250': [...w0,-7888,7897,-7903,10,25,-4,-233,...w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,...i2],
|
|
51
|
+
'windows-1251': [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,...e(63)],
|
|
52
|
+
'windows-1252': [...p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,...w1,225,-6],
|
|
53
|
+
'windows-1253': [...p1,-8089,8104,-8102,8111,-8109,1,1,1,1,...w3,1,1,1,1,741,1,-739,1,1,1,1,1,1,r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,...i7],
|
|
54
|
+
'windows-1254': [...p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,...w1,1,218,-216,...e(47),79,-77,...e(11),84,46,-127,...e(16),48,-46,...e(11),53,46],
|
|
55
|
+
'windows-1255': [...p1,-7515,7530,-8102,8111,-8109,1,1,1,1,...w8,-7480,7750,-8328,8096,-8094,...e(7),8199,-8197,1,1,1,1,46,-44,...e(14),62,-60,1,1,1,1,1265,...e(19),45,1,1,1,1,...h(7),-36,...i8],
|
|
56
|
+
'windows-1256': [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,...e(7),1557,-1555,...e(14),1378,-1376,1,1,1,1377,162,-160,...e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
|
|
57
|
+
'windows-1257': [...w0,-8102,8111,-8109,28,543,-527,-40,...w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,...e(5),...p3,347],
|
|
58
|
+
'windows-1258': [...p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,...w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,...e(34),64,-62,...e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,1,1,1,1,1,1,211,340,-548,1,1,1,33,-31,...e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,1,1,1,1,1,1,180,7931],
|
|
59
|
+
'windows-874': [8237,-8235,1,1,1,8098,-8096,...e(10),...w8,-8060,...e(8),3425,...e(57),r,r,r,r,5,...e(28),r,r,r,r],
|
|
60
|
+
'x-mac-cyrillic': [913,...e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,...m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,...e(30),7262]
|
|
61
|
+
}
|