@exodus/bytes 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/fallback/encoding.js +11 -1
- package/fallback/multi-byte.js +267 -234
- package/package.json +1 -1
package/fallback/encoding.js
CHANGED
|
@@ -47,6 +47,10 @@ export function normalizeEncoding(label) {
|
|
|
47
47
|
|
|
48
48
|
const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
|
|
49
49
|
|
|
50
|
+
// TODO: make this more strict against Symbol.toStringTag
|
|
51
|
+
// Is not very significant though, anything faking Symbol.toStringTag could as well override
|
|
52
|
+
// prototypes, which is not something we protect against
|
|
53
|
+
|
|
50
54
|
function isAnyArrayBuffer(x) {
|
|
51
55
|
if (x instanceof ArrayBuffer) return true
|
|
52
56
|
if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
|
|
@@ -55,6 +59,12 @@ function isAnyArrayBuffer(x) {
|
|
|
55
59
|
return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
|
|
56
60
|
}
|
|
57
61
|
|
|
62
|
+
function isAnyUint8Array(x) {
|
|
63
|
+
if (x instanceof Uint8Array) return true
|
|
64
|
+
if (!x || !ArrayBuffer.isView(x) || x.BYTES_PER_ELEMENT !== 1) return false
|
|
65
|
+
return Object.prototype.toString.call(x) === '[object Uint8Array]'
|
|
66
|
+
}
|
|
67
|
+
|
|
58
68
|
const fromSource = (x) => {
|
|
59
69
|
if (x instanceof Uint8Array) return x
|
|
60
70
|
if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
|
|
@@ -217,7 +227,7 @@ export class TextEncoder {
|
|
|
217
227
|
|
|
218
228
|
encodeInto(str, target) {
|
|
219
229
|
if (typeof str !== 'string') str = `${str}`
|
|
220
|
-
if (!(target
|
|
230
|
+
if (!isAnyUint8Array(target)) throw new TypeError('Target must be an Uint8Array')
|
|
221
231
|
if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved
|
|
222
232
|
|
|
223
233
|
const tlen = target.length
|
package/fallback/multi-byte.js
CHANGED
|
@@ -8,37 +8,57 @@ export const E_STRICT = 'Input is not well-formed for this encoding'
|
|
|
8
8
|
// If the decoder is not cleared properly, state can be preserved between non-streaming calls!
|
|
9
9
|
// See comment about fatal stream
|
|
10
10
|
|
|
11
|
-
//
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
const mappers = {
|
|
15
|
-
// https://encoding.spec.whatwg.org/#euc-kr-decoder
|
|
16
|
-
'euc-kr': (err) => {
|
|
17
|
-
const euc = getTable('euc-kr')
|
|
18
|
-
let lead = 0
|
|
11
|
+
// Common between euc-kr and big5
|
|
12
|
+
function bigDecoder(err, pair) {
|
|
13
|
+
let lead = 0
|
|
19
14
|
|
|
20
|
-
|
|
21
|
-
const
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
15
|
+
const decodeLead = (b) => {
|
|
16
|
+
const str = pair(lead, b)
|
|
17
|
+
lead = 0
|
|
18
|
+
if (str) return str
|
|
19
|
+
return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
const decode = (arr, start, end, stream) => {
|
|
23
|
+
let res = ''
|
|
24
|
+
let i = start
|
|
29
25
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
if (lead && i < end) res += decodeLead(arr[i++])
|
|
27
|
+
while (i < end) {
|
|
28
|
+
const b = arr[i++]
|
|
29
|
+
if (b < 128) {
|
|
30
|
+
res += String.fromCharCode(b)
|
|
31
|
+
} else if (b === 0x80 || b === 0xff) {
|
|
32
|
+
res += String.fromCharCode(err())
|
|
33
|
+
} else {
|
|
34
|
+
lead = b
|
|
35
|
+
if (i < end) res += decodeLead(arr[i++])
|
|
36
|
+
}
|
|
33
37
|
}
|
|
34
38
|
|
|
35
|
-
|
|
36
|
-
if (!lead) return null
|
|
39
|
+
if (lead && !stream) {
|
|
37
40
|
lead = 0
|
|
38
|
-
|
|
41
|
+
res += String.fromCharCode(err())
|
|
39
42
|
}
|
|
40
43
|
|
|
41
|
-
return
|
|
44
|
+
return res
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
return { decode, isAscii: () => lead === 0 }
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// All except iso-2022-jp are ASCII supersets
|
|
51
|
+
// When adding something that is not an ASCII superset, adjust the ASCII fast path
|
|
52
|
+
const REP = 0xff_fd
|
|
53
|
+
const mappers = {
|
|
54
|
+
// https://encoding.spec.whatwg.org/#euc-kr-decoder
|
|
55
|
+
'euc-kr': (err) => {
|
|
56
|
+
const euc = getTable('euc-kr')
|
|
57
|
+
return bigDecoder(err, (l, b) => {
|
|
58
|
+
if (b < 0x41 || b > 0xfe) return
|
|
59
|
+
const cp = euc[(l - 0x81) * 190 + b - 0x41]
|
|
60
|
+
return cp !== undefined && cp !== REP ? String.fromCharCode(cp) : undefined
|
|
61
|
+
})
|
|
42
62
|
},
|
|
43
63
|
// https://encoding.spec.whatwg.org/#euc-jp-decoder
|
|
44
64
|
'euc-jp': (err) => {
|
|
@@ -47,64 +67,71 @@ const mappers = {
|
|
|
47
67
|
let j12 = false
|
|
48
68
|
let lead = 0
|
|
49
69
|
|
|
50
|
-
const
|
|
51
|
-
const bytes = (b) => {
|
|
70
|
+
const decodeLead = (b) => {
|
|
52
71
|
if (lead === 0x8e && b >= 0xa1 && b <= 0xdf) {
|
|
53
72
|
lead = 0
|
|
54
|
-
return 0xfe_c0 + b
|
|
73
|
+
return String.fromCharCode(0xfe_c0 + b)
|
|
55
74
|
}
|
|
56
75
|
|
|
57
76
|
if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) {
|
|
58
77
|
j12 = true
|
|
59
78
|
lead = b
|
|
60
|
-
return
|
|
79
|
+
return ''
|
|
61
80
|
}
|
|
62
81
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
82
|
+
let cp
|
|
83
|
+
if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
|
|
84
|
+
cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
lead = 0
|
|
88
|
+
j12 = false
|
|
89
|
+
if (cp !== undefined && cp !== REP) return String.fromCharCode(cp)
|
|
90
|
+
return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
const decode = (arr, start, end, stream) => {
|
|
94
|
+
let res = ''
|
|
95
|
+
let i = start
|
|
96
|
+
|
|
97
|
+
if (lead && i < end) res += decodeLead(arr[i++])
|
|
98
|
+
if (lead && i < end) res += decodeLead(arr[i++]) // could be two leads, but no more
|
|
99
|
+
while (i < end) {
|
|
100
|
+
const b = arr[i++]
|
|
101
|
+
if (b < 128) {
|
|
102
|
+
res += String.fromCharCode(b)
|
|
103
|
+
} else if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) {
|
|
104
|
+
res += String.fromCharCode(err())
|
|
105
|
+
} else {
|
|
106
|
+
lead = b
|
|
107
|
+
if (i < end) res += decodeLead(arr[i++])
|
|
108
|
+
if (lead && i < end) res += decodeLead(arr[i++]) // could be two leads
|
|
67
109
|
}
|
|
110
|
+
}
|
|
68
111
|
|
|
112
|
+
if (lead && !stream) {
|
|
69
113
|
lead = 0
|
|
70
|
-
j12 = false
|
|
71
|
-
|
|
72
|
-
if (b < 128) pushback.push(b)
|
|
73
|
-
return err()
|
|
114
|
+
j12 = false // can be true only when lead is non-zero
|
|
115
|
+
res += String.fromCharCode(err())
|
|
74
116
|
}
|
|
75
117
|
|
|
76
|
-
|
|
77
|
-
if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) return err()
|
|
78
|
-
lead = b
|
|
118
|
+
return res
|
|
79
119
|
}
|
|
80
120
|
|
|
81
|
-
//
|
|
82
|
-
const eof = () => {
|
|
83
|
-
if (!lead) return null
|
|
84
|
-
lead = 0
|
|
85
|
-
return err()
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
return { bytes, eof, pushback }
|
|
121
|
+
return { decode, isAscii: () => lead === 0 } // j12 can be true only when lead is non-zero
|
|
89
122
|
},
|
|
90
123
|
// https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
|
|
91
|
-
// Per-letter of the spec, don't shortcut on state changes on EOF. Some code is regrouped but preserving the logic
|
|
92
124
|
'iso-2022-jp': (err) => {
|
|
93
125
|
const jis0208 = getTable('jis0208')
|
|
94
|
-
const EOF = -1
|
|
95
126
|
let dState = 1
|
|
96
127
|
let oState = 1
|
|
97
|
-
let lead = 0
|
|
128
|
+
let lead = 0 // 0 or 0x21-0x7e
|
|
98
129
|
let out = false
|
|
99
130
|
|
|
100
|
-
const
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
if (b === 0x1b) {
|
|
105
|
-
dState = 6 // escape start
|
|
106
|
-
return
|
|
107
|
-
}
|
|
131
|
+
const bytes = (pushback, b) => {
|
|
132
|
+
if (dState < 5 && b === 0x1b) {
|
|
133
|
+
dState = 6 // escape start
|
|
134
|
+
return
|
|
108
135
|
}
|
|
109
136
|
|
|
110
137
|
switch (dState) {
|
|
@@ -156,7 +183,7 @@ const mappers = {
|
|
|
156
183
|
|
|
157
184
|
out = false
|
|
158
185
|
dState = oState
|
|
159
|
-
|
|
186
|
+
pushback.push(b)
|
|
160
187
|
return err()
|
|
161
188
|
case 7: {
|
|
162
189
|
// Escape
|
|
@@ -185,52 +212,117 @@ const mappers = {
|
|
|
185
212
|
|
|
186
213
|
out = false
|
|
187
214
|
dState = oState
|
|
188
|
-
|
|
189
|
-
|
|
215
|
+
pushback.push(b, l)
|
|
216
|
+
return err()
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
const eof = (pushback) => {
|
|
222
|
+
if (dState < 5) return null
|
|
223
|
+
out = false
|
|
224
|
+
switch (dState) {
|
|
225
|
+
case 5:
|
|
226
|
+
dState = 4
|
|
227
|
+
return err()
|
|
228
|
+
case 6:
|
|
229
|
+
dState = oState
|
|
230
|
+
return err()
|
|
231
|
+
case 7: {
|
|
232
|
+
dState = oState
|
|
233
|
+
pushback.push(lead)
|
|
234
|
+
lead = 0
|
|
190
235
|
return err()
|
|
191
236
|
}
|
|
192
237
|
}
|
|
193
238
|
}
|
|
194
239
|
|
|
195
|
-
const
|
|
240
|
+
const decode = (arr, start, end, stream) => {
|
|
241
|
+
let res = ''
|
|
242
|
+
let i = start
|
|
243
|
+
const pushback = [] // local and auto-cleared
|
|
244
|
+
|
|
245
|
+
// First, dump everything until EOF
|
|
246
|
+
// Same as the full loop, but without EOF handling
|
|
247
|
+
while (i < end || pushback.length > 0) {
|
|
248
|
+
const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
|
|
249
|
+
if (c !== undefined) res += String.fromCodePoint(c)
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
// Then, dump EOF. This needs the same loop as the characters can be pushed back
|
|
253
|
+
if (!stream) {
|
|
254
|
+
while (i <= end || pushback.length > 0) {
|
|
255
|
+
if (i < end || pushback.length > 0) {
|
|
256
|
+
const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
|
|
257
|
+
if (c !== undefined) res += String.fromCodePoint(c)
|
|
258
|
+
} else {
|
|
259
|
+
const c = eof(pushback)
|
|
260
|
+
if (c === null) break // clean exit
|
|
261
|
+
res += String.fromCodePoint(c)
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
// Chrome and WebKit fail on this, we don't: completely destroy the old decoder state when finished streaming
|
|
267
|
+
// > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
|
|
268
|
+
// > Set this’s do not flush to options["stream"]
|
|
269
|
+
if (!stream) {
|
|
270
|
+
dState = oState = 1
|
|
271
|
+
lead = 0
|
|
272
|
+
out = false
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
return res
|
|
276
|
+
}
|
|
196
277
|
|
|
197
|
-
return {
|
|
278
|
+
return { decode, isAscii: () => false }
|
|
198
279
|
},
|
|
199
280
|
// https://encoding.spec.whatwg.org/#shift_jis-decoder
|
|
200
281
|
shift_jis: (err) => {
|
|
201
282
|
const jis0208 = getTable('jis0208')
|
|
202
283
|
let lead = 0
|
|
203
284
|
|
|
204
|
-
const
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
if (
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
const cp = jis0208[p]
|
|
213
|
-
if (cp !== undefined && cp !== REP) return cp
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
if (b < 128) pushback.push(b)
|
|
217
|
-
return err()
|
|
285
|
+
const decodeLead = (b) => {
|
|
286
|
+
const l = lead
|
|
287
|
+
lead = 0
|
|
288
|
+
if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
|
|
289
|
+
const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
|
|
290
|
+
if (p >= 8836 && p <= 10_715) return String.fromCharCode(0xe0_00 - 8836 + p)
|
|
291
|
+
const cp = jis0208[p]
|
|
292
|
+
if (cp !== undefined && cp !== REP) return String.fromCharCode(cp)
|
|
218
293
|
}
|
|
219
294
|
|
|
220
|
-
|
|
221
|
-
if (b >= 0xa1 && b <= 0xdf) return 0xff_61 - 0xa1 + b
|
|
222
|
-
if (b < 0x81 || (b > 0x9f && b < 0xe0) || b > 0xfc) return err()
|
|
223
|
-
lead = b
|
|
295
|
+
return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
|
|
224
296
|
}
|
|
225
297
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
298
|
+
const decode = (arr, start, end, stream) => {
|
|
299
|
+
let res = ''
|
|
300
|
+
let i = start
|
|
301
|
+
|
|
302
|
+
if (lead && i < end) res += decodeLead(arr[i++])
|
|
303
|
+
while (i < end) {
|
|
304
|
+
const b = arr[i++]
|
|
305
|
+
if (b <= 0x80) {
|
|
306
|
+
res += String.fromCharCode(b) // 0x80 is allowed
|
|
307
|
+
} else if (b >= 0xa1 && b <= 0xdf) {
|
|
308
|
+
res += String.fromCharCode(0xfe_c0 + b)
|
|
309
|
+
} else if (b === 0xa0 || b > 0xfc) {
|
|
310
|
+
res += String.fromCharCode(err())
|
|
311
|
+
} else {
|
|
312
|
+
lead = b
|
|
313
|
+
if (i < end) res += decodeLead(arr[i++])
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
if (lead && !stream) {
|
|
318
|
+
lead = 0
|
|
319
|
+
res += String.fromCharCode(err())
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
return res
|
|
231
323
|
}
|
|
232
324
|
|
|
233
|
-
return {
|
|
325
|
+
return { decode, isAscii: () => lead === 0 }
|
|
234
326
|
},
|
|
235
327
|
// https://encoding.spec.whatwg.org/#gbk-decoder
|
|
236
328
|
gbk: (err) => mappers.gb18030(err), // 10.1.1. GBK’s decoder is gb18030’s decoder
|
|
@@ -252,179 +344,120 @@ const mappers = {
|
|
|
252
344
|
return b + p - a
|
|
253
345
|
}
|
|
254
346
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
347
|
+
// g1 is 0 or 0x81-0xfe
|
|
348
|
+
// g2 is 0 or 0x30-0x39
|
|
349
|
+
// g3 is 0 or 0x81-0xfe
|
|
350
|
+
|
|
351
|
+
const decode = (arr, start, end, stream) => {
|
|
352
|
+
let res = ''
|
|
353
|
+
let i = start
|
|
354
|
+
const pushback = [] // local and auto-cleared
|
|
355
|
+
|
|
356
|
+
// First, dump everything until EOF
|
|
357
|
+
// Same as the full loop, but without EOF handling
|
|
358
|
+
while (i < end || pushback.length > 0) {
|
|
359
|
+
const b = pushback.length > 0 ? pushback.pop() : arr[i++]
|
|
360
|
+
if (g3) {
|
|
361
|
+
if (b < 0x30 || b > 0x39) {
|
|
362
|
+
pushback.push(b, g3, g2)
|
|
363
|
+
g1 = g2 = g3 = 0
|
|
364
|
+
res += String.fromCharCode(err())
|
|
365
|
+
} else {
|
|
366
|
+
const p = index((g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30)
|
|
367
|
+
g1 = g2 = g3 = 0
|
|
368
|
+
if (p === undefined) {
|
|
369
|
+
res += String.fromCharCode(err())
|
|
370
|
+
} else {
|
|
371
|
+
res += String.fromCodePoint(p) // Can validly return replacement
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
} else if (g2) {
|
|
375
|
+
if (b >= 0x81 && b <= 0xfe) {
|
|
376
|
+
g3 = b
|
|
377
|
+
} else {
|
|
378
|
+
pushback.push(b, g2)
|
|
379
|
+
g1 = g2 = 0
|
|
380
|
+
res += String.fromCharCode(err())
|
|
381
|
+
}
|
|
382
|
+
} else if (g1) {
|
|
383
|
+
if (b >= 0x30 && b <= 0x39) {
|
|
384
|
+
g2 = b
|
|
385
|
+
} else {
|
|
386
|
+
let cp
|
|
387
|
+
if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
|
|
388
|
+
cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
|
|
389
|
+
}
|
|
269
390
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
391
|
+
g1 = 0
|
|
392
|
+
if (cp !== undefined && cp !== REP) {
|
|
393
|
+
res += String.fromCodePoint(cp)
|
|
394
|
+
} else {
|
|
395
|
+
res += String.fromCharCode(err())
|
|
396
|
+
if (b < 128) res += String.fromCharCode(b) // can be processed immediately
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
} else if (b < 128) {
|
|
400
|
+
res += String.fromCharCode(b)
|
|
401
|
+
} else if (b === 0x80) {
|
|
402
|
+
res += '\u20AC'
|
|
403
|
+
} else if (b === 0xff) {
|
|
404
|
+
res += String.fromCharCode(err())
|
|
405
|
+
} else {
|
|
406
|
+
g1 = b
|
|
274
407
|
}
|
|
275
|
-
|
|
276
|
-
pushback.push(b, g2)
|
|
277
|
-
g1 = g2 = 0
|
|
278
|
-
return err()
|
|
279
408
|
}
|
|
280
409
|
|
|
281
|
-
if
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
let cp
|
|
288
|
-
if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
|
|
289
|
-
cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
|
|
290
|
-
}
|
|
291
|
-
|
|
292
|
-
g1 = 0
|
|
293
|
-
if (cp !== undefined && cp !== REP) return cp
|
|
294
|
-
if (b < 128) pushback.push(b)
|
|
295
|
-
return err()
|
|
410
|
+
// if g1 = 0 then g2 = g3 = 0
|
|
411
|
+
if (g1 && !stream) {
|
|
412
|
+
g1 = g2 = g3 = 0
|
|
413
|
+
res += String.fromCharCode(err())
|
|
296
414
|
}
|
|
297
415
|
|
|
298
|
-
|
|
299
|
-
if (b === 0x80) return 0x20_ac
|
|
300
|
-
if (b === 0xff) return err()
|
|
301
|
-
g1 = b
|
|
416
|
+
return res
|
|
302
417
|
}
|
|
303
418
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
419
|
+
return { decode, isAscii: () => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
|
|
420
|
+
},
|
|
421
|
+
// https://encoding.spec.whatwg.org/#big5
|
|
422
|
+
big5: (err) => {
|
|
423
|
+
// The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
|
|
424
|
+
// We store that as strings
|
|
425
|
+
const big5 = getTable('big5')
|
|
426
|
+
return bigDecoder(err, (l, b) => {
|
|
427
|
+
if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) return
|
|
428
|
+
return big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] // strings
|
|
429
|
+
})
|
|
311
430
|
},
|
|
312
431
|
}
|
|
313
432
|
|
|
314
433
|
export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path
|
|
315
434
|
|
|
316
435
|
export function multibyteDecoder(enc, loose = false) {
|
|
317
|
-
if (enc === 'big5') return big5decoder(loose)
|
|
318
436
|
if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
|
|
319
437
|
|
|
320
438
|
// Input is assumed to be typechecked already
|
|
321
439
|
let mapper
|
|
322
440
|
const asciiSuperset = isAsciiSuperset(enc)
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
throw new TypeError(E_STRICT)
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
let res = ''
|
|
336
|
-
const length = arr.length
|
|
337
|
-
if (asciiSuperset && !mapper) {
|
|
338
|
-
res = decodeLatin1(arr, 0, asciiPrefix(arr))
|
|
339
|
-
if (res.length === arr.length) return res // ascii
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
if (!mapper) mapper = mappers[enc](onErr)
|
|
343
|
-
const { bytes, eof, pushback } = mapper
|
|
344
|
-
let i = res.length
|
|
345
|
-
|
|
346
|
-
// First, dump everything until EOF
|
|
347
|
-
// Same as the full loop, but without EOF handling
|
|
348
|
-
while (i < length || pushback.length > 0) {
|
|
349
|
-
const c = bytes(pushback.length > 0 ? pushback.pop() : arr[i++])
|
|
350
|
-
if (c === undefined) continue // consuming
|
|
351
|
-
res += String.fromCodePoint(c) // gb18030 returns codepoints above 0xFFFF from ranges
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
// Then, dump EOF. This needs the same loop as the characters can be pushed back
|
|
355
|
-
// TODO: only some encodings need this, most can be optimized
|
|
356
|
-
if (!stream) {
|
|
357
|
-
while (i <= length || pushback.length > 0) {
|
|
358
|
-
const isEOF = i === length && pushback.length === 0
|
|
359
|
-
const c = isEOF ? eof() : bytes(pushback.length > 0 ? pushback.pop() : arr[i++])
|
|
360
|
-
if (isEOF && c === null) break // clean exit
|
|
361
|
-
if (c === undefined) continue // consuming
|
|
362
|
-
res += String.fromCodePoint(c) // gb18030 returns codepoints above 0xFFFF from ranges
|
|
441
|
+
let streaming // because onErr is cached in mapper
|
|
442
|
+
const onErr = loose
|
|
443
|
+
? () => REP
|
|
444
|
+
: () => {
|
|
445
|
+
// The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
|
|
446
|
+
// Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
|
|
447
|
+
// iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
|
|
448
|
+
if (!streaming) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
|
|
449
|
+
throw new TypeError(E_STRICT)
|
|
363
450
|
}
|
|
364
|
-
}
|
|
365
|
-
|
|
366
|
-
// Chrome and WebKit fail on this, we don't: completely destroy the old decoder instance when finished streaming
|
|
367
|
-
// > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
|
|
368
|
-
// > Set this’s do not flush to options["stream"]
|
|
369
|
-
if (!stream) mapper = null
|
|
370
451
|
|
|
371
|
-
return res
|
|
372
|
-
}
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
// The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
|
|
376
|
-
// We store that as strings
|
|
377
|
-
function big5decoder(loose) {
|
|
378
|
-
// Input is assumed to be typechecked already
|
|
379
|
-
let lead = 0
|
|
380
|
-
let big5
|
|
381
452
|
return (arr, stream = false) => {
|
|
382
|
-
const onErr = loose
|
|
383
|
-
? () => '\uFFFD'
|
|
384
|
-
: () => {
|
|
385
|
-
// Lead is always already cleared before throwing
|
|
386
|
-
throw new TypeError(E_STRICT)
|
|
387
|
-
}
|
|
388
|
-
|
|
389
453
|
let res = ''
|
|
390
|
-
|
|
391
|
-
if (!lead) {
|
|
454
|
+
if (asciiSuperset && (!mapper || mapper.isAscii?.())) {
|
|
392
455
|
res = decodeLatin1(arr, 0, asciiPrefix(arr))
|
|
393
456
|
if (res.length === arr.length) return res // ascii
|
|
394
457
|
}
|
|
395
458
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
if (lead) {
|
|
400
|
-
let cp
|
|
401
|
-
if ((b >= 0x40 && b <= 0x7e) || (b >= 0xa1 && b !== 0xff)) {
|
|
402
|
-
cp = big5[(lead - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)]
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
lead = 0
|
|
406
|
-
if (cp) {
|
|
407
|
-
res += cp // strings
|
|
408
|
-
} else {
|
|
409
|
-
res += onErr()
|
|
410
|
-
// same as pushing it back: lead is cleared, pushed back can't contain more than 1 byte
|
|
411
|
-
if (b < 128) res += String.fromCharCode(b)
|
|
412
|
-
}
|
|
413
|
-
} else if (b < 128) {
|
|
414
|
-
res += String.fromCharCode(b)
|
|
415
|
-
} else if (b < 0x81 || b === 0xff) {
|
|
416
|
-
res += onErr()
|
|
417
|
-
} else {
|
|
418
|
-
lead = b
|
|
419
|
-
}
|
|
420
|
-
}
|
|
421
|
-
|
|
422
|
-
if (!stream && lead) {
|
|
423
|
-
// Destroy decoder state
|
|
424
|
-
lead = 0
|
|
425
|
-
res += onErr()
|
|
426
|
-
}
|
|
427
|
-
|
|
428
|
-
return res
|
|
459
|
+
streaming = stream // affects onErr
|
|
460
|
+
if (!mapper) mapper = mappers[enc](onErr)
|
|
461
|
+
return res + mapper.decode(arr, res.length, arr.length, stream)
|
|
429
462
|
}
|
|
430
463
|
}
|