@exodus/bytes 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -5
- package/array.d.ts +0 -1
- package/base58.js +1 -1
- package/base58check.js +1 -2
- package/base64.d.ts +0 -1
- package/encoding-browser.browser.js +29 -0
- package/encoding-browser.d.ts +1 -0
- package/encoding-browser.js +1 -0
- package/encoding-browser.native.js +1 -0
- package/fallback/_utils.js +1 -0
- package/fallback/encoding.api.js +81 -0
- package/fallback/encoding.js +6 -82
- package/fallback/latin1.js +1 -0
- package/fallback/multi-byte.js +456 -71
- package/fallback/multi-byte.table.js +20 -15
- package/fallback/single-byte.js +1 -1
- package/fallback/utf16.js +45 -26
- package/fallback/utf8.js +1 -1
- package/hex.d.ts +0 -1
- package/index.d.ts +43 -0
- package/index.js +5 -0
- package/multi-byte.js +7 -1
- package/multi-byte.node.js +7 -1
- package/package.json +32 -3
- package/single-byte.js +9 -11
- package/single-byte.node.js +29 -26
- package/utf16.js +1 -0
- package/utf16.node.js +6 -2
package/fallback/multi-byte.js
CHANGED
|
@@ -1,76 +1,89 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { E_STRING } from './_utils.js'
|
|
2
|
+
import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2, encodeAscii } from './latin1.js'
|
|
2
3
|
import { getTable } from './multi-byte.table.js'
|
|
3
4
|
|
|
4
5
|
export const E_STRICT = 'Input is not well-formed for this encoding'
|
|
5
6
|
|
|
6
|
-
|
|
7
|
+
/* Decoders */
|
|
7
8
|
|
|
8
9
|
// If the decoder is not cleared properly, state can be preserved between non-streaming calls!
|
|
9
10
|
// See comment about fatal stream
|
|
10
11
|
|
|
11
|
-
//
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
if (typeof p === 'number') {
|
|
21
|
-
o16[oi++] = p
|
|
22
|
-
} else if (p) {
|
|
23
|
-
// This is still faster than string concatenation. Can we optimize strings though?
|
|
24
|
-
for (let i = 0; i < p.length; i++) o16[oi++] = p.charCodeAt(i)
|
|
25
|
-
} else {
|
|
26
|
-
o16[oi++] = err()
|
|
27
|
-
if (b < 128) o16[oi++] = b
|
|
28
|
-
}
|
|
29
|
-
}
|
|
12
|
+
// All except iso-2022-jp are ASCII supersets
|
|
13
|
+
// When adding something that is not an ASCII superset, adjust the ASCII fast path
|
|
14
|
+
const mappers = {
|
|
15
|
+
// https://encoding.spec.whatwg.org/#euc-kr-decoder
|
|
16
|
+
'euc-kr': (err) => {
|
|
17
|
+
const euc = getTable('euc-kr')
|
|
18
|
+
let lead = 0
|
|
19
|
+
let oi = 0
|
|
20
|
+
let o16
|
|
30
21
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
oi = 0
|
|
35
|
-
|
|
36
|
-
if (lead && i < end) decodeLead(arr[i++])
|
|
37
|
-
while (i < end) {
|
|
38
|
-
const b = arr[i++]
|
|
39
|
-
if (b < 128) {
|
|
40
|
-
o16[oi++] = b
|
|
41
|
-
} else if (b === 0x80 || b === 0xff) {
|
|
22
|
+
const decodeLead = (b) => {
|
|
23
|
+
if (b < 0x41 || b > 0xfe) {
|
|
24
|
+
lead = 0
|
|
42
25
|
o16[oi++] = err()
|
|
26
|
+
if (b < 128) o16[oi++] = b
|
|
43
27
|
} else {
|
|
44
|
-
|
|
45
|
-
|
|
28
|
+
const p = euc[(lead - 0x81) * 190 + b - 0x41]
|
|
29
|
+
lead = 0
|
|
30
|
+
if (p) {
|
|
31
|
+
o16[oi++] = p
|
|
32
|
+
} else {
|
|
33
|
+
o16[oi++] = err()
|
|
34
|
+
if (b < 128) o16[oi++] = b
|
|
35
|
+
}
|
|
46
36
|
}
|
|
47
37
|
}
|
|
48
38
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
o16
|
|
52
|
-
|
|
39
|
+
const decode = (arr, start, end, stream) => {
|
|
40
|
+
let i = start
|
|
41
|
+
o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte
|
|
42
|
+
oi = 0
|
|
53
43
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
44
|
+
// Fast path
|
|
45
|
+
if (!lead) {
|
|
46
|
+
for (const last1 = end - 1; i < last1; ) {
|
|
47
|
+
const l = arr[i]
|
|
48
|
+
if (l < 128) {
|
|
49
|
+
o16[oi++] = l
|
|
50
|
+
i++
|
|
51
|
+
} else {
|
|
52
|
+
if (l === 0x80 || l === 0xff) break
|
|
53
|
+
const b = arr[i + 1]
|
|
54
|
+
if (b < 0x41 || b === 0xff) break
|
|
55
|
+
const p = euc[(l - 0x81) * 190 + b - 0x41]
|
|
56
|
+
if (!p) break
|
|
57
|
+
o16[oi++] = p
|
|
58
|
+
i += 2
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
58
62
|
|
|
59
|
-
|
|
60
|
-
|
|
63
|
+
if (lead && i < end) decodeLead(arr[i++])
|
|
64
|
+
while (i < end) {
|
|
65
|
+
const b = arr[i++]
|
|
66
|
+
if (b < 128) {
|
|
67
|
+
o16[oi++] = b
|
|
68
|
+
} else if (b === 0x80 || b === 0xff) {
|
|
69
|
+
o16[oi++] = err()
|
|
70
|
+
} else {
|
|
71
|
+
lead = b
|
|
72
|
+
if (i < end) decodeLead(arr[i++])
|
|
73
|
+
}
|
|
74
|
+
}
|
|
61
75
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
})
|
|
76
|
+
if (lead && !stream) {
|
|
77
|
+
lead = 0
|
|
78
|
+
o16[oi++] = err()
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
const res = decodeUCS2(o16, oi)
|
|
82
|
+
o16 = null
|
|
83
|
+
return res
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return { decode, isAscii: () => lead === 0 }
|
|
74
87
|
},
|
|
75
88
|
// https://encoding.spec.whatwg.org/#euc-jp-decoder
|
|
76
89
|
'euc-jp': (err) => {
|
|
@@ -96,7 +109,7 @@ const mappers = {
|
|
|
96
109
|
|
|
97
110
|
lead = 0
|
|
98
111
|
j12 = false
|
|
99
|
-
if (cp
|
|
112
|
+
if (cp) {
|
|
100
113
|
o16[oi++] = cp
|
|
101
114
|
} else {
|
|
102
115
|
o16[oi++] = err()
|
|
@@ -110,6 +123,30 @@ const mappers = {
|
|
|
110
123
|
o16 = new Uint16Array(end - start + (lead ? 1 : 0))
|
|
111
124
|
oi = 0
|
|
112
125
|
|
|
126
|
+
// Fast path, non-j12
|
|
127
|
+
// lead = 0 means j12 = 0
|
|
128
|
+
if (!lead) {
|
|
129
|
+
for (const last1 = end - 1; i < last1; ) {
|
|
130
|
+
const l = arr[i]
|
|
131
|
+
if (l < 128) {
|
|
132
|
+
o16[oi++] = l
|
|
133
|
+
i++
|
|
134
|
+
} else {
|
|
135
|
+
const b = arr[i + 1]
|
|
136
|
+
if (l === 0x8e && b >= 0xa1 && b <= 0xdf) {
|
|
137
|
+
o16[oi++] = 0xfe_c0 + b
|
|
138
|
+
i += 2
|
|
139
|
+
} else {
|
|
140
|
+
if (l < 0xa1 || l === 0xff || b < 0xa1 || b === 0xff) break
|
|
141
|
+
const cp = jis0208[(l - 0xa1) * 94 + b - 0xa1]
|
|
142
|
+
if (!cp) break
|
|
143
|
+
o16[oi++] = cp
|
|
144
|
+
i += 2
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
113
150
|
if (lead && i < end) decodeLead(arr[i++])
|
|
114
151
|
if (lead && i < end) decodeLead(arr[i++]) // could be two leads, but no more
|
|
115
152
|
while (i < end) {
|
|
@@ -187,7 +224,7 @@ const mappers = {
|
|
|
187
224
|
dState = 4
|
|
188
225
|
if (b >= 0x21 && b <= 0x7e) {
|
|
189
226
|
const cp = jis0208[(lead - 0x21) * 94 + b - 0x21]
|
|
190
|
-
if (cp
|
|
227
|
+
if (cp) return cp
|
|
191
228
|
}
|
|
192
229
|
|
|
193
230
|
return err()
|
|
@@ -314,7 +351,7 @@ const mappers = {
|
|
|
314
351
|
}
|
|
315
352
|
|
|
316
353
|
const cp = jis0208[p]
|
|
317
|
-
if (cp
|
|
354
|
+
if (cp) {
|
|
318
355
|
o16[oi++] = cp
|
|
319
356
|
return
|
|
320
357
|
}
|
|
@@ -329,6 +366,34 @@ const mappers = {
|
|
|
329
366
|
oi = 0
|
|
330
367
|
let i = start
|
|
331
368
|
|
|
369
|
+
// Fast path
|
|
370
|
+
if (!lead) {
|
|
371
|
+
for (const last1 = end - 1; i < last1; ) {
|
|
372
|
+
const l = arr[i]
|
|
373
|
+
if (l <= 0x80) {
|
|
374
|
+
o16[oi++] = l
|
|
375
|
+
i++
|
|
376
|
+
} else if (l >= 0xa1 && l <= 0xdf) {
|
|
377
|
+
o16[oi++] = 0xfe_c0 + l
|
|
378
|
+
i++
|
|
379
|
+
} else {
|
|
380
|
+
if (l === 0xa0 || l > 0xfc) break
|
|
381
|
+
const b = arr[i + 1]
|
|
382
|
+
if (b < 0x40 || b > 0xfc || b === 0x7f) break
|
|
383
|
+
const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
|
|
384
|
+
if (p >= 8836 && p <= 10_715) {
|
|
385
|
+
o16[oi++] = 0xe0_00 - 8836 + p
|
|
386
|
+
i += 2
|
|
387
|
+
} else {
|
|
388
|
+
const cp = jis0208[p]
|
|
389
|
+
if (!cp) break
|
|
390
|
+
o16[oi++] = cp
|
|
391
|
+
i += 2
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
|
|
332
397
|
if (lead && i < end) decodeLead(arr[i++])
|
|
333
398
|
while (i < end) {
|
|
334
399
|
const b = arr[i++]
|
|
@@ -386,6 +451,36 @@ const mappers = {
|
|
|
386
451
|
let i = start
|
|
387
452
|
const pushback = [] // local and auto-cleared
|
|
388
453
|
|
|
454
|
+
// Fast path for 2-byte only
|
|
455
|
+
// pushback is always empty at start, and g1 = 0 means g2 = g3 = 0
|
|
456
|
+
if (g1 === 0) {
|
|
457
|
+
for (const last1 = end - 1; i < last1; ) {
|
|
458
|
+
const b = arr[i]
|
|
459
|
+
if (b < 128) {
|
|
460
|
+
o16[oi++] = b
|
|
461
|
+
i++
|
|
462
|
+
} else if (b === 0x80) {
|
|
463
|
+
o16[oi++] = 0x20_ac
|
|
464
|
+
i++
|
|
465
|
+
} else {
|
|
466
|
+
if (b === 0xff) break
|
|
467
|
+
const n = arr[i + 1]
|
|
468
|
+
let cp
|
|
469
|
+
if (n < 0x7f) {
|
|
470
|
+
if (n < 0x40) break
|
|
471
|
+
cp = gb18030[(b - 0x81) * 190 + n - 0x40]
|
|
472
|
+
} else {
|
|
473
|
+
if (n === 0xff || n === 0x7f) break
|
|
474
|
+
cp = gb18030[(b - 0x81) * 190 + n - 0x41]
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
if (!cp) break
|
|
478
|
+
o16[oi++] = cp // 16-bit
|
|
479
|
+
i += 2
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
|
|
389
484
|
// First, dump everything until EOF
|
|
390
485
|
// Same as the full loop, but without EOF handling
|
|
391
486
|
while (i < end || pushback.length > 0) {
|
|
@@ -395,11 +490,7 @@ const mappers = {
|
|
|
395
490
|
// hence, 3 checks for g3 is faster than 3 checks for g1
|
|
396
491
|
if (g2) {
|
|
397
492
|
if (g3) {
|
|
398
|
-
if (b
|
|
399
|
-
pushback.push(b, g3, g2)
|
|
400
|
-
g1 = g2 = g3 = 0
|
|
401
|
-
o16[oi++] = err()
|
|
402
|
-
} else {
|
|
493
|
+
if (b <= 0x39 && b >= 0x30) {
|
|
403
494
|
const p = index(
|
|
404
495
|
(g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30
|
|
405
496
|
)
|
|
@@ -413,6 +504,10 @@ const mappers = {
|
|
|
413
504
|
o16[oi++] = 0xd8_00 | (d >> 10)
|
|
414
505
|
o16[oi++] = 0xdc_00 | (d & 0x3_ff)
|
|
415
506
|
}
|
|
507
|
+
} else {
|
|
508
|
+
pushback.push(b, g3, g2)
|
|
509
|
+
g1 = g2 = g3 = 0
|
|
510
|
+
o16[oi++] = err()
|
|
416
511
|
}
|
|
417
512
|
} else if (b >= 0x81 && b <= 0xfe) {
|
|
418
513
|
g3 = b
|
|
@@ -421,7 +516,7 @@ const mappers = {
|
|
|
421
516
|
g1 = g2 = 0
|
|
422
517
|
o16[oi++] = err()
|
|
423
518
|
}
|
|
424
|
-
} else if (b
|
|
519
|
+
} else if (b <= 0x39 && b >= 0x30) {
|
|
425
520
|
g2 = b
|
|
426
521
|
} else {
|
|
427
522
|
let cp
|
|
@@ -430,7 +525,7 @@ const mappers = {
|
|
|
430
525
|
}
|
|
431
526
|
|
|
432
527
|
g1 = 0
|
|
433
|
-
if (cp
|
|
528
|
+
if (cp) {
|
|
434
529
|
o16[oi++] = cp // 16-bit
|
|
435
530
|
} else {
|
|
436
531
|
o16[oi++] = err()
|
|
@@ -464,10 +559,85 @@ const mappers = {
|
|
|
464
559
|
// The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
|
|
465
560
|
// We store those as two UTF-16 code units packed into one integer: (first << 16) | second
|
|
466
561
|
const big5 = getTable('big5')
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
562
|
+
let lead = 0
|
|
563
|
+
let oi = 0
|
|
564
|
+
let o16
|
|
565
|
+
|
|
566
|
+
const decodeLead = (b) => {
|
|
567
|
+
if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) {
|
|
568
|
+
lead = 0
|
|
569
|
+
o16[oi++] = err()
|
|
570
|
+
if (b < 128) o16[oi++] = b
|
|
571
|
+
} else {
|
|
572
|
+
const p = big5[(lead - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)]
|
|
573
|
+
lead = 0
|
|
574
|
+
if (p > 0x1_00_00) {
|
|
575
|
+
o16[oi++] = p >> 16
|
|
576
|
+
o16[oi++] = p & 0xff_ff
|
|
577
|
+
} else if (p) {
|
|
578
|
+
o16[oi++] = p
|
|
579
|
+
} else {
|
|
580
|
+
o16[oi++] = err()
|
|
581
|
+
if (b < 128) o16[oi++] = b
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
// eslint-disable-next-line sonarjs/no-identical-functions
|
|
587
|
+
const decode = (arr, start, end, stream) => {
|
|
588
|
+
let i = start
|
|
589
|
+
o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte
|
|
590
|
+
oi = 0
|
|
591
|
+
|
|
592
|
+
// Fast path
|
|
593
|
+
if (!lead) {
|
|
594
|
+
for (const last1 = end - 1; i < last1; ) {
|
|
595
|
+
const l = arr[i]
|
|
596
|
+
if (l < 128) {
|
|
597
|
+
o16[oi++] = l
|
|
598
|
+
i++
|
|
599
|
+
} else {
|
|
600
|
+
if (l === 0x80 || l === 0xff) break
|
|
601
|
+
const b = arr[i + 1]
|
|
602
|
+
if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) break
|
|
603
|
+
const p = big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)]
|
|
604
|
+
if (p > 0x1_00_00) {
|
|
605
|
+
o16[oi++] = p >> 16
|
|
606
|
+
o16[oi++] = p & 0xff_ff
|
|
607
|
+
} else {
|
|
608
|
+
if (!p) break
|
|
609
|
+
o16[oi++] = p
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
i += 2
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
if (lead && i < end) decodeLead(arr[i++])
|
|
618
|
+
while (i < end) {
|
|
619
|
+
const b = arr[i++]
|
|
620
|
+
if (b < 128) {
|
|
621
|
+
o16[oi++] = b
|
|
622
|
+
} else if (b === 0x80 || b === 0xff) {
|
|
623
|
+
o16[oi++] = err()
|
|
624
|
+
} else {
|
|
625
|
+
lead = b
|
|
626
|
+
if (i < end) decodeLead(arr[i++])
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
if (lead && !stream) {
|
|
631
|
+
lead = 0
|
|
632
|
+
o16[oi++] = err()
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
const res = decodeUCS2(o16, oi)
|
|
636
|
+
o16 = null
|
|
637
|
+
return res
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
return { decode, isAscii: () => lead === 0 }
|
|
471
641
|
},
|
|
472
642
|
}
|
|
473
643
|
|
|
@@ -482,7 +652,7 @@ export function multibyteDecoder(enc, loose = false) {
|
|
|
482
652
|
const asciiSuperset = isAsciiSuperset(enc)
|
|
483
653
|
let streaming // because onErr is cached in mapper
|
|
484
654
|
const onErr = loose
|
|
485
|
-
? () =>
|
|
655
|
+
? () => 0xff_fd
|
|
486
656
|
: () => {
|
|
487
657
|
// The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
|
|
488
658
|
// Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
|
|
@@ -504,3 +674,218 @@ export function multibyteDecoder(enc, loose = false) {
|
|
|
504
674
|
return res + mapper.decode(arr, res.length, arr.length, stream)
|
|
505
675
|
}
|
|
506
676
|
}
|
|
677
|
+
|
|
678
|
+
/* Encoders */
|
|
679
|
+
|
|
680
|
+
const maps = new Map()
|
|
681
|
+
const e7 = [[148, 236], [149, 237], [150, 243]] // prettier-ignore
|
|
682
|
+
const e8 = [[30, 89], [38, 97], [43, 102], [44, 103], [50, 109], [67, 126], [84, 144], [100, 160]] // prettier-ignore
|
|
683
|
+
const preencoders = {
|
|
684
|
+
__proto__: null,
|
|
685
|
+
big5: (p) => ((((p / 157) | 0) + 0x81) << 8) | ((p % 157 < 0x3f ? 0x40 : 0x62) + (p % 157)),
|
|
686
|
+
shift_jis: (p) => {
|
|
687
|
+
const l = (p / 188) | 0
|
|
688
|
+
const t = p % 188
|
|
689
|
+
return ((l + (l < 0x1f ? 0x81 : 0xc1)) << 8) | ((t < 0x3f ? 0x40 : 0x41) + t)
|
|
690
|
+
},
|
|
691
|
+
'euc-jp': (p) => ((((p / 94) | 0) + 0xa1) << 8) | ((p % 94) + 0xa1),
|
|
692
|
+
'euc-kr': (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190) + 0x41),
|
|
693
|
+
gb18030: (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190 < 0x3f ? 0x40 : 0x41) + (p % 190)),
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
preencoders.gbk = preencoders.gb18030
|
|
697
|
+
|
|
698
|
+
// We accept that encoders use non-trivial amount of mem, for perf
|
|
699
|
+
// most are 128 KiB of memory, big5 is 380 KiB; lazy-loaded at first use
|
|
700
|
+
function getMap(id, size) {
|
|
701
|
+
const cached = maps.get(id)
|
|
702
|
+
if (cached) return cached
|
|
703
|
+
let tname = id
|
|
704
|
+
const sjis = id === 'shift_jis'
|
|
705
|
+
if (id === 'gbk') tname = 'gb18030'
|
|
706
|
+
if (id === 'euc-jp' || sjis) tname = 'jis0208'
|
|
707
|
+
const table = getTable(tname)
|
|
708
|
+
const map = new Uint16Array(size)
|
|
709
|
+
const enc = preencoders[id] || ((p) => p + 1)
|
|
710
|
+
for (let i = 0; i < table.length; i++) {
|
|
711
|
+
const c = table[i]
|
|
712
|
+
if (!c) continue
|
|
713
|
+
if (id === 'big5') {
|
|
714
|
+
if (i < 5024) continue // this also skips multi-codepoint strings
|
|
715
|
+
// In big5, all return first entries except for these
|
|
716
|
+
if (
|
|
717
|
+
map[c] &&
|
|
718
|
+
c !== 0x25_50 &&
|
|
719
|
+
c !== 0x25_5e &&
|
|
720
|
+
c !== 0x25_61 &&
|
|
721
|
+
c !== 0x25_6a &&
|
|
722
|
+
c !== 0x53_41 &&
|
|
723
|
+
c !== 0x53_45
|
|
724
|
+
) {
|
|
725
|
+
continue
|
|
726
|
+
}
|
|
727
|
+
} else {
|
|
728
|
+
if (sjis && i >= 8272 && i <= 8835) continue
|
|
729
|
+
if (map[c]) continue
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
if (c > 0xff_ff) {
|
|
733
|
+
// always a single codepoint here
|
|
734
|
+
const s = String.fromCharCode(c >> 16, c & 0xff_ff)
|
|
735
|
+
map[s.codePointAt(0)] = enc(i)
|
|
736
|
+
} else {
|
|
737
|
+
map[c] = enc(i)
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
for (let i = 0; i < 0x80; i++) map[i] = i
|
|
742
|
+
if (sjis || id === 'euc-jp') {
|
|
743
|
+
if (sjis) map[0x80] = 0x80
|
|
744
|
+
const d = sjis ? 0xfe_c0 : 0x70_c0
|
|
745
|
+
for (let i = 0xff_61; i <= 0xff_9f; i++) map[i] = i - d
|
|
746
|
+
map[0x22_12] = map[0xff_0d]
|
|
747
|
+
map[0xa5] = 0x5c
|
|
748
|
+
map[0x20_3e] = 0x7e
|
|
749
|
+
} else if (tname === 'gb18030') {
|
|
750
|
+
if (id === 'gbk') map[0x20_ac] = 0x80
|
|
751
|
+
for (let i = 0xe7_8d; i <= 0xe7_93; i++) map[i] = i - 0x40_b4
|
|
752
|
+
for (const [a, b] of e7) map[0xe7_00 | a] = 0xa6_00 | b
|
|
753
|
+
for (const [a, b] of e8) map[0xe8_00 | a] = 0xfe_00 | b
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
maps.set(id, map)
|
|
757
|
+
return map
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
const encoders = new Set(['big5', 'euc-kr', 'euc-jp', 'shift_jis', 'gbk', 'gb18030'])
|
|
761
|
+
const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
|
|
762
|
+
let gb18030r
|
|
763
|
+
|
|
764
|
+
export function multibyteEncoder(enc, onError) {
|
|
765
|
+
if (!encoders.has(enc)) throw new RangeError('Unsupported encoding')
|
|
766
|
+
const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
|
|
767
|
+
const width = enc === 'gb18030' ? 4 : 2
|
|
768
|
+
const map = getMap(enc, size)
|
|
769
|
+
if (enc === 'gb18030' && !gb18030r) gb18030r = getTable('gb18030-ranges')
|
|
770
|
+
|
|
771
|
+
return (str) => {
|
|
772
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
773
|
+
if (!NON_LATIN.test(str)) {
|
|
774
|
+
try {
|
|
775
|
+
return encodeAscii(str, E_STRICT)
|
|
776
|
+
} catch {}
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
const length = str.length
|
|
780
|
+
const u8 = new Uint8Array(length * width)
|
|
781
|
+
let i = 0
|
|
782
|
+
while (i < length) {
|
|
783
|
+
const x = str.charCodeAt(i)
|
|
784
|
+
if (x >= 128) break
|
|
785
|
+
u8[i++] = x
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
// eslint-disable-next-line unicorn/consistent-function-scoping
|
|
789
|
+
const err = (code) => {
|
|
790
|
+
if (onError) return onError(code, u8, i)
|
|
791
|
+
throw new TypeError(E_STRICT)
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
if (!map || map.length < size) /* c8 ignore next */ throw new Error('Unreachable') // Important for perf
|
|
795
|
+
|
|
796
|
+
if (enc === 'gb18030') {
|
|
797
|
+
// Deduping this branch hurts other encoders perf
|
|
798
|
+
const encode = (cp) => {
|
|
799
|
+
let a = 0, b = 0 // prettier-ignore
|
|
800
|
+
for (const [c, d] of gb18030r) {
|
|
801
|
+
if (d > cp) break
|
|
802
|
+
a = c
|
|
803
|
+
b = d
|
|
804
|
+
}
|
|
805
|
+
|
|
806
|
+
let rp = cp === 0xe7_c7 ? 7457 : a + cp - b
|
|
807
|
+
u8[i++] = 0x81 + ((rp / 12_600) | 0)
|
|
808
|
+
rp %= 12_600
|
|
809
|
+
u8[i++] = 0x30 + ((rp / 1260) | 0)
|
|
810
|
+
rp %= 1260
|
|
811
|
+
u8[i++] = 0x81 + ((rp / 10) | 0)
|
|
812
|
+
u8[i++] = 0x30 + (rp % 10)
|
|
813
|
+
}
|
|
814
|
+
|
|
815
|
+
for (let j = i; j < length; j++) {
|
|
816
|
+
const x = str.charCodeAt(j)
|
|
817
|
+
if (x >= 0xd8_00 && x < 0xe0_00) {
|
|
818
|
+
if (x >= 0xdc_00 || j + 1 === length) {
|
|
819
|
+
i += err(x) // lone
|
|
820
|
+
} else {
|
|
821
|
+
const x1 = str.charCodeAt(j + 1)
|
|
822
|
+
if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
|
|
823
|
+
i += err(x) // lone
|
|
824
|
+
} else {
|
|
825
|
+
j++ // consume x1
|
|
826
|
+
encode(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
} else {
|
|
830
|
+
const e = map[x]
|
|
831
|
+
if (e & 0xff_00) {
|
|
832
|
+
u8[i++] = e >> 8
|
|
833
|
+
u8[i++] = e & 0xff
|
|
834
|
+
} else if (e || x === 0) {
|
|
835
|
+
u8[i++] = e
|
|
836
|
+
} else if (x === 0xe5_e5) {
|
|
837
|
+
i += err(x)
|
|
838
|
+
} else {
|
|
839
|
+
encode(x)
|
|
840
|
+
}
|
|
841
|
+
}
|
|
842
|
+
}
|
|
843
|
+
} else {
|
|
844
|
+
const long =
|
|
845
|
+
enc === 'big5'
|
|
846
|
+
? (x) => {
|
|
847
|
+
const e = map[x]
|
|
848
|
+
if (e & 0xff_00) {
|
|
849
|
+
u8[i++] = e >> 8
|
|
850
|
+
u8[i++] = e & 0xff
|
|
851
|
+
} else if (e || x === 0) {
|
|
852
|
+
u8[i++] = e
|
|
853
|
+
} else {
|
|
854
|
+
i += err(x)
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
: (x) => {
|
|
858
|
+
i += err(x)
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
for (let j = i; j < length; j++) {
|
|
862
|
+
const x = str.charCodeAt(j)
|
|
863
|
+
if (x >= 0xd8_00 && x < 0xe0_00) {
|
|
864
|
+
if (x >= 0xdc_00 || j + 1 === length) {
|
|
865
|
+
i += err(x) // lone
|
|
866
|
+
} else {
|
|
867
|
+
const x1 = str.charCodeAt(j + 1)
|
|
868
|
+
if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
|
|
869
|
+
i += err(x) // lone
|
|
870
|
+
} else {
|
|
871
|
+
j++ // consume x1
|
|
872
|
+
long(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
} else {
|
|
876
|
+
const e = map[x]
|
|
877
|
+
if (e & 0xff_00) {
|
|
878
|
+
u8[i++] = e >> 8
|
|
879
|
+
u8[i++] = e & 0xff
|
|
880
|
+
} else if (e || x === 0) {
|
|
881
|
+
u8[i++] = e
|
|
882
|
+
} else {
|
|
883
|
+
i += err(x)
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
}
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
return i === u8.length ? u8 : u8.subarray(0, i)
|
|
890
|
+
}
|
|
891
|
+
}
|
|
@@ -40,7 +40,7 @@ function loadBase64(str) {
|
|
|
40
40
|
return y
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
-
function unwrap(res, t, pos,
|
|
43
|
+
function unwrap(res, t, pos, highMode = false) {
|
|
44
44
|
let code = 0
|
|
45
45
|
for (let i = 0; i < t.length; i++) {
|
|
46
46
|
let x = t[i]
|
|
@@ -55,25 +55,30 @@ function unwrap(res, t, pos, stringMode = false) {
|
|
|
55
55
|
code += t[++i]
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
if (
|
|
58
|
+
if (highMode) {
|
|
59
59
|
for (let k = 0; k < x; k++, pos++, code++) {
|
|
60
|
-
|
|
60
|
+
if (code <= 0xff_ff) {
|
|
61
|
+
res[pos] = code
|
|
62
|
+
} else {
|
|
63
|
+
const c = String.fromCodePoint(code)
|
|
64
|
+
res[pos] = (c.charCodeAt(0) << 16) | c.charCodeAt(1)
|
|
65
|
+
}
|
|
61
66
|
}
|
|
62
67
|
} else {
|
|
63
68
|
for (let k = 0; k < x; k++, pos++, code++) res[pos] = code
|
|
64
69
|
}
|
|
65
70
|
}
|
|
66
71
|
} else if (x[0] === '$' && Object.hasOwn(indices, x)) {
|
|
67
|
-
pos = unwrap(res, indices[x], pos,
|
|
68
|
-
} else if (
|
|
72
|
+
pos = unwrap(res, indices[x], pos, highMode) // self-reference using shared chunks
|
|
73
|
+
} else if (highMode) {
|
|
69
74
|
const s = [...utf16toString(loadBase64(x), 'uint8-le')] // splits by codepoints
|
|
70
|
-
let
|
|
75
|
+
let c
|
|
71
76
|
for (let i = 0; i < s.length; ) {
|
|
72
|
-
|
|
73
|
-
res[pos++] =
|
|
77
|
+
c = s[i++]
|
|
78
|
+
res[pos++] = c.length === 1 ? c.charCodeAt(0) : (c.charCodeAt(0) << 16) | c.charCodeAt(1)
|
|
74
79
|
}
|
|
75
80
|
|
|
76
|
-
code =
|
|
81
|
+
code = c.codePointAt(0) + 1
|
|
77
82
|
} else {
|
|
78
83
|
const u16 = to16input(loadBase64(x), true) // data is little-endian
|
|
79
84
|
res.set(u16, pos)
|
|
@@ -101,17 +106,17 @@ export function getTable(id) {
|
|
|
101
106
|
while (idx.length > 0) res.push([(a += idx.shift()), (b += idx.shift())]) // destroying, we remove it later anyway
|
|
102
107
|
} else if (id === 'big5') {
|
|
103
108
|
if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
|
|
104
|
-
res = new
|
|
109
|
+
res = new Uint32Array(sizes[id]) // each entry: one UTF-16 code unit, or two packed as (first << 16) | second; 0 = no mapping
|
|
105
110
|
unwrap(res, indices[id], 0, true)
|
|
106
111
|
// Pointer code updates are embedded into the table
|
|
107
|
-
|
|
108
|
-
res[
|
|
109
|
-
res[
|
|
110
|
-
res[
|
|
112
|
+
// These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157
|
|
113
|
+
res[1133] = 0xca_03_04
|
|
114
|
+
res[1135] = 0xca_03_0c
|
|
115
|
+
res[1164] = 0xea_03_04
|
|
116
|
+
res[1166] = 0xea_03_0c
|
|
111
117
|
} else {
|
|
112
118
|
if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
|
|
113
119
|
res = new Uint16Array(sizes[id])
|
|
114
|
-
res.fill(0xff_fd)
|
|
115
120
|
unwrap(res, indices[id], 0, false)
|
|
116
121
|
}
|
|
117
122
|
|