@exodus/bytes 1.8.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,76 +1,89 @@
1
- import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2 } from './latin1.js'
1
+ import { E_STRING } from './_utils.js'
2
+ import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2, encodeAscii } from './latin1.js'
2
3
  import { getTable } from './multi-byte.table.js'
3
4
 
4
5
  export const E_STRICT = 'Input is not well-formed for this encoding'
5
6
 
6
- // TODO: optimize
7
+ /* Decoders */
7
8
 
8
9
  // If the decoder is not cleared properly, state can be preserved between non-streaming calls!
9
10
  // See comment about fatal stream
10
11
 
11
- // Common between euc-kr and big5
12
- function bigDecoder(err, pair) {
13
- let lead = 0
14
- let oi = 0
15
- let o16
16
-
17
- const decodeLead = (b) => {
18
- const p = pair(lead, b)
19
- lead = 0
20
- if (typeof p === 'number') {
21
- o16[oi++] = p
22
- } else if (p) {
23
- // This is still faster than string concatenation. Can we optimize strings though?
24
- for (let i = 0; i < p.length; i++) o16[oi++] = p.charCodeAt(i)
25
- } else {
26
- o16[oi++] = err()
27
- if (b < 128) o16[oi++] = b
28
- }
29
- }
12
+ // All except iso-2022-jp are ASCII supersets
13
+ // When adding something that is not an ASCII superset, adjust the ASCII fast path
14
+ const mappers = {
15
+ // https://encoding.spec.whatwg.org/#euc-kr-decoder
16
+ 'euc-kr': (err) => {
17
+ const euc = getTable('euc-kr')
18
+ let lead = 0
19
+ let oi = 0
20
+ let o16
30
21
 
31
- const decode = (arr, start, end, stream) => {
32
- let i = start
33
- o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte
34
- oi = 0
35
-
36
- if (lead && i < end) decodeLead(arr[i++])
37
- while (i < end) {
38
- const b = arr[i++]
39
- if (b < 128) {
40
- o16[oi++] = b
41
- } else if (b === 0x80 || b === 0xff) {
22
+ const decodeLead = (b) => {
23
+ if (b < 0x41 || b > 0xfe) {
24
+ lead = 0
42
25
  o16[oi++] = err()
26
+ if (b < 128) o16[oi++] = b
43
27
  } else {
44
- lead = b
45
- if (i < end) decodeLead(arr[i++])
28
+ const p = euc[(lead - 0x81) * 190 + b - 0x41]
29
+ lead = 0
30
+ if (p) {
31
+ o16[oi++] = p
32
+ } else {
33
+ o16[oi++] = err()
34
+ if (b < 128) o16[oi++] = b
35
+ }
46
36
  }
47
37
  }
48
38
 
49
- if (lead && !stream) {
50
- lead = 0
51
- o16[oi++] = err()
52
- }
39
+ const decode = (arr, start, end, stream) => {
40
+ let i = start
41
+ o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte
42
+ oi = 0
53
43
 
54
- const res = decodeUCS2(o16, oi)
55
- o16 = null
56
- return res
57
- }
44
+ // Fast path
45
+ if (!lead) {
46
+ for (const last1 = end - 1; i < last1; ) {
47
+ const l = arr[i]
48
+ if (l < 128) {
49
+ o16[oi++] = l
50
+ i++
51
+ } else {
52
+ if (l === 0x80 || l === 0xff) break
53
+ const b = arr[i + 1]
54
+ if (b < 0x41 || b === 0xff) break
55
+ const p = euc[(l - 0x81) * 190 + b - 0x41]
56
+ if (!p) break
57
+ o16[oi++] = p
58
+ i += 2
59
+ }
60
+ }
61
+ }
58
62
 
59
- return { decode, isAscii: () => lead === 0 }
60
- }
63
+ if (lead && i < end) decodeLead(arr[i++])
64
+ while (i < end) {
65
+ const b = arr[i++]
66
+ if (b < 128) {
67
+ o16[oi++] = b
68
+ } else if (b === 0x80 || b === 0xff) {
69
+ o16[oi++] = err()
70
+ } else {
71
+ lead = b
72
+ if (i < end) decodeLead(arr[i++])
73
+ }
74
+ }
61
75
 
62
- // All except iso-2022-jp are ASCII supersets
63
- // When adding something that is not an ASCII superset, adjust the ASCII fast path
64
- const REP = 0xff_fd
65
- const mappers = {
66
- // https://encoding.spec.whatwg.org/#euc-kr-decoder
67
- 'euc-kr': (err) => {
68
- const euc = getTable('euc-kr')
69
- return bigDecoder(err, (l, b) => {
70
- if (b < 0x41 || b > 0xfe) return
71
- const cp = euc[(l - 0x81) * 190 + b - 0x41]
72
- return cp !== undefined && cp !== REP ? cp : undefined
73
- })
76
+ if (lead && !stream) {
77
+ lead = 0
78
+ o16[oi++] = err()
79
+ }
80
+
81
+ const res = decodeUCS2(o16, oi)
82
+ o16 = null
83
+ return res
84
+ }
85
+
86
+ return { decode, isAscii: () => lead === 0 }
74
87
  },
75
88
  // https://encoding.spec.whatwg.org/#euc-jp-decoder
76
89
  'euc-jp': (err) => {
@@ -96,7 +109,7 @@ const mappers = {
96
109
 
97
110
  lead = 0
98
111
  j12 = false
99
- if (cp !== undefined && cp !== REP) {
112
+ if (cp) {
100
113
  o16[oi++] = cp
101
114
  } else {
102
115
  o16[oi++] = err()
@@ -110,6 +123,30 @@ const mappers = {
110
123
  o16 = new Uint16Array(end - start + (lead ? 1 : 0))
111
124
  oi = 0
112
125
 
126
+ // Fast path, non-j12
127
+ // lead = 0 means j12 = 0
128
+ if (!lead) {
129
+ for (const last1 = end - 1; i < last1; ) {
130
+ const l = arr[i]
131
+ if (l < 128) {
132
+ o16[oi++] = l
133
+ i++
134
+ } else {
135
+ const b = arr[i + 1]
136
+ if (l === 0x8e && b >= 0xa1 && b <= 0xdf) {
137
+ o16[oi++] = 0xfe_c0 + b
138
+ i += 2
139
+ } else {
140
+ if (l < 0xa1 || l === 0xff || b < 0xa1 || b === 0xff) break
141
+ const cp = jis0208[(l - 0xa1) * 94 + b - 0xa1]
142
+ if (!cp) break
143
+ o16[oi++] = cp
144
+ i += 2
145
+ }
146
+ }
147
+ }
148
+ }
149
+
113
150
  if (lead && i < end) decodeLead(arr[i++])
114
151
  if (lead && i < end) decodeLead(arr[i++]) // could be two leads, but no more
115
152
  while (i < end) {
@@ -187,7 +224,7 @@ const mappers = {
187
224
  dState = 4
188
225
  if (b >= 0x21 && b <= 0x7e) {
189
226
  const cp = jis0208[(lead - 0x21) * 94 + b - 0x21]
190
- if (cp !== undefined && cp !== REP) return cp
227
+ if (cp) return cp
191
228
  }
192
229
 
193
230
  return err()
@@ -314,7 +351,7 @@ const mappers = {
314
351
  }
315
352
 
316
353
  const cp = jis0208[p]
317
- if (cp !== undefined && cp !== REP) {
354
+ if (cp) {
318
355
  o16[oi++] = cp
319
356
  return
320
357
  }
@@ -329,6 +366,34 @@ const mappers = {
329
366
  oi = 0
330
367
  let i = start
331
368
 
369
+ // Fast path
370
+ if (!lead) {
371
+ for (const last1 = end - 1; i < last1; ) {
372
+ const l = arr[i]
373
+ if (l <= 0x80) {
374
+ o16[oi++] = l
375
+ i++
376
+ } else if (l >= 0xa1 && l <= 0xdf) {
377
+ o16[oi++] = 0xfe_c0 + l
378
+ i++
379
+ } else {
380
+ if (l === 0xa0 || l > 0xfc) break
381
+ const b = arr[i + 1]
382
+ if (b < 0x40 || b > 0xfc || b === 0x7f) break
383
+ const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
384
+ if (p >= 8836 && p <= 10_715) {
385
+ o16[oi++] = 0xe0_00 - 8836 + p
386
+ i += 2
387
+ } else {
388
+ const cp = jis0208[p]
389
+ if (!cp) break
390
+ o16[oi++] = cp
391
+ i += 2
392
+ }
393
+ }
394
+ }
395
+ }
396
+
332
397
  if (lead && i < end) decodeLead(arr[i++])
333
398
  while (i < end) {
334
399
  const b = arr[i++]
@@ -386,6 +451,36 @@ const mappers = {
386
451
  let i = start
387
452
  const pushback = [] // local and auto-cleared
388
453
 
454
+ // Fast path for 2-byte only
455
+ // pushback is always empty at start, and g1 = 0 means g2 = g3 = 0
456
+ if (g1 === 0) {
457
+ for (const last1 = end - 1; i < last1; ) {
458
+ const b = arr[i]
459
+ if (b < 128) {
460
+ o16[oi++] = b
461
+ i++
462
+ } else if (b === 0x80) {
463
+ o16[oi++] = 0x20_ac
464
+ i++
465
+ } else {
466
+ if (b === 0xff) break
467
+ const n = arr[i + 1]
468
+ let cp
469
+ if (n < 0x7f) {
470
+ if (n < 0x40) break
471
+ cp = gb18030[(b - 0x81) * 190 + n - 0x40]
472
+ } else {
473
+ if (n === 0xff || n === 0x7f) break
474
+ cp = gb18030[(b - 0x81) * 190 + n - 0x41]
475
+ }
476
+
477
+ if (!cp) break
478
+ o16[oi++] = cp // 16-bit
479
+ i += 2
480
+ }
481
+ }
482
+ }
483
+
389
484
  // First, dump everything until EOF
390
485
  // Same as the full loop, but without EOF handling
391
486
  while (i < end || pushback.length > 0) {
@@ -395,11 +490,7 @@ const mappers = {
395
490
  // hence, 3 checks for g3 is faster than 3 checks for g1
396
491
  if (g2) {
397
492
  if (g3) {
398
- if (b < 0x30 || b > 0x39) {
399
- pushback.push(b, g3, g2)
400
- g1 = g2 = g3 = 0
401
- o16[oi++] = err()
402
- } else {
493
+ if (b <= 0x39 && b >= 0x30) {
403
494
  const p = index(
404
495
  (g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30
405
496
  )
@@ -413,6 +504,10 @@ const mappers = {
413
504
  o16[oi++] = 0xd8_00 | (d >> 10)
414
505
  o16[oi++] = 0xdc_00 | (d & 0x3_ff)
415
506
  }
507
+ } else {
508
+ pushback.push(b, g3, g2)
509
+ g1 = g2 = g3 = 0
510
+ o16[oi++] = err()
416
511
  }
417
512
  } else if (b >= 0x81 && b <= 0xfe) {
418
513
  g3 = b
@@ -421,7 +516,7 @@ const mappers = {
421
516
  g1 = g2 = 0
422
517
  o16[oi++] = err()
423
518
  }
424
- } else if (b >= 0x30 && b <= 0x39) {
519
+ } else if (b <= 0x39 && b >= 0x30) {
425
520
  g2 = b
426
521
  } else {
427
522
  let cp
@@ -430,7 +525,7 @@ const mappers = {
430
525
  }
431
526
 
432
527
  g1 = 0
433
- if (cp !== undefined && cp !== REP) {
528
+ if (cp) {
434
529
  o16[oi++] = cp // 16-bit
435
530
  } else {
436
531
  o16[oi++] = err()
@@ -464,10 +559,85 @@ const mappers = {
464
559
  // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
465
560
  // We store that as strings
466
561
  const big5 = getTable('big5')
467
- return bigDecoder(err, (l, b) => {
468
- if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) return
469
- return big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] // strings
470
- })
562
+ let lead = 0
563
+ let oi = 0
564
+ let o16
565
+
566
+ const decodeLead = (b) => {
567
+ if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) {
568
+ lead = 0
569
+ o16[oi++] = err()
570
+ if (b < 128) o16[oi++] = b
571
+ } else {
572
+ const p = big5[(lead - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)]
573
+ lead = 0
574
+ if (p > 0x1_00_00) {
575
+ o16[oi++] = p >> 16
576
+ o16[oi++] = p & 0xff_ff
577
+ } else if (p) {
578
+ o16[oi++] = p
579
+ } else {
580
+ o16[oi++] = err()
581
+ if (b < 128) o16[oi++] = b
582
+ }
583
+ }
584
+ }
585
+
586
+ // eslint-disable-next-line sonarjs/no-identical-functions
587
+ const decode = (arr, start, end, stream) => {
588
+ let i = start
589
+ o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte
590
+ oi = 0
591
+
592
+ // Fast path
593
+ if (!lead) {
594
+ for (const last1 = end - 1; i < last1; ) {
595
+ const l = arr[i]
596
+ if (l < 128) {
597
+ o16[oi++] = l
598
+ i++
599
+ } else {
600
+ if (l === 0x80 || l === 0xff) break
601
+ const b = arr[i + 1]
602
+ if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) break
603
+ const p = big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)]
604
+ if (p > 0x1_00_00) {
605
+ o16[oi++] = p >> 16
606
+ o16[oi++] = p & 0xff_ff
607
+ } else {
608
+ if (!p) break
609
+ o16[oi++] = p
610
+ }
611
+
612
+ i += 2
613
+ }
614
+ }
615
+ }
616
+
617
+ if (lead && i < end) decodeLead(arr[i++])
618
+ while (i < end) {
619
+ const b = arr[i++]
620
+ if (b < 128) {
621
+ o16[oi++] = b
622
+ } else if (b === 0x80 || b === 0xff) {
623
+ o16[oi++] = err()
624
+ } else {
625
+ lead = b
626
+ if (i < end) decodeLead(arr[i++])
627
+ }
628
+ }
629
+
630
+ if (lead && !stream) {
631
+ lead = 0
632
+ o16[oi++] = err()
633
+ }
634
+
635
+ const res = decodeUCS2(o16, oi)
636
+ o16 = null
637
+ return res
638
+ }
639
+
640
+ return { decode, isAscii: () => lead === 0 }
471
641
  },
472
642
  }
473
643
 
@@ -482,7 +652,7 @@ export function multibyteDecoder(enc, loose = false) {
482
652
  const asciiSuperset = isAsciiSuperset(enc)
483
653
  let streaming // because onErr is cached in mapper
484
654
  const onErr = loose
485
- ? () => REP
655
+ ? () => 0xff_fd
486
656
  : () => {
487
657
  // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
488
658
  // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
@@ -504,3 +674,289 @@ export function multibyteDecoder(enc, loose = false) {
504
674
  return res + mapper.decode(arr, res.length, arr.length, stream)
505
675
  }
506
676
  }
677
+
678
+ /* Encoders */
679
+
680
+ const maps = new Map()
681
+ const e7 = [[148, 236], [149, 237], [150, 243]] // prettier-ignore
682
+ const e8 = [[30, 89], [38, 97], [43, 102], [44, 103], [50, 109], [67, 126], [84, 144], [100, 160]] // prettier-ignore
683
+ const preencoders = {
684
+ __proto__: null,
685
+ big5: (p) => ((((p / 157) | 0) + 0x81) << 8) | ((p % 157 < 0x3f ? 0x40 : 0x62) + (p % 157)),
686
+ shift_jis: (p) => {
687
+ const l = (p / 188) | 0
688
+ const t = p % 188
689
+ return ((l + (l < 0x1f ? 0x81 : 0xc1)) << 8) | ((t < 0x3f ? 0x40 : 0x41) + t)
690
+ },
691
+ 'iso-2022-jp': (p) => ((((p / 94) | 0) + 0x21) << 8) | ((p % 94) + 0x21),
692
+ 'euc-jp': (p) => ((((p / 94) | 0) + 0xa1) << 8) | ((p % 94) + 0xa1),
693
+ 'euc-kr': (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190) + 0x41),
694
+ gb18030: (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190 < 0x3f ? 0x40 : 0x41) + (p % 190)),
695
+ }
696
+
697
+ preencoders.gbk = preencoders.gb18030
698
+
699
+ // We accept that encoders use non-trivial amount of mem, for perf
700
+ // most are 128 KiB mem, big5 is 380 KiB, lazy-loaded at first use
701
+ function getMap(id, size, ascii) {
702
+ const cached = maps.get(id)
703
+ if (cached) return cached
704
+ let tname = id
705
+ const sjis = id === 'shift_jis'
706
+ const iso2022jp = id === 'iso-2022-jp'
707
+ if (iso2022jp) tname = 'jis0208'
708
+ if (id === 'gbk') tname = 'gb18030'
709
+ if (id === 'euc-jp' || sjis) tname = 'jis0208'
710
+ const table = getTable(tname)
711
+ const map = new Uint16Array(size)
712
+ const enc = preencoders[id] || ((p) => p + 1)
713
+ for (let i = 0; i < table.length; i++) {
714
+ const c = table[i]
715
+ if (!c) continue
716
+ if (id === 'big5') {
717
+ if (i < 5024) continue // this also skips multi-codepoint strings
718
+ // In big5, all return first entries except for these
719
+ if (
720
+ map[c] &&
721
+ c !== 0x25_50 &&
722
+ c !== 0x25_5e &&
723
+ c !== 0x25_61 &&
724
+ c !== 0x25_6a &&
725
+ c !== 0x53_41 &&
726
+ c !== 0x53_45
727
+ ) {
728
+ continue
729
+ }
730
+ } else {
731
+ if (sjis && i >= 8272 && i <= 8835) continue
732
+ if (map[c]) continue
733
+ }
734
+
735
+ if (c > 0xff_ff) {
736
+ // always a single codepoint here
737
+ const s = String.fromCharCode(c >> 16, c & 0xff_ff)
738
+ map[s.codePointAt(0)] = enc(i)
739
+ } else {
740
+ map[c] = enc(i)
741
+ }
742
+ }
743
+
744
+ if (ascii) for (let i = 0; i < 0x80; i++) map[i] = i
745
+ if (sjis || id === 'euc-jp') {
746
+ if (sjis) map[0x80] = 0x80
747
+ const d = sjis ? 0xfe_c0 : 0x70_c0
748
+ for (let i = 0xff_61; i <= 0xff_9f; i++) map[i] = i - d
749
+ map[0x22_12] = map[0xff_0d]
750
+ map[0xa5] = 0x5c
751
+ map[0x20_3e] = 0x7e
752
+ } else if (tname === 'gb18030') {
753
+ if (id === 'gbk') map[0x20_ac] = 0x80
754
+ for (let i = 0xe7_8d; i <= 0xe7_93; i++) map[i] = i - 0x40_b4
755
+ for (const [a, b] of e7) map[0xe7_00 | a] = 0xa6_00 | b
756
+ for (const [a, b] of e8) map[0xe8_00 | a] = 0xfe_00 | b
757
+ }
758
+
759
+ maps.set(id, map)
760
+ return map
761
+ }
762
+
763
+ const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
764
+ let gb18030r, katakana
765
+
766
+ export function multibyteEncoder(enc, onError) {
767
+ if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
768
+ const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
769
+ const iso2022jp = enc === 'iso-2022-jp'
770
+ const gb18030 = enc === 'gb18030'
771
+ const ascii = isAsciiSuperset(enc)
772
+ const width = iso2022jp ? 5 : gb18030 ? 4 : 2
773
+ const tailsize = iso2022jp ? 3 : 0
774
+ const map = getMap(enc, size, ascii)
775
+ if (gb18030 && !gb18030r) gb18030r = getTable('gb18030-ranges')
776
+ if (iso2022jp && !katakana) katakana = getTable('iso-2022-jp-katakana')
777
+ return (str) => {
778
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
779
+ if (ascii && !NON_LATIN.test(str)) {
780
+ try {
781
+ return encodeAscii(str, E_STRICT)
782
+ } catch {}
783
+ }
784
+
785
+ const length = str.length
786
+ const u8 = new Uint8Array(length * width + tailsize)
787
+ let i = 0
788
+
789
+ if (ascii) {
790
+ while (i < length) {
791
+ const x = str.charCodeAt(i)
792
+ if (x >= 128) break
793
+ u8[i++] = x
794
+ }
795
+ }
796
+
797
+ // eslint-disable-next-line unicorn/consistent-function-scoping
798
+ const err = (code) => {
799
+ if (onError) return onError(code, u8, i)
800
+ throw new TypeError(E_STRICT)
801
+ }
802
+
803
+ if (!map || map.length < size) /* c8 ignore next */ throw new Error('Unreachable') // Important for perf
804
+
805
+ if (iso2022jp) {
806
+ let state = 0 // 0 = ASCII, 1 = Roman, 2 = jis0208
807
+ const restore = () => {
808
+ state = 0
809
+ u8[i++] = 0x1b
810
+ u8[i++] = 0x28
811
+ u8[i++] = 0x42
812
+ }
813
+
814
+ for (let j = 0; j < length; j++) {
815
+ let x = str.charCodeAt(j)
816
+ if (x >= 0xd8_00 && x < 0xe0_00) {
817
+ if (state === 2) restore()
818
+ if (x >= 0xdc_00 || j + 1 === length) {
819
+ i += err(x) // lone
820
+ } else {
821
+ const x1 = str.charCodeAt(j + 1)
822
+ if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
823
+ i += err(x) // lone
824
+ } else {
825
+ j++ // consume x1
826
+ i += err(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
827
+ }
828
+ }
829
+ } else if (x < 0x80) {
830
+ if (state === 2 || (state === 1 && (x === 0x5c || x === 0x7e))) restore()
831
+ if (x === 0xe || x === 0xf || x === 0x1b) {
832
+ i += err(0xff_fd) // 12.2.2. step 3: This returns U+FFFD rather than codePoint to prevent attacks
833
+ } else {
834
+ u8[i++] = x
835
+ }
836
+ } else if (x === 0xa5 || x === 0x20_3e) {
837
+ if (state !== 1) {
838
+ state = 1
839
+ u8[i++] = 0x1b
840
+ u8[i++] = 0x28
841
+ u8[i++] = 0x4a
842
+ }
843
+
844
+ u8[i++] = x === 0xa5 ? 0x5c : 0x7e
845
+ } else {
846
+ if (x === 0x22_12) x = 0xff_0d
847
+ if (x >= 0xff_61 && x <= 0xff_9f) x = katakana[x - 0xff_61]
848
+ const e = map[x]
849
+ if (e) {
850
+ if (state !== 2) {
851
+ state = 2
852
+ u8[i++] = 0x1b
853
+ u8[i++] = 0x24
854
+ u8[i++] = 0x42
855
+ }
856
+
857
+ u8[i++] = e >> 8
858
+ u8[i++] = e & 0xff
859
+ } else {
860
+ if (state === 2) restore()
861
+ i += err(x)
862
+ }
863
+ }
864
+ }
865
+
866
+ if (state) restore()
867
+ } else if (gb18030) {
868
+ // Deduping this branch hurts other encoders perf
869
+ const encode = (cp) => {
870
+ let a = 0, b = 0 // prettier-ignore
871
+ for (const [c, d] of gb18030r) {
872
+ if (d > cp) break
873
+ a = c
874
+ b = d
875
+ }
876
+
877
+ let rp = cp === 0xe7_c7 ? 7457 : a + cp - b
878
+ u8[i++] = 0x81 + ((rp / 12_600) | 0)
879
+ rp %= 12_600
880
+ u8[i++] = 0x30 + ((rp / 1260) | 0)
881
+ rp %= 1260
882
+ u8[i++] = 0x81 + ((rp / 10) | 0)
883
+ u8[i++] = 0x30 + (rp % 10)
884
+ }
885
+
886
+ for (let j = i; j < length; j++) {
887
+ const x = str.charCodeAt(j)
888
+ if (x >= 0xd8_00 && x < 0xe0_00) {
889
+ if (x >= 0xdc_00 || j + 1 === length) {
890
+ i += err(x) // lone
891
+ } else {
892
+ const x1 = str.charCodeAt(j + 1)
893
+ if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
894
+ i += err(x) // lone
895
+ } else {
896
+ j++ // consume x1
897
+ encode(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
898
+ }
899
+ }
900
+ } else {
901
+ const e = map[x]
902
+ if (e & 0xff_00) {
903
+ u8[i++] = e >> 8
904
+ u8[i++] = e & 0xff
905
+ } else if (e || x === 0) {
906
+ u8[i++] = e
907
+ } else if (x === 0xe5_e5) {
908
+ i += err(x)
909
+ } else {
910
+ encode(x)
911
+ }
912
+ }
913
+ }
914
+ } else {
915
+ const long =
916
+ enc === 'big5'
917
+ ? (x) => {
918
+ const e = map[x]
919
+ if (e & 0xff_00) {
920
+ u8[i++] = e >> 8
921
+ u8[i++] = e & 0xff
922
+ } else if (e || x === 0) {
923
+ u8[i++] = e
924
+ } else {
925
+ i += err(x)
926
+ }
927
+ }
928
+ : (x) => {
929
+ i += err(x)
930
+ }
931
+
932
+ for (let j = i; j < length; j++) {
933
+ const x = str.charCodeAt(j)
934
+ if (x >= 0xd8_00 && x < 0xe0_00) {
935
+ if (x >= 0xdc_00 || j + 1 === length) {
936
+ i += err(x) // lone
937
+ } else {
938
+ const x1 = str.charCodeAt(j + 1)
939
+ if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
940
+ i += err(x) // lone
941
+ } else {
942
+ j++ // consume x1
943
+ long(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
944
+ }
945
+ }
946
+ } else {
947
+ const e = map[x]
948
+ if (e & 0xff_00) {
949
+ u8[i++] = e >> 8
950
+ u8[i++] = e & 0xff
951
+ } else if (e || x === 0) {
952
+ u8[i++] = e
953
+ } else {
954
+ i += err(x)
955
+ }
956
+ }
957
+ }
958
+ }
959
+
960
+ return i === u8.length ? u8 : u8.subarray(0, i)
961
+ }
962
+ }