@exodus/bytes 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -33,16 +33,30 @@ See [Performance](./Performance.md) for more info
33
33
  import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
34
34
  ```
35
35
 
36
- Less than half the bundle size of [text-encoding](https://npmjs.com/text-encoding), [whatwg-encoding](https://npmjs.com/whatwg-encoding) or [iconv-lite](https://npmjs.com/iconv-lite) (gzipped or not), and [is much faster](#fast).
37
- See also [lite version](#lite-version).
36
+ Less than half the bundle size of [text-encoding](https://npmjs.com/text-encoding), [whatwg-encoding](https://npmjs.com/whatwg-encoding) or [iconv-lite](https://npmjs.com/iconv-lite) (gzipped or not).\
37
+ Also [much faster](#fast) than all of those.
38
38
 
39
- Spec compliant, passing WPT and covered with extra tests.
39
+ > [!TIP]
40
+ > See also the [lite version](#lite-version) to get this down to 9 KiB gzipped.
40
41
 
41
- Moreover, tests for this library uncovered [bugs in all major implementations](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit).
42
+ Spec compliant, passing WPT and covered with extra tests.\
43
+ Moreover, tests for this library uncovered [bugs in all major implementations](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit).\
44
+ This includes all three major browser engines decoding UTF-8 incorrectly.\
45
+ See [WPT pull request](https://github.com/web-platform-tests/wpt/pull/56892).
42
46
 
43
- [Faster than Node.js native implementation on Node.js](https://github.com/nodejs/node/issues/61041#issuecomment-3649242024).
47
+ It works correctly even in environments whose native implementations are broken (currently, that's all of them).\
44
48
  Runs (and passes WPT) on Node.js built without ICU.
45
49
 
50
+ > [!NOTE]
51
+ > [Faster than Node.js native implementation on Node.js](https://github.com/nodejs/node/issues/61041#issuecomment-3649242024).
52
+ >
53
+ > The JS multi-byte version is as fast as the native implementations in Node.js and browsers, but (unlike them) returns correct results.
54
+ >
55
+ > For encodings where the native version is known to be fast and correct, it is used automatically.\
56
+ > Some single-byte encodings are faster than native in all three major browser engines.
57
+
58
+ See [analysis table](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit) for more info.
59
+
46
60
  ### Caveat: `TextDecoder` / `TextEncoder` APIs are lossy by default per spec
47
61
 
48
62
  _These are only provided as a compatibility layer, prefer hardened APIs instead in new code._
@@ -128,3 +128,9 @@ export function decode2string(arr, start, end, m) {
128
128
  export function assert(condition, msg) {
129
129
  if (!condition) throw new Error(msg)
130
130
  }
131
+
132
+ // For arrays stored in heap (<= 64 bytes) it's cheaper to copy into a pooled buffer than to lazily create the ArrayBuffer storage
133
+ export const toBuf = (x) =>
134
+ x.byteLength <= 64 && x.BYTES_PER_ELEMENT === 1
135
+ ? Buffer.from(x)
136
+ : Buffer.from(x.buffer, x.byteOffset, x.byteLength)
@@ -5,6 +5,7 @@ import {
5
5
  nativeBuffer,
6
6
  isHermes,
7
7
  isDeno,
8
+ isLE,
8
9
  } from './_utils.js'
9
10
 
10
11
  // See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
@@ -60,6 +61,16 @@ export function decodeLatin1(arr, start = 0, stop = arr.length) {
60
61
  return String.fromCharCode.apply(String, sliced)
61
62
  }
62
63
 
64
+ // Unchecked for well-formedness, raw. Expects Uint16Array input
65
+ export const decodeUCS2 =
66
+ nativeBuffer && isLE && !isDeno
67
+ ? (u16, stop = u16.length) => {
68
+ // TODO: fast path for BE, perhaps faster path for Deno. Note that decoder replaces, this function doesn't
69
+ if (stop > 32) return nativeBuffer.from(u16.buffer, u16.byteOffset, stop * 2).ucs2Slice() // from 64 bytes, below are in heap
70
+ return decodeLatin1(u16, 0, stop)
71
+ }
72
+ : (u16, stop = u16.length) => decodeLatin1(u16, 0, stop)
73
+
63
74
  // Does not check input, uses best available method
64
75
  // Building an array for this is only faster than proper string concatenation when TextDecoder or native Buffer are available
65
76
  export const decodeAscii = nativeBuffer
@@ -70,7 +81,10 @@ export const decodeAscii = nativeBuffer
70
81
  : nativeDecoder.decode(a) // On Node.js, utf8 decoder is faster than latin1
71
82
  : nativeDecoderLatin1
72
83
  ? (a) => nativeDecoderLatin1.decode(a) // On browsers (specifically WebKit), latin1 decoder is faster than utf8
73
- : (a) => decodeLatin1(new Uint8Array(a.buffer, a.byteOffset, a.byteLength)) // Fallback. We shouldn't get here, constructing with strings directly is faster
84
+ : (a) =>
85
+ decodeLatin1(
86
+ a instanceof Uint8Array ? a : new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
87
+ )
74
88
 
75
89
  /* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
76
90
 
@@ -1,4 +1,4 @@
1
- import { asciiPrefix, decodeLatin1 } from './latin1.js'
1
+ import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2 } from './latin1.js'
2
2
  import { getTable } from './multi-byte.table.js'
3
3
 
4
4
  export const E_STRICT = 'Input is not well-formed for this encoding'
@@ -11,36 +11,48 @@ export const E_STRICT = 'Input is not well-formed for this encoding'
11
11
  // Common between euc-kr and big5
12
12
  function bigDecoder(err, pair) {
13
13
  let lead = 0
14
+ let oi = 0
15
+ let o16
14
16
 
15
17
  const decodeLead = (b) => {
16
- const str = pair(lead, b)
18
+ const p = pair(lead, b)
17
19
  lead = 0
18
- if (str) return str
19
- return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
20
+ if (typeof p === 'number') {
21
+ o16[oi++] = p
22
+ } else if (p) {
23
+ // This is still faster than string concatenation. Can we optimize strings though?
24
+ for (let i = 0; i < p.length; i++) o16[oi++] = p.charCodeAt(i)
25
+ } else {
26
+ o16[oi++] = err()
27
+ if (b < 128) o16[oi++] = b
28
+ }
20
29
  }
21
30
 
22
31
  const decode = (arr, start, end, stream) => {
23
- let res = ''
24
32
  let i = start
33
+ o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte
34
+ oi = 0
25
35
 
26
- if (lead && i < end) res += decodeLead(arr[i++])
36
+ if (lead && i < end) decodeLead(arr[i++])
27
37
  while (i < end) {
28
38
  const b = arr[i++]
29
39
  if (b < 128) {
30
- res += String.fromCharCode(b)
40
+ o16[oi++] = b
31
41
  } else if (b === 0x80 || b === 0xff) {
32
- res += String.fromCharCode(err())
42
+ o16[oi++] = err()
33
43
  } else {
34
44
  lead = b
35
- if (i < end) res += decodeLead(arr[i++])
45
+ if (i < end) decodeLead(arr[i++])
36
46
  }
37
47
  }
38
48
 
39
49
  if (lead && !stream) {
40
50
  lead = 0
41
- res += String.fromCharCode(err())
51
+ o16[oi++] = err()
42
52
  }
43
53
 
54
+ const res = decodeUCS2(o16, oi)
55
+ o16 = null
44
56
  return res
45
57
  }
46
58
 
@@ -57,7 +69,7 @@ const mappers = {
57
69
  return bigDecoder(err, (l, b) => {
58
70
  if (b < 0x41 || b > 0xfe) return
59
71
  const cp = euc[(l - 0x81) * 190 + b - 0x41]
60
- return cp !== undefined && cp !== REP ? String.fromCharCode(cp) : undefined
72
+ return cp !== undefined && cp !== REP ? cp : undefined
61
73
  })
62
74
  },
63
75
  // https://encoding.spec.whatwg.org/#euc-jp-decoder
@@ -66,55 +78,61 @@ const mappers = {
66
78
  const jis0212 = getTable('jis0212')
67
79
  let j12 = false
68
80
  let lead = 0
81
+ let oi = 0
82
+ let o16
69
83
 
70
84
  const decodeLead = (b) => {
71
85
  if (lead === 0x8e && b >= 0xa1 && b <= 0xdf) {
72
86
  lead = 0
73
- return String.fromCharCode(0xfe_c0 + b)
74
- }
75
-
76
- if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) {
87
+ o16[oi++] = 0xfe_c0 + b
88
+ } else if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) {
77
89
  j12 = true
78
90
  lead = b
79
- return ''
80
- }
91
+ } else {
92
+ let cp
93
+ if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
94
+ cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
95
+ }
81
96
 
82
- let cp
83
- if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
84
- cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
97
+ lead = 0
98
+ j12 = false
99
+ if (cp !== undefined && cp !== REP) {
100
+ o16[oi++] = cp
101
+ } else {
102
+ o16[oi++] = err()
103
+ if (b < 128) o16[oi++] = b
104
+ }
85
105
  }
86
-
87
- lead = 0
88
- j12 = false
89
- if (cp !== undefined && cp !== REP) return String.fromCharCode(cp)
90
- return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
91
106
  }
92
107
 
93
108
  const decode = (arr, start, end, stream) => {
94
- let res = ''
95
109
  let i = start
110
+ o16 = new Uint16Array(end - start + (lead ? 1 : 0))
111
+ oi = 0
96
112
 
97
- if (lead && i < end) res += decodeLead(arr[i++])
98
- if (lead && i < end) res += decodeLead(arr[i++]) // could be two leads, but no more
113
+ if (lead && i < end) decodeLead(arr[i++])
114
+ if (lead && i < end) decodeLead(arr[i++]) // could be two leads, but no more
99
115
  while (i < end) {
100
116
  const b = arr[i++]
101
117
  if (b < 128) {
102
- res += String.fromCharCode(b)
118
+ o16[oi++] = b
103
119
  } else if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) {
104
- res += String.fromCharCode(err())
120
+ o16[oi++] = err()
105
121
  } else {
106
122
  lead = b
107
- if (i < end) res += decodeLead(arr[i++])
108
- if (lead && i < end) res += decodeLead(arr[i++]) // could be two leads
123
+ if (i < end) decodeLead(arr[i++])
124
+ if (lead && i < end) decodeLead(arr[i++]) // could be two leads
109
125
  }
110
126
  }
111
127
 
112
128
  if (lead && !stream) {
113
129
  lead = 0
114
130
  j12 = false // can be true only when lead is non-zero
115
- res += String.fromCharCode(err())
131
+ o16[oi++] = err()
116
132
  }
117
133
 
134
+ const res = decodeUCS2(o16, oi)
135
+ o16 = null
118
136
  return res
119
137
  }
120
138
 
@@ -238,7 +256,8 @@ const mappers = {
238
256
  }
239
257
 
240
258
  const decode = (arr, start, end, stream) => {
241
- let res = ''
259
+ const o16 = new Uint16Array(end - start + 2) // err in eof + lead from state
260
+ let oi = 0
242
261
  let i = start
243
262
  const pushback = [] // local and auto-cleared
244
263
 
@@ -246,7 +265,7 @@ const mappers = {
246
265
  // Same as the full loop, but without EOF handling
247
266
  while (i < end || pushback.length > 0) {
248
267
  const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
249
- if (c !== undefined) res += String.fromCodePoint(c)
268
+ if (c !== undefined) o16[oi++] = c // 16-bit
250
269
  }
251
270
 
252
271
  // Then, dump EOF. This needs the same loop as the characters can be pushed back
@@ -254,11 +273,11 @@ const mappers = {
254
273
  while (i <= end || pushback.length > 0) {
255
274
  if (i < end || pushback.length > 0) {
256
275
  const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
257
- if (c !== undefined) res += String.fromCodePoint(c)
276
+ if (c !== undefined) o16[oi++] = c // 16-bit
258
277
  } else {
259
278
  const c = eof(pushback)
260
279
  if (c === null) break // clean exit
261
- res += String.fromCodePoint(c)
280
+ o16[oi++] = c
262
281
  }
263
282
  }
264
283
  }
@@ -272,7 +291,7 @@ const mappers = {
272
291
  out = false
273
292
  }
274
293
 
275
- return res
294
+ return decodeUCS2(o16, oi)
276
295
  }
277
296
 
278
297
  return { decode, isAscii: () => false }
@@ -281,44 +300,57 @@ const mappers = {
281
300
  shift_jis: (err) => {
282
301
  const jis0208 = getTable('jis0208')
283
302
  let lead = 0
303
+ let oi = 0
304
+ let o16
284
305
 
285
306
  const decodeLead = (b) => {
286
307
  const l = lead
287
308
  lead = 0
288
309
  if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
289
310
  const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
290
- if (p >= 8836 && p <= 10_715) return String.fromCharCode(0xe0_00 - 8836 + p)
311
+ if (p >= 8836 && p <= 10_715) {
312
+ o16[oi++] = 0xe0_00 - 8836 + p
313
+ return
314
+ }
315
+
291
316
  const cp = jis0208[p]
292
- if (cp !== undefined && cp !== REP) return String.fromCharCode(cp)
317
+ if (cp !== undefined && cp !== REP) {
318
+ o16[oi++] = cp
319
+ return
320
+ }
293
321
  }
294
322
 
295
- return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
323
+ o16[oi++] = err()
324
+ if (b < 128) o16[oi++] = b
296
325
  }
297
326
 
298
327
  const decode = (arr, start, end, stream) => {
299
- let res = ''
328
+ o16 = new Uint16Array(end - start + (lead ? 1 : 0))
329
+ oi = 0
300
330
  let i = start
301
331
 
302
- if (lead && i < end) res += decodeLead(arr[i++])
332
+ if (lead && i < end) decodeLead(arr[i++])
303
333
  while (i < end) {
304
334
  const b = arr[i++]
305
335
  if (b <= 0x80) {
306
- res += String.fromCharCode(b) // 0x80 is allowed
336
+ o16[oi++] = b // 0x80 is allowed
307
337
  } else if (b >= 0xa1 && b <= 0xdf) {
308
- res += String.fromCharCode(0xfe_c0 + b)
338
+ o16[oi++] = 0xfe_c0 + b
309
339
  } else if (b === 0xa0 || b > 0xfc) {
310
- res += String.fromCharCode(err())
340
+ o16[oi++] = err()
311
341
  } else {
312
342
  lead = b
313
- if (i < end) res += decodeLead(arr[i++])
343
+ if (i < end) decodeLead(arr[i++])
314
344
  }
315
345
  }
316
346
 
317
347
  if (lead && !stream) {
318
348
  lead = 0
319
- res += String.fromCharCode(err())
349
+ o16[oi++] = err()
320
350
  }
321
351
 
352
+ const res = decodeUCS2(o16, oi)
353
+ o16 = null
322
354
  return res
323
355
  }
324
356
 
@@ -349,7 +381,8 @@ const mappers = {
349
381
  // g3 is 0 or 0x81-0xfe
350
382
 
351
383
  const decode = (arr, start, end, stream) => {
352
- let res = ''
384
+ const o16 = new Uint16Array(end - start + (g1 ? 3 : 0)) // even with pushback it's at most 1 char per byte
385
+ let oi = 0
353
386
  let i = start
354
387
  const pushback = [] // local and auto-cleared
355
388
 
@@ -357,30 +390,38 @@ const mappers = {
357
390
  // Same as the full loop, but without EOF handling
358
391
  while (i < end || pushback.length > 0) {
359
392
  const b = pushback.length > 0 ? pushback.pop() : arr[i++]
360
- if (g3) {
361
- if (b < 0x30 || b > 0x39) {
362
- pushback.push(b, g3, g2)
363
- g1 = g2 = g3 = 0
364
- res += String.fromCharCode(err())
365
- } else {
366
- const p = index((g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30)
367
- g1 = g2 = g3 = 0
368
- if (p === undefined) {
369
- res += String.fromCharCode(err())
393
+ if (g1) {
394
+ // g2 can be set only when g1 is set, g3 can be set only when g2 is set
395
+ // hence, 3 checks for g3 is faster than 3 checks for g1
396
+ if (g2) {
397
+ if (g3) {
398
+ if (b < 0x30 || b > 0x39) {
399
+ pushback.push(b, g3, g2)
400
+ g1 = g2 = g3 = 0
401
+ o16[oi++] = err()
402
+ } else {
403
+ const p = index(
404
+ (g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30
405
+ )
406
+ g1 = g2 = g3 = 0
407
+ if (p === undefined) {
408
+ o16[oi++] = err()
409
+ } else if (p <= 0xff_ff) {
410
+ o16[oi++] = p // Can validly return replacement
411
+ } else {
412
+ const d = p - 0x1_00_00
413
+ o16[oi++] = 0xd8_00 | (d >> 10)
414
+ o16[oi++] = 0xdc_00 | (d & 0x3_ff)
415
+ }
416
+ }
417
+ } else if (b >= 0x81 && b <= 0xfe) {
418
+ g3 = b
370
419
  } else {
371
- res += String.fromCodePoint(p) // Can validly return replacement
420
+ pushback.push(b, g2)
421
+ g1 = g2 = 0
422
+ o16[oi++] = err()
372
423
  }
373
- }
374
- } else if (g2) {
375
- if (b >= 0x81 && b <= 0xfe) {
376
- g3 = b
377
- } else {
378
- pushback.push(b, g2)
379
- g1 = g2 = 0
380
- res += String.fromCharCode(err())
381
- }
382
- } else if (g1) {
383
- if (b >= 0x30 && b <= 0x39) {
424
+ } else if (b >= 0x30 && b <= 0x39) {
384
425
  g2 = b
385
426
  } else {
386
427
  let cp
@@ -390,18 +431,18 @@ const mappers = {
390
431
 
391
432
  g1 = 0
392
433
  if (cp !== undefined && cp !== REP) {
393
- res += String.fromCodePoint(cp)
434
+ o16[oi++] = cp // 16-bit
394
435
  } else {
395
- res += String.fromCharCode(err())
396
- if (b < 128) res += String.fromCharCode(b) // can be processed immediately
436
+ o16[oi++] = err()
437
+ if (b < 128) o16[oi++] = b // can be processed immediately
397
438
  }
398
439
  }
399
440
  } else if (b < 128) {
400
- res += String.fromCharCode(b)
441
+ o16[oi++] = b
401
442
  } else if (b === 0x80) {
402
- res += '\u20AC'
443
+ o16[oi++] = 0x20_ac
403
444
  } else if (b === 0xff) {
404
- res += String.fromCharCode(err())
445
+ o16[oi++] = err()
405
446
  } else {
406
447
  g1 = b
407
448
  }
@@ -410,10 +451,10 @@ const mappers = {
410
451
  // if g1 = 0 then g2 = g3 = 0
411
452
  if (g1 && !stream) {
412
453
  g1 = g2 = g3 = 0
413
- res += String.fromCharCode(err())
454
+ o16[oi++] = err()
414
455
  }
415
456
 
416
- return res
457
+ return decodeUCS2(o16, oi)
417
458
  }
418
459
 
419
460
  return { decode, isAscii: () => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
@@ -452,8 +493,9 @@ export function multibyteDecoder(enc, loose = false) {
452
493
  return (arr, stream = false) => {
453
494
  let res = ''
454
495
  if (asciiSuperset && (!mapper || mapper.isAscii?.())) {
455
- res = decodeLatin1(arr, 0, asciiPrefix(arr))
456
- if (res.length === arr.length) return res // ascii
496
+ const prefixLen = asciiPrefix(arr)
497
+ if (prefixLen === arr.length) return decodeAscii(arr) // ascii
498
+ res = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
457
499
  }
458
500
 
459
501
  streaming = stream // affects onErr
@@ -56,7 +56,9 @@ function unwrap(res, t, pos, stringMode = false) {
56
56
  }
57
57
 
58
58
  if (stringMode) {
59
- for (let k = 0; k < x; k++, pos++, code++) res[pos] = String.fromCodePoint(code)
59
+ for (let k = 0; k < x; k++, pos++, code++) {
60
+ res[pos] = code <= 0xff_ff ? code : String.fromCodePoint(code)
61
+ }
60
62
  } else {
61
63
  for (let k = 0; k < x; k++, pos++, code++) res[pos] = code
62
64
  }
@@ -65,8 +67,13 @@ function unwrap(res, t, pos, stringMode = false) {
65
67
  pos = unwrap(res, indices[x], pos, stringMode) // self-reference using shared chunks
66
68
  } else if (stringMode) {
67
69
  const s = [...utf16toString(loadBase64(x), 'uint8-le')] // splits by codepoints
68
- for (let i = 0; i < s.length; ) res[pos++] = s[i++] // TODO: splice?
69
- code = s[s.length - 1].codePointAt(0) + 1
70
+ let char
71
+ for (let i = 0; i < s.length; ) {
72
+ char = s[i++]
73
+ res[pos++] = char.length === 1 ? char.charCodeAt(0) : char // strings only for high codepoints
74
+ }
75
+
76
+ code = char.codePointAt(0) + 1
70
77
  } else {
71
78
  const u16 = to16input(loadBase64(x), true) // data is little-endian
72
79
  res.set(u16, pos)
@@ -1,4 +1,4 @@
1
- import { asciiPrefix, decodeLatin1 } from './latin1.js'
1
+ import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
2
2
  import encodings from './single-byte.encodings.js'
3
3
  import { decode2string } from './_utils.js'
4
4
 
@@ -74,8 +74,9 @@ export function encodingDecoder(encoding) {
74
74
  strings = allCodes.map((c) => String.fromCharCode(c))
75
75
  }
76
76
 
77
- const prefix = decodeLatin1(arr, 0, asciiPrefix(arr))
78
- if (prefix.length === arr.length) return prefix
77
+ const prefixLen = asciiPrefix(arr)
78
+ if (prefixLen === arr.length) return decodeAscii(arr)
79
+ const prefix = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
79
80
  const suffix = decode2string(arr, prefix.length, arr.length, strings)
80
81
  if (!loose && incomplete && suffix.includes('\uFFFD')) throw new TypeError(E_STRICT)
81
82
  return prefix + suffix
package/fallback/utf16.js CHANGED
@@ -1,4 +1,4 @@
1
- import { decodeLatin1, encodeCharcodes } from './latin1.js'
1
+ import { decodeUCS2, encodeCharcodes } from './latin1.js'
2
2
  import { isLE } from './_utils.js'
3
3
 
4
4
  export const E_STRICT = 'Input is not well-formed utf16'
@@ -38,9 +38,9 @@ export function to16input(u8, le) {
38
38
  }
39
39
 
40
40
  export const decode = (u16, loose = false, checked = false) => {
41
- if (checked || isWellFormed(u16)) return decodeLatin1(u16, 0, u16.length) // it's capable of decoding Uint16Array to UTF-16 as well
41
+ if (checked || isWellFormed(u16)) return decodeUCS2(u16)
42
42
  if (!loose) throw new TypeError(E_STRICT)
43
- return decodeLatin1(toWellFormed(Uint16Array.from(u16)), 0, u16.length) // cloned for replacement
43
+ return decodeUCS2(toWellFormed(Uint16Array.from(u16))) // cloned for replacement
44
44
  }
45
45
 
46
46
  export function encode(str, loose = false, checked = false, swapped = false) {
@@ -1,10 +1,8 @@
1
1
  import { assertUint8 } from './assert.js'
2
- import { isDeno } from './fallback/_utils.js'
2
+ import { isDeno, toBuf } from './fallback/_utils.js'
3
3
  import { isAsciiSuperset, multibyteDecoder } from './fallback/multi-byte.js'
4
4
  import { isAscii } from 'node:buffer'
5
5
 
6
- const toBuf = (x) => Buffer.from(x.buffer, x.byteOffset, x.byteLength)
7
-
8
6
  export function createMultibyteDecoder(encoding, loose = false) {
9
7
  const jsDecoder = multibyteDecoder(encoding, loose) // asserts
10
8
  let streaming = false
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exodus/bytes",
3
- "version": "1.5.0",
3
+ "version": "1.6.0",
4
4
  "description": "Various operations on Uint8Array data",
5
5
  "scripts": {
6
6
  "lint": "eslint .",
package/single-byte.js CHANGED
@@ -6,6 +6,12 @@ const { TextDecoder } = globalThis
6
6
 
7
7
  let windows1252works
8
8
 
9
+ // prettier-ignore
10
+ const skipNative = new Set([
11
+ 'iso-8859-16', // iso-8859-16 is somehow broken in WebKit, at least on CI
12
+ 'iso-8859-6', 'iso-8859-8', 'iso-8859-8-i', // slow in all 3 engines
13
+ ])
14
+
9
15
  function shouldUseNative(enc) {
10
16
  // https://issues.chromium.org/issues/468458388
11
17
  // Also might be incorrectly implemented on platforms as Latin1 (e.g. in Node.js) or regress
@@ -24,8 +30,7 @@ function shouldUseNative(enc) {
24
30
  return windows1252works
25
31
  }
26
32
 
27
- // iso-8859-16 is somehow broken in WebKit, at least on CI
28
- return enc !== 'iso-8859-16'
33
+ return !skipNative.has(enc)
29
34
  }
30
35
 
31
36
  export function createSinglebyteDecoder(encoding, loose = false) {
@@ -1,11 +1,9 @@
1
1
  import { assertUint8 } from './assert.js'
2
2
  import { isAscii } from 'node:buffer'
3
- import { isDeno, isLE } from './fallback/_utils.js'
3
+ import { isDeno, isLE, toBuf } from './fallback/_utils.js'
4
4
  import { asciiPrefix } from './fallback/latin1.js'
5
5
  import { encodingMapper, encodingDecoder, E_STRICT } from './fallback/single-byte.js'
6
6
 
7
- const toBuf = (x) => Buffer.from(x.buffer, x.byteOffset, x.byteLength)
8
-
9
7
  function latin1Prefix(arr, start) {
10
8
  let p = start | 0
11
9
  const length = arr.length
package/utf8.js CHANGED
@@ -57,7 +57,7 @@ function decode(arr, loose = false) {
57
57
  if (nativeDecoder) return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
58
58
 
59
59
  // Fast path for ASCII prefix, this is faster than all alternatives below
60
- const prefix = decodeLatin1(arr, 0, asciiPrefix(arr))
60
+ const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
61
61
  if (prefix.length === arr.length) return prefix
62
62
 
63
63
  // This codepath gives a ~3x perf boost on Hermes