@exodus/bytes 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/single-byte.js CHANGED
@@ -1,16 +1,17 @@
1
1
  import { assertUint8 } from './assert.js'
2
- import { canDecoders, nativeEncoder } from './fallback/_utils.js'
3
- import { encodeAscii } from './fallback/latin1.js'
2
+ import { canDecoders, nativeEncoder, skipWeb, E_STRING } from './fallback/_utils.js'
3
+ import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
4
4
  import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
5
5
 
6
- const { TextDecoder } = globalThis
6
+ const { TextDecoder, btoa } = globalThis
7
7
 
8
8
  let windows1252works
9
9
 
10
10
  // prettier-ignore
11
11
  const skipNative = new Set([
12
- 'iso-8859-16', // iso-8859-16 is somehow broken in WebKit, at least on CI
12
+ 'iso-8859-1', 'iso-8859-9', 'iso-8859-11', // non-WHATWG
13
13
  'iso-8859-6', 'iso-8859-8', 'iso-8859-8-i', // slow in all 3 engines
14
+ 'iso-8859-16', // iso-8859-16 is somehow broken in WebKit, at least on CI
14
15
  ])
15
16
 
16
17
  function shouldUseNative(enc) {
@@ -63,7 +64,20 @@ const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
63
64
  function encode(s, m) {
64
65
  const len = s.length
65
66
  const x = new Uint8Array(len)
66
- for (let i = 0; i < len; i++) {
67
+ let i = nativeEncoder ? 0 : encodeAsciiPrefix(x, s)
68
+
69
+ for (const len3 = len - 3; i < len3; i += 4) {
70
+ const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore
71
+ const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
72
+ if ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3)) return null
73
+
74
+ x[i] = c0
75
+ x[i + 1] = c1
76
+ x[i + 2] = c2
77
+ x[i + 3] = c3
78
+ }
79
+
80
+ for (; i < len; i++) {
67
81
  const x0 = s.charCodeAt(i)
68
82
  const c0 = m[x0]
69
83
  if (!c0 && x0) return null
@@ -73,16 +87,33 @@ function encode(s, m) {
73
87
  return x
74
88
  }
75
89
 
90
+ // fromBase64+btoa path is faster on everything where fromBase64 is fast
91
+ const useLatin1btoa = Uint8Array.fromBase64 && btoa && !skipWeb
92
+
76
93
  export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
77
94
  // TODO: replacement, truncate (replacement will need varying length)
78
95
  if (mode !== 'fatal') throw new Error('Unsupported mode')
79
96
  const m = encodeMap(encoding) // asserts
97
+ const isLatin1 = encoding === 'iso-8859-1'
80
98
 
81
99
  // No single-byte encoder produces surrogate pairs, so any surrogate is invalid
82
100
  // This needs special treatment only to decide how many replacement chars to output, one or two
83
101
  // Not much use in running isWellFormed, most likely cause of error is unmapped chars, not surrogate pairs
84
102
  return (s) => {
85
- if (typeof s !== 'string') throw new TypeError('Input is not a string')
103
+ if (typeof s !== 'string') throw new TypeError(E_STRING)
104
+ if (isLatin1) {
105
+ // max limit is to not produce base64 strings that are too long
106
+ if (useLatin1btoa && s.length >= 1024 && s.length < 1e8) {
107
+ try {
108
+ return Uint8Array.fromBase64(btoa(s)) // fails on non-latin1
109
+ } catch {
110
+ throw new TypeError(E_STRICT)
111
+ }
112
+ }
113
+
114
+ if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
115
+ return encodeLatin1(s)
116
+ }
86
117
 
87
118
  // Instead of an ASCII regex check, encode optimistically - this is faster
88
119
  // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
@@ -98,5 +129,7 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
98
129
  }
99
130
  }
100
131
 
132
+ export const latin1toString = createSinglebyteDecoder('iso-8859-1')
133
+ export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
101
134
  export const windows1252toString = createSinglebyteDecoder('windows-1252')
102
135
  export const windows1252fromString = createSinglebyteEncoder('windows-1252')
package/single-byte.node.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { assertUint8 } from './assert.js'
2
2
  import { isAscii } from 'node:buffer'
3
- import { isDeno, isLE, toBuf } from './fallback/_utils.js'
3
+ import { isDeno, isLE, toBuf, E_STRING } from './fallback/_utils.js'
4
4
  import { asciiPrefix } from './fallback/latin1.js'
5
5
  import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
6
6
 
@@ -23,7 +23,6 @@ function latin1Prefix(arr, start) {
23
23
 
24
24
  export function createSinglebyteDecoder(encoding, loose = false) {
25
25
  if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
26
- const latin1path = encoding === 'windows-1252'
27
26
  if (isDeno) {
28
27
  const jsDecoder = encodingDecoder(encoding) // asserts
29
28
  return (arr) => {
@@ -34,11 +33,13 @@ export function createSinglebyteDecoder(encoding, loose = false) {
34
33
  }
35
34
  }
36
35
 
36
+ const isLatin1 = encoding === 'iso-8859-1'
37
+ const latin1path = encoding === 'windows-1252'
37
38
  const { incomplete, mapper } = encodingMapper(encoding) // asserts
38
39
  return (arr) => {
39
40
  assertUint8(arr)
40
41
  if (arr.byteLength === 0) return ''
41
- if (isAscii(arr)) return toBuf(arr).latin1Slice(0, arr.byteLength) // .latin1Slice is faster than .asciiSlice
42
+ if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice
42
43
 
43
44
  // Node.js TextDecoder is broken, so we can't use it. It's also slow anyway
44
45
 
@@ -60,13 +61,45 @@ export function createSinglebyteDecoder(encoding, loose = false) {
60
61
 
61
62
  const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
62
63
 
64
+ function encode(s, m) {
65
+ const len = s.length
66
+ let i = 0
67
+ const b = Buffer.from(s, 'utf-16le') // aligned
68
+ if (!isLE) b.swap16()
69
+ const x = new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2)
70
+ for (const len3 = len - 3; i < len3; i += 4) {
71
+ const x0 = x[i], x1 = x[i + 1], x2 = x[i + 2], x3 = x[i + 3] // prettier-ignore
72
+ const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
73
+ if (!(c0 && c1 && c2 && c3) && ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3))) return null // prettier-ignore
74
+ x[i] = c0
75
+ x[i + 1] = c1
76
+ x[i + 2] = c2
77
+ x[i + 3] = c3
78
+ }
79
+
80
+ for (; i < len; i++) {
81
+ const x0 = x[i]
82
+ const c0 = m[x0]
83
+ if (!c0 && x0) return null
84
+ x[i] = c0
85
+ }
86
+
87
+ return new Uint8Array(x)
88
+ }
89
+
63
90
  export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
64
91
  // TODO: replacement, truncate (replacement will need varying length)
65
92
  if (mode !== 'fatal') throw new Error('Unsupported mode')
66
93
  const m = encodeMap(encoding) // asserts
94
+ const isLatin1 = encoding === 'iso-8859-1'
67
95
 
68
96
  return (s) => {
69
- if (typeof s !== 'string') throw new TypeError('Input is not a string')
97
+ if (typeof s !== 'string') throw new TypeError(E_STRING)
98
+ if (isLatin1) {
99
+ if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
100
+ const b = Buffer.from(s, 'latin1')
101
+ return new Uint8Array(b.buffer, b.byteOffset, b.byteLength)
102
+ }
70
103
 
71
104
  // Instead of an ASCII regex check, encode optimistically - this is faster
72
105
  // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
@@ -75,34 +108,13 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
75
108
  if (b.length === s.length) return new Uint8Array(b.buffer, b.byteOffset, b.byteLength)
76
109
  }
77
110
 
78
- const len = s.length
79
- let i = 0
80
- const b = Buffer.from(s, 'utf-16le') // aligned
81
- if (!isLE) b.swap16()
82
- const x = new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2)
83
- for (const len3 = len - 3; i < len3; i += 4) {
84
- const x0 = x[i], x1 = x[i + 1], x2 = x[i + 2], x3 = x[i + 3] // prettier-ignore
85
- const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
86
- if (!(c0 && c1 && c2 && c3) && ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3))) {
87
- throw new TypeError(E_STRICT)
88
- }
89
-
90
- x[i] = c0
91
- x[i + 1] = c1
92
- x[i + 2] = c2
93
- x[i + 3] = c3
94
- }
95
-
96
- for (; i < len; i++) {
97
- const x0 = x[i]
98
- const c0 = m[x0]
99
- if (!c0 && x0) throw new TypeError(E_STRICT)
100
- x[i] = c0
101
- }
102
-
103
- return new Uint8Array(x)
111
+ const res = encode(s, m)
112
+ if (!res) throw new TypeError(E_STRICT)
113
+ return res
104
114
  }
105
115
  }
106
116
 
117
+ export const latin1toString = createSinglebyteDecoder('iso-8859-1')
118
+ export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
107
119
  export const windows1252toString = createSinglebyteDecoder('windows-1252')
108
120
  export const windows1252fromString = createSinglebyteEncoder('windows-1252')
package/utf16.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import * as js from './fallback/utf16.js'
2
- import { canDecoders, isLE } from './fallback/_utils.js'
2
+ import { canDecoders, isLE, E_STRING } from './fallback/_utils.js'
3
3
 
4
4
  const { TextDecoder } = globalThis // Buffer is optional
5
5
  const ignoreBOM = true
@@ -18,7 +18,7 @@ const { E_STRICT, E_STRICT_UNICODE } = js
18
18
  const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
19
19
 
20
20
  function encode(str, loose = false, format = 'uint16') {
21
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
21
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
22
22
  if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
23
23
  throw new TypeError('Unknown format')
24
24
  }
@@ -34,6 +34,7 @@ function encode(str, loose = false, format = 'uint16') {
34
34
 
35
35
  if (format === 'uint8-le' || format === 'uint8-be') return to8(u16) // Already swapped
36
36
  if (format === 'uint16') return u16
37
+ /* c8 ignore next */
37
38
  throw new Error('Unreachable')
38
39
  }
39
40
 
package/utf16.node.js CHANGED
@@ -1,4 +1,4 @@
1
- import { isDeno, isLE } from './fallback/_utils.js'
1
+ import { isDeno, isLE, E_STRING } from './fallback/_utils.js'
2
2
  import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf16.js'
3
3
 
4
4
  if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
@@ -9,7 +9,7 @@ const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
9
9
  // Unlike utf8, operates on Uint16Arrays by default
10
10
 
11
11
  function encode(str, loose = false, format = 'uint16') {
12
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
12
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
13
13
  if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
14
14
  throw new TypeError('Unknown format')
15
15
  }
@@ -30,11 +30,15 @@ function encode(str, loose = false, format = 'uint16') {
30
30
  return new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2)
31
31
  }
32
32
 
33
+ /* c8 ignore next */
33
34
  throw new Error('Unreachable')
34
35
  }
35
36
 
36
- const swapped = (x, swap) =>
37
- swap ? Buffer.from(x).swap16() : Buffer.from(x.buffer, x.byteOffset, x.byteLength)
37
+ // Convert to Buffer view or a swapped Buffer copy
38
+ const swapped = (x, swap) => {
39
+ const b = Buffer.from(x.buffer, x.byteOffset, x.byteLength)
40
+ return swap ? Buffer.from(b).swap16() : b
41
+ }
38
42
 
39
43
  // We skip TextDecoder on Node.js, as it is for some reason significantly slower than Buffer for utf16
40
44
  // Also, it incorrectly misses replacements when Node.js is built without ICU, we fix that
package/utf8.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { assertUint8 } from './assert.js'
2
2
  import { typedView } from './array.js'
3
- import { isHermes, nativeDecoder, nativeEncoder } from './fallback/_utils.js'
3
+ import { isHermes, nativeDecoder, nativeEncoder, E_STRING } from './fallback/_utils.js'
4
4
  import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
5
5
  import * as js from './fallback/utf8.js'
6
6
 
@@ -44,7 +44,7 @@ function deLoose(str, loose, res) {
44
44
  }
45
45
 
46
46
  function encode(str, loose = false) {
47
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
47
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
48
48
  if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
49
49
  if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
50
50
  // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
package/utf8.node.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { assertUint8 } from './assert.js'
2
2
  import { typedView } from './array.js'
3
+ import { E_STRING } from './fallback/_utils.js'
3
4
  import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf8.js'
4
5
  import { isAscii } from 'node:buffer'
5
6
 
@@ -17,7 +18,7 @@ try {
17
18
  }
18
19
 
19
20
  function encode(str, loose = false) {
20
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
21
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
21
22
  const strLength = str.length
22
23
  if (strLength === 0) return new Uint8Array() // faster than Uint8Array.of
23
24
  let res