@exodus/bytes 1.12.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/fallback/utf16.js CHANGED
@@ -1,14 +1,81 @@
1
- import { decodeUCS2, encodeCharcodes } from './latin1.js'
2
- import { isLE } from './_utils.js'
1
+ import { decodeUCS2 } from './latin1.js'
2
+ import { assertU8, E_STRING, E_STRICT_UNICODE } from './_utils.js'
3
+ import { nativeDecoder, isLE, encodeCharcodes } from './platform.js'
3
4
 
4
5
  export const E_STRICT = 'Input is not well-formed utf16'
5
- export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
6
+ const isWellFormedStr = /* @__PURE__ */ (() => String.prototype.isWellFormed)()
7
+ const toWellFormedStr = /* @__PURE__ */ (() => String.prototype.toWellFormed)()
6
8
 
7
9
  const replacementCodepoint = 0xff_fd
8
10
  const replacementCodepointSwapped = 0xfd_ff
9
11
 
10
12
  const to16 = (a) => new Uint16Array(a.buffer, a.byteOffset, a.byteLength / 2) // Requires checked length and alignment!
11
13
 
14
+ export function encodeApi(str, loose, format) {
15
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
16
+ if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
17
+ throw new TypeError('Unknown format')
18
+ }
19
+
20
+ // On v8 and SpiderMonkey, check via isWellFormed is faster than js
21
+ // On JSC, check during loop is faster than isWellFormed
22
+ // If isWellFormed is available, we skip check during decoding and recheck after
23
+ // If isWellFormed is unavailable, we check in js during decoding
24
+ if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT_UNICODE)
25
+ const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
26
+ const u16 = encode(str, loose, !loose && isWellFormedStr, shouldSwap)
27
+
28
+ // Bytes are already swapped and format is already checked, we need to just cast the view
29
+ return format === 'uint16' ? u16 : new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength)
30
+ }
31
+
32
+ const fatalLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true, fatal: true }) : null
33
+ const looseLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true }) : null
34
+ const fatalBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true, fatal: true }) : null
35
+ const looseBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true }) : null
36
+
37
+ export function decodeApiDecoders(input, loose, format) {
38
+ if (format === 'uint16') {
39
+ if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
40
+ } else if (format === 'uint8-le' || format === 'uint8-be') {
41
+ assertU8(input)
42
+ if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
43
+ } else {
44
+ throw new TypeError('Unknown format')
45
+ }
46
+
47
+ const le = format === 'uint8-le' || (format === 'uint16' && isLE)
48
+ return (le ? (loose ? looseLE : fatalLE) : loose ? looseBE : fatalBE).decode(input)
49
+ }
50
+
51
+ export function decodeApiJS(input, loose, format) {
52
+ let u16
53
+ switch (format) {
54
+ case 'uint16':
55
+ if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
56
+ u16 = input
57
+ break
58
+ case 'uint8-le':
59
+ assertU8(input)
60
+ if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
61
+ u16 = to16input(input, true)
62
+ break
63
+ case 'uint8-be':
64
+ assertU8(input)
65
+ if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
66
+ u16 = to16input(input, false)
67
+ break
68
+ default:
69
+ throw new TypeError('Unknown format')
70
+ }
71
+
72
+ const str = decode(u16, loose, (!loose && isWellFormedStr) || (loose && toWellFormedStr))
73
+ if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT)
74
+ if (loose && toWellFormedStr) return toWellFormedStr.call(str)
75
+
76
+ return str
77
+ }
78
+
12
79
  export function to16input(u8, le) {
13
80
  // Assume even number of bytes
14
81
  if (le === isLE) return to16(u8.byteOffset % 2 === 0 ? u8 : Uint8Array.from(u8))
@@ -0,0 +1,2 @@
1
+ export const decodeFast = null
2
+ export const encode = null
@@ -0,0 +1 @@
1
+ export { decodeFast, encode } from './utf8.js'
@@ -0,0 +1 @@
1
+ export { decodeFast, encode } from './utf8.js'
package/fallback/utf8.js CHANGED
@@ -1,9 +1,31 @@
1
- import { encodeAsciiPrefix } from './latin1.js'
1
+ import { E_STRICT_UNICODE } from './_utils.js'
2
+ import { isHermes } from './platform.js'
3
+ import { asciiPrefix, decodeLatin1, encodeAsciiPrefix } from './latin1.js'
2
4
 
3
5
  export const E_STRICT = 'Input is not well-formed utf8'
4
- export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
5
6
 
6
7
  const replacementPoint = 0xff_fd
8
+ const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
9
+ const { decodeURIComponent, escape } = globalThis
10
+
11
+ export function decodeFast(arr, loose) {
12
+ // Fast path for ASCII prefix, this is faster than all alternatives below
13
+ const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
14
+ if (prefix.length === arr.length) return prefix
15
+
16
+ // This codepath gives a ~3x perf boost on Hermes
17
+ if (shouldUseEscapePath && escape && decodeURIComponent) {
18
+ const o = escape(decodeLatin1(arr, prefix.length, arr.length))
19
+ try {
20
+ return prefix + decodeURIComponent(o) // Latin1 to utf8
21
+ } catch {
22
+ if (!loose) throw new TypeError(E_STRICT)
23
+ // Ok, we have to use manual implementation for loose decoder
24
+ }
25
+ }
26
+
27
+ return prefix + decode(arr, loose, prefix.length)
28
+ }
7
29
 
8
30
  // https://encoding.spec.whatwg.org/#utf-8-decoder
9
31
  // We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
@@ -27,7 +49,7 @@ export function decode(arr, loose, start = 0) {
27
49
  const byte = arr[i]
28
50
  if (byte < 0x80) {
29
51
  tmp[ti++] = byte
30
- // ascii fast path is in ../utf8.js, this is called only on non-ascii input
52
+ // ascii fast path is in decodeFast(), this is called only on non-ascii input
31
53
  // so we don't unroll this anymore
32
54
  } else if (byte < 0xc2) {
33
55
  if (!loose) throw new TypeError(E_STRICT)
package/hex.js CHANGED
@@ -1,19 +1,17 @@
1
- import { assertUint8 } from './assert.js'
2
1
  import { typedView } from './array.js'
3
- import { skipWeb } from './fallback/_utils.js'
2
+ import { assertU8 } from './fallback/_utils.js'
4
3
  import * as js from './fallback/hex.js'
5
4
 
6
5
  const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
7
6
 
8
7
  export function toHex(arr) {
9
- assertUint8(arr)
8
+ assertU8(arr)
10
9
  if (arr.length === 0) return ''
11
- if (!skipWeb && webHex && arr.toHex === webHex) return arr.toHex()
10
+ if (webHex && arr.toHex === webHex) return arr.toHex()
12
11
  return js.toHex(arr)
13
12
  }
14
13
 
15
14
  // Unlike Buffer.from(), throws on invalid input
16
- export const fromHex =
17
- !skipWeb && Uint8Array.fromHex
18
- ? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
19
- : (str, format = 'uint8') => typedView(js.fromHex(str), format)
15
+ export const fromHex = Uint8Array.fromHex
16
+ ? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
17
+ : (str, format = 'uint8') => typedView(js.fromHex(str), format)
package/hex.node.js CHANGED
@@ -1,6 +1,5 @@
1
- import { assertUint8 } from './assert.js'
2
1
  import { typedView } from './array.js'
3
- import { E_STRING } from './fallback/_utils.js'
2
+ import { assertU8, E_STRING } from './fallback/_utils.js'
4
3
  import { E_HEX } from './fallback/hex.js'
5
4
 
6
5
  if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
@@ -9,7 +8,7 @@ const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
9
8
  const denoBug = Buffer.from('ag', 'hex').length > 0
10
9
 
11
10
  export function toHex(arr) {
12
- assertUint8(arr)
11
+ assertU8(arr)
13
12
  if (arr.length === 0) return ''
14
13
  if (webHex && arr.toHex === webHex) return arr.toHex()
15
14
  if (arr.constructor === Buffer && Buffer.isBuffer(arr)) return arr.hexSlice(0, arr.byteLength)
package/multi-byte.js CHANGED
@@ -1,11 +1,11 @@
1
- import { assertUint8 } from './assert.js'
1
+ import { assertU8 } from './fallback/_utils.js'
2
2
  import { multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
3
3
 
4
4
  export function createMultibyteDecoder(encoding, loose = false) {
5
5
  const jsDecoder = multibyteDecoder(encoding, loose) // asserts
6
6
  let streaming = false
7
7
  return (arr, stream = false) => {
8
- assertUint8(arr)
8
+ assertU8(arr)
9
9
  if (!streaming && arr.byteLength === 0) return ''
10
10
  streaming = stream
11
11
  return jsDecoder(arr, stream)
@@ -1,5 +1,5 @@
1
- import { assertUint8 } from './assert.js'
2
- import { isDeno, toBuf } from './fallback/_utils.js'
1
+ import { assertU8, toBuf } from './fallback/_utils.js'
2
+ import { isDeno } from './fallback/platform.js'
3
3
  import { isAsciiSuperset, multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
4
4
  import { isAscii } from 'node:buffer'
5
5
 
@@ -8,7 +8,7 @@ export function createMultibyteDecoder(encoding, loose = false) {
8
8
  let streaming = false
9
9
  const asciiSuperset = isAsciiSuperset(encoding)
10
10
  return (arr, stream = false) => {
11
- assertUint8(arr)
11
+ assertU8(arr)
12
12
  if (!streaming) {
13
13
  if (arr.byteLength === 0) return ''
14
14
  if (asciiSuperset && isAscii(arr)) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exodus/bytes",
3
- "version": "1.12.0",
3
+ "version": "1.14.0",
4
4
  "description": "Various operations on Uint8Array data",
5
5
  "keywords": [
6
6
  "encoding",
@@ -40,8 +40,8 @@
40
40
  "test:chrome:puppeteer": "exodus-test --engine=chrome:puppeteer",
41
41
  "test:chromium:playwright": "exodus-test --engine=chromium:playwright",
42
42
  "test:webkit:playwright": "exodus-test --engine=webkit:playwright",
43
- "test:firefox:puppeteer": "exodus-test --engine=firefox:puppeteer",
44
- "test:firefox:playwright": "exodus-test --engine=firefox:playwright",
43
+ "test:firefox:puppeteer": "exodus-test --engine=firefox:puppeteer --testTimeout=60000",
44
+ "test:firefox:playwright": "exodus-test --engine=firefox:playwright --testTimeout=60000",
45
45
  "test:servo:bundle": "exodus-test --engine=servo:bundle",
46
46
  "test": "exodus-test",
47
47
  "size": "esbuild --minify --bundle",
@@ -76,6 +76,9 @@
76
76
  "/fallback/hex.js",
77
77
  "/fallback/latin1.js",
78
78
  "/fallback/percent.js",
79
+ "/fallback/platform.js",
80
+ "/fallback/platform.browser.js",
81
+ "/fallback/platform.native.js",
79
82
  "/fallback/multi-byte.encodings.cjs",
80
83
  "/fallback/multi-byte.encodings.json",
81
84
  "/fallback/multi-byte.js",
@@ -84,6 +87,9 @@
84
87
  "/fallback/single-byte.js",
85
88
  "/fallback/utf16.js",
86
89
  "/fallback/utf8.js",
90
+ "/fallback/utf8.auto.js",
91
+ "/fallback/utf8.auto.browser.js",
92
+ "/fallback/utf8.auto.native.js",
87
93
  "/array.js",
88
94
  "/array.d.ts",
89
95
  "/assert.js",
@@ -121,6 +127,8 @@
121
127
  "/single-byte.node.js",
122
128
  "/utf16.js",
123
129
  "/utf16.d.ts",
130
+ "/utf16.browser.js",
131
+ "/utf16.native.js",
124
132
  "/utf16.node.js",
125
133
  "/utf8.js",
126
134
  "/utf8.d.ts",
@@ -200,6 +208,8 @@
200
208
  "./utf16.js": {
201
209
  "types": "./utf16.d.ts",
202
210
  "node": "./utf16.node.js",
211
+ "react-native": "./utf16.native.js",
212
+ "browser": "./utf16.browser.js",
203
213
  "default": "./utf16.js"
204
214
  },
205
215
  "./utf8.js": {
@@ -216,9 +226,20 @@
216
226
  "default": "./wif.js"
217
227
  }
218
228
  },
229
+ "browser": {
230
+ "./utf16.js": "./utf16.browser.js",
231
+ "./fallback/platform.js": "./fallback/platform.browser.js",
232
+ "./fallback/utf8.auto.js": "./fallback/utf8.auto.browser.js"
233
+ },
219
234
  "react-native": {
220
- "./encoding-browser.js": "./encoding-browser.native.js"
235
+ "./encoding-browser.js": "./encoding-browser.native.js",
236
+ "./utf16.js": "./utf16.native.js",
237
+ "./fallback/platform.js": "./fallback/platform.native.js",
238
+ "./fallback/utf8.auto.js": "./fallback/utf8.auto.native.js"
221
239
  },
240
+ "sideEffects": [
241
+ "./encoding.js"
242
+ ],
222
243
  "peerDependencies": {
223
244
  "@noble/hashes": "^1.8.0 || ^2.0.0"
224
245
  },
@@ -232,7 +253,7 @@
232
253
  "@exodus/crypto": "^1.0.0-rc.30",
233
254
  "@exodus/eslint-config": "^5.24.0",
234
255
  "@exodus/prettier": "^1.0.0",
235
- "@exodus/test": "1.0.0-rc.114",
256
+ "@exodus/test": "1.0.0-rc.115",
236
257
  "@hexagon/base64": "^2.0.4",
237
258
  "@noble/hashes": "^2.0.1",
238
259
  "@oslojs/encoding": "^1.1.0",
@@ -254,7 +275,7 @@
254
275
  "decode-utf8": "^1.0.1",
255
276
  "electron": "36.5.0",
256
277
  "encode-utf8": "^2.0.0",
257
- "esbuild": "^0.27.2",
278
+ "esbuild": "^0.27.3",
258
279
  "eslint": "^8.44.0",
259
280
  "fast-base64-decode": "^2.0.0",
260
281
  "fast-base64-encode": "^1.0.0",
@@ -270,7 +291,7 @@
270
291
  "utf8": "^3.0.0",
271
292
  "web-streams-polyfill": "^4.2.0",
272
293
  "wif": "^5.0.0",
273
- "workerd": "^1.20260206.0"
294
+ "workerd": "^1.20260210.0"
274
295
  },
275
296
  "prettier": "@exodus/prettier",
276
297
  "packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"
package/single-byte.js CHANGED
@@ -1,5 +1,5 @@
1
- import { assertUint8 } from './assert.js'
2
- import { nativeDecoderLatin1, nativeEncoder, skipWeb, E_STRING } from './fallback/_utils.js'
1
+ import { assertU8, E_STRING } from './fallback/_utils.js'
2
+ import { nativeDecoderLatin1, nativeEncoder } from './fallback/platform.js'
3
3
  import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
4
4
  import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
5
5
 
@@ -44,7 +44,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
44
44
  try {
45
45
  const decoder = new TextDecoder(encoding, { fatal: !loose })
46
46
  return (arr) => {
47
- assertUint8(arr)
47
+ assertU8(arr)
48
48
  if (arr.byteLength === 0) return ''
49
49
  return decoder.decode(arr)
50
50
  }
@@ -53,7 +53,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
53
53
 
54
54
  const jsDecoder = encodingDecoder(encoding)
55
55
  return (arr) => {
56
- assertUint8(arr)
56
+ assertU8(arr)
57
57
  if (arr.byteLength === 0) return ''
58
58
  return jsDecoder(arr, loose)
59
59
  }
@@ -88,7 +88,7 @@ function encode(s, m) {
88
88
  }
89
89
 
90
90
  // fromBase64+btoa path is faster on everything where fromBase64 is fast
91
- const useLatin1btoa = Uint8Array.fromBase64 && btoa && !skipWeb
91
+ const useLatin1btoa = Uint8Array.fromBase64 && btoa
92
92
 
93
93
  export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
94
94
  // TODO: replacement, truncate (replacement will need varying length)
@@ -129,7 +129,7 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
129
129
  }
130
130
  }
131
131
 
132
- export const latin1toString = createSinglebyteDecoder('iso-8859-1')
133
- export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
134
- export const windows1252toString = createSinglebyteDecoder('windows-1252')
135
- export const windows1252fromString = createSinglebyteEncoder('windows-1252')
132
+ export const latin1toString = /* @__PURE__ */ createSinglebyteDecoder('iso-8859-1')
133
+ export const latin1fromString = /* @__PURE__ */ createSinglebyteEncoder('iso-8859-1')
134
+ export const windows1252toString = /* @__PURE__ */ createSinglebyteDecoder('windows-1252')
135
+ export const windows1252fromString = /* @__PURE__ */ createSinglebyteEncoder('windows-1252')
@@ -1,6 +1,6 @@
1
- import { assertUint8 } from './assert.js'
2
1
  import { isAscii } from 'node:buffer'
3
- import { isDeno, isLE, toBuf, E_STRING } from './fallback/_utils.js'
2
+ import { assertU8, toBuf, E_STRING } from './fallback/_utils.js'
3
+ import { isDeno, isLE } from './fallback/platform.js'
4
4
  import { asciiPrefix } from './fallback/latin1.js'
5
5
  import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
6
6
 
@@ -26,7 +26,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
26
26
  if (isDeno) {
27
27
  const jsDecoder = encodingDecoder(encoding) // asserts
28
28
  return (arr) => {
29
- assertUint8(arr)
29
+ assertU8(arr)
30
30
  if (arr.byteLength === 0) return ''
31
31
  if (isAscii(arr)) return toBuf(arr).toString()
32
32
  return jsDecoder(arr, loose) // somewhy faster on Deno anyway, TODO: optimize?
@@ -37,7 +37,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
37
37
  const latin1path = encoding === 'windows-1252'
38
38
  const { incomplete, mapper } = encodingMapper(encoding) // asserts
39
39
  return (arr) => {
40
- assertUint8(arr)
40
+ assertU8(arr)
41
41
  if (arr.byteLength === 0) return ''
42
42
  if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice
43
43
 
@@ -114,7 +114,7 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
114
114
  }
115
115
  }
116
116
 
117
- export const latin1toString = createSinglebyteDecoder('iso-8859-1')
118
- export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
119
- export const windows1252toString = createSinglebyteDecoder('windows-1252')
120
- export const windows1252fromString = createSinglebyteEncoder('windows-1252')
117
+ export const latin1toString = /* @__PURE__ */ createSinglebyteDecoder('iso-8859-1')
118
+ export const latin1fromString = /* @__PURE__ */ createSinglebyteEncoder('iso-8859-1')
119
+ export const windows1252toString = /* @__PURE__ */ createSinglebyteDecoder('windows-1252')
120
+ export const windows1252fromString = /* @__PURE__ */ createSinglebyteEncoder('windows-1252')
@@ -0,0 +1,8 @@
1
+ // We trust browsers to always have correct TextDecoder for utf-16le/utf-16be with ignoreBOM without streaming
2
+
3
+ import { encodeApi, decodeApiDecoders } from './fallback/utf16.js'
4
+
5
+ export const utf16fromString = (str, format = 'uint16') => encodeApi(str, false, format)
6
+ export const utf16fromStringLoose = (str, format = 'uint16') => encodeApi(str, true, format)
7
+ export const utf16toString = (arr, format = 'uint16') => decodeApiDecoders(arr, false, format)
8
+ export const utf16toStringLoose = (arr, format = 'uint16') => decodeApiDecoders(arr, true, format)
package/utf16.js CHANGED
@@ -1,90 +1 @@
1
- import * as js from './fallback/utf16.js'
2
- import { nativeDecoder, isLE, E_STRING } from './fallback/_utils.js'
3
-
4
- const { TextDecoder } = globalThis
5
-
6
- function checkDecoders() {
7
- // Not all barebone engines with TextDecoder support something except utf-8
8
- // Also workerd specifically has a broken utf-16le implementation
9
- if (!nativeDecoder) return false
10
- try {
11
- const a = new TextDecoder('utf-16le').decode(Uint8Array.of(1, 2, 3, 0xd8))
12
- const b = new TextDecoder('utf-16be').decode(Uint8Array.of(2, 1, 0xd8, 3))
13
- return a === b && a === '\u0201\uFFFD'
14
- } catch {}
15
-
16
- return false
17
- }
18
-
19
- const canDecoders = checkDecoders()
20
- const ignoreBOM = true
21
- const decoderFatalLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM, fatal: true }) : null
22
- const decoderLooseLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM }) : null
23
- const decoderFatalBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM, fatal: true }) : null
24
- const decoderLooseBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM }) : null
25
- const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE
26
- const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE
27
- const { isWellFormed, toWellFormed } = String.prototype
28
-
29
- const { E_STRICT, E_STRICT_UNICODE } = js
30
-
31
- // Unlike utf8, operates on Uint16Arrays by default
32
-
33
- const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
34
-
35
- function encode(str, loose = false, format = 'uint16') {
36
- if (typeof str !== 'string') throw new TypeError(E_STRING)
37
- if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
38
- throw new TypeError('Unknown format')
39
- }
40
-
41
- const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
42
-
43
- // On v8 and SpiderMonkey, check via isWellFormed is faster than js
44
- // On JSC, check during loop is faster than isWellFormed
45
- // If isWellFormed is available, we skip check during decoding and recheck after
46
- // If isWellFormed is unavailable, we check in js during decoding
47
- if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
48
- const u16 = js.encode(str, loose, !loose && isWellFormed, shouldSwap)
49
-
50
- if (format === 'uint8-le' || format === 'uint8-be') return to8(u16) // Already swapped
51
- if (format === 'uint16') return u16
52
- /* c8 ignore next */
53
- throw new Error('Unreachable')
54
- }
55
-
56
- function decode(input, loose = false, format = 'uint16') {
57
- let u16
58
- switch (format) {
59
- case 'uint16':
60
- if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
61
- if (canDecoders) return loose ? decoderLoose16.decode(input) : decoderFatal16.decode(input)
62
- u16 = input
63
- break
64
- case 'uint8-le':
65
- if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
66
- if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
67
- if (canDecoders) return loose ? decoderLooseLE.decode(input) : decoderFatalLE.decode(input)
68
- u16 = js.to16input(input, true)
69
- break
70
- case 'uint8-be':
71
- if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
72
- if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
73
- if (canDecoders) return loose ? decoderLooseBE.decode(input) : decoderFatalBE.decode(input)
74
- u16 = js.to16input(input, false)
75
- break
76
- default:
77
- throw new TypeError('Unknown format')
78
- }
79
-
80
- const str = js.decode(u16, loose, (!loose && isWellFormed) || (loose && toWellFormed))
81
- if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT)
82
- if (loose && toWellFormed) return toWellFormed.call(str)
83
-
84
- return str
85
- }
86
-
87
- export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
88
- export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
89
- export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
90
- export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
1
+ export * from './utf16.native.js'
@@ -0,0 +1,22 @@
1
+ import { encodeApi, decodeApiDecoders, decodeApiJS } from './fallback/utf16.js'
2
+ import { nativeDecoder } from './fallback/platform.native.js'
3
+
4
+ function checkDecoders() {
5
+ // Not all barebone engines with TextDecoder support something except utf-8
6
+ // Also workerd specifically has a broken utf-16le implementation
7
+ if (!nativeDecoder) return false
8
+ try {
9
+ const a = new TextDecoder('utf-16le').decode(Uint8Array.of(1, 2, 3, 0xd8))
10
+ const b = new TextDecoder('utf-16be').decode(Uint8Array.of(2, 1, 0xd8, 3))
11
+ return a === b && a === '\u0201\uFFFD'
12
+ } catch {}
13
+
14
+ return false
15
+ }
16
+
17
+ const decode = checkDecoders() ? decodeApiDecoders : decodeApiJS
18
+
19
+ export const utf16fromString = (str, format = 'uint16') => encodeApi(str, false, format)
20
+ export const utf16fromStringLoose = (str, format = 'uint16') => encodeApi(str, true, format)
21
+ export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
22
+ export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
package/utf16.node.js CHANGED
@@ -1,5 +1,6 @@
1
- import { isDeno, isLE, E_STRING } from './fallback/_utils.js'
2
- import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf16.js'
1
+ import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
2
+ import { isDeno, isLE } from './fallback/platform.js'
3
+ import { E_STRICT, decodeApiDecoders } from './fallback/utf16.js'
3
4
 
4
5
  if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
5
6
 
@@ -48,7 +49,7 @@ function decodeNode(input, loose = false, format = 'uint16') {
48
49
  if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
49
50
  ble = swapped(input, !isLE)
50
51
  } else if (format === 'uint8-le' || format === 'uint8-be') {
51
- if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
52
+ assertU8(input)
52
53
  if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
53
54
  ble = swapped(input, format === 'uint8-be')
54
55
  } else {
@@ -61,23 +62,7 @@ function decodeNode(input, loose = false, format = 'uint16') {
61
62
  throw new TypeError(E_STRICT)
62
63
  }
63
64
 
64
- function decodeDecoder(input, loose = false, format = 'uint16') {
65
- let encoding
66
- if (format === 'uint16') {
67
- if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
68
- encoding = isLE ? 'utf-16le' : 'utf-16be'
69
- } else if (format === 'uint8-le' || format === 'uint8-be') {
70
- if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
71
- if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
72
- encoding = format === 'uint8-le' ? 'utf-16le' : 'utf-16be'
73
- } else {
74
- throw new TypeError('Unknown format')
75
- }
76
-
77
- return new TextDecoder(encoding, { ignoreBOM: true, fatal: !loose }).decode(input) // TODO: cache decoder?
78
- }
79
-
80
- const decode = isDeno ? decodeDecoder : decodeNode
65
+ const decode = isDeno ? decodeApiDecoders : decodeNode
81
66
 
82
67
  export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
83
68
  export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
package/utf8.js CHANGED
@@ -1,10 +1,8 @@
1
- import { assertUint8 } from './assert.js'
2
1
  import { typedView } from './array.js'
3
- import { isHermes, nativeDecoder, nativeEncoder, E_STRING } from './fallback/_utils.js'
4
- import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
5
- import * as js from './fallback/utf8.js'
2
+ import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
3
+ import { nativeDecoder, nativeEncoder } from './fallback/platform.js'
4
+ import * as js from './fallback/utf8.auto.js'
6
5
 
7
- const { TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
8
6
  // ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
9
7
  // We don't want to strip anything unexpectedly
10
8
  const decoderLoose = nativeDecoder
@@ -13,10 +11,6 @@ const decoderFatal = nativeDecoder
13
11
  : null
14
12
  const { isWellFormed } = String.prototype
15
13
 
16
- const { E_STRICT, E_STRICT_UNICODE } = js
17
-
18
- const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
19
-
20
14
  function deLoose(str, loose, res) {
21
15
  if (loose || str.length === res.length) return res // length is equal only for ascii, which is automatically fine
22
16
  if (isWellFormed) {
@@ -35,7 +29,7 @@ function deLoose(str, loose, res) {
35
29
  start = pos + 1
36
30
  if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
37
31
  // Found a replacement char in output, need to recheck if we encoded the input correctly
38
- if (!nativeDecoder && str.length < 1e7) {
32
+ if (js.decodeFast && !nativeDecoder && str.length < 1e7) {
39
33
  // This is ~2x faster than decode in Hermes
40
34
  try {
41
35
  if (encodeURI(str) !== null) return res // guard against optimizing out
@@ -51,32 +45,19 @@ function deLoose(str, loose, res) {
51
45
  function encode(str, loose = false) {
52
46
  if (typeof str !== 'string') throw new TypeError(E_STRING)
53
47
  if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
54
- if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
48
+ if (nativeEncoder || !js.encode) return deLoose(str, loose, nativeEncoder.encode(str))
55
49
  // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
56
50
  return js.encode(str, loose)
57
51
  }
58
52
 
59
53
  function decode(arr, loose = false) {
60
- assertUint8(arr)
54
+ assertU8(arr)
61
55
  if (arr.byteLength === 0) return ''
62
- if (nativeDecoder) return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
63
-
64
- // Fast path for ASCII prefix, this is faster than all alternatives below
65
- const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
66
- if (prefix.length === arr.length) return prefix
67
-
68
- // This codepath gives a ~3x perf boost on Hermes
69
- if (shouldUseEscapePath && escape && decodeURIComponent) {
70
- const o = escape(decodeLatin1(arr, prefix.length, arr.length))
71
- try {
72
- return prefix + decodeURIComponent(o) // Latin1 to utf8
73
- } catch {
74
- if (!loose) throw new TypeError(E_STRICT)
75
- // Ok, we have to use manual implementation for loose decoder
76
- }
56
+ if (nativeDecoder || !js.decodeFast) {
57
+ return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
77
58
  }
78
59
 
79
- return prefix + js.decode(arr, loose, prefix.length)
60
+ return js.decodeFast(arr, loose)
80
61
  }
81
62
 
82
63
  export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)