@exodus/bytes 1.11.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/fallback/utf16.js CHANGED
@@ -1,14 +1,81 @@
1
1
  import { decodeUCS2, encodeCharcodes } from './latin1.js'
2
- import { isLE } from './_utils.js'
2
+ import { assertU8, E_STRING, E_STRICT_UNICODE } from './_utils.js'
3
+ import { nativeDecoder, isLE } from './platform.js'
3
4
 
4
5
  export const E_STRICT = 'Input is not well-formed utf16'
5
- export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
6
+ const isWellFormedStr = String.prototype.isWellFormed
7
+ const toWellFormedStr = /* @__PURE__ */ (() => String.prototype.toWellFormed)()
6
8
 
7
9
  const replacementCodepoint = 0xff_fd
8
10
  const replacementCodepointSwapped = 0xfd_ff
9
11
 
10
12
  const to16 = (a) => new Uint16Array(a.buffer, a.byteOffset, a.byteLength / 2) // Requires checked length and alignment!
11
13
 
14
+ export function encodeApi(str, loose, format) {
15
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
16
+ if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
17
+ throw new TypeError('Unknown format')
18
+ }
19
+
20
+ // On v8 and SpiderMonkey, check via isWellFormed is faster than js
21
+ // On JSC, check during loop is faster than isWellFormed
22
+ // If isWellFormed is available, we skip check during decoding and recheck after
23
+ // If isWellFormed is unavailable, we check in js during decoding
24
+ if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT_UNICODE)
25
+ const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
26
+ const u16 = encode(str, loose, !loose && isWellFormedStr, shouldSwap)
27
+
28
+ // Bytes are already swapped and format is already checked, we need to just cast the view
29
+ return format === 'uint16' ? u16 : new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength)
30
+ }
31
+
32
+ const fatalLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true, fatal: true }) : null
33
+ const looseLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true }) : null
34
+ const fatalBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true, fatal: true }) : null
35
+ const looseBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true }) : null
36
+
37
+ export function decodeApiDecoders(input, loose, format) {
38
+ if (format === 'uint16') {
39
+ if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
40
+ } else if (format === 'uint8-le' || format === 'uint8-be') {
41
+ assertU8(input)
42
+ if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
43
+ } else {
44
+ throw new TypeError('Unknown format')
45
+ }
46
+
47
+ const le = format === 'uint8-le' || (format === 'uint16' && isLE)
48
+ return (le ? (loose ? looseLE : fatalLE) : loose ? looseBE : fatalBE).decode(input)
49
+ }
50
+
51
+ export function decodeApiJS(input, loose, format) {
52
+ let u16
53
+ switch (format) {
54
+ case 'uint16':
55
+ if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
56
+ u16 = input
57
+ break
58
+ case 'uint8-le':
59
+ assertU8(input)
60
+ if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
61
+ u16 = to16input(input, true)
62
+ break
63
+ case 'uint8-be':
64
+ assertU8(input)
65
+ if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
66
+ u16 = to16input(input, false)
67
+ break
68
+ default:
69
+ throw new TypeError('Unknown format')
70
+ }
71
+
72
+ const str = decode(u16, loose, (!loose && isWellFormedStr) || (loose && toWellFormedStr))
73
+ if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT)
74
+ if (loose && toWellFormedStr) return toWellFormedStr.call(str)
75
+
76
+ return str
77
+ }
78
+
12
79
  export function to16input(u8, le) {
13
80
  // Assume even number of bytes
14
81
  if (le === isLE) return to16(u8.byteOffset % 2 === 0 ? u8 : Uint8Array.from(u8))
@@ -0,0 +1,2 @@
1
+ export const decodeFast = null
2
+ export const encode = null
@@ -0,0 +1 @@
1
+ export { decodeFast, encode } from './utf8.js'
@@ -0,0 +1 @@
1
+ export { decodeFast, encode } from './utf8.js'
package/fallback/utf8.js CHANGED
@@ -1,9 +1,31 @@
1
- import { encodeAsciiPrefix } from './latin1.js'
1
+ import { E_STRICT_UNICODE } from './_utils.js'
2
+ import { isHermes } from './platform.js'
3
+ import { asciiPrefix, decodeLatin1, encodeAsciiPrefix } from './latin1.js'
2
4
 
3
5
  export const E_STRICT = 'Input is not well-formed utf8'
4
- export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
5
6
 
6
7
  const replacementPoint = 0xff_fd
8
+ const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
9
+ const { decodeURIComponent, escape } = globalThis
10
+
11
+ export function decodeFast(arr, loose) {
12
+ // Fast path for ASCII prefix, this is faster than all alternatives below
13
+ const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
14
+ if (prefix.length === arr.length) return prefix
15
+
16
+ // This codepath gives a ~3x perf boost on Hermes
17
+ if (shouldUseEscapePath && escape && decodeURIComponent) {
18
+ const o = escape(decodeLatin1(arr, prefix.length, arr.length))
19
+ try {
20
+ return prefix + decodeURIComponent(o) // Latin1 to utf8
21
+ } catch {
22
+ if (!loose) throw new TypeError(E_STRICT)
23
+ // Ok, we have to use manual implementation for loose decoder
24
+ }
25
+ }
26
+
27
+ return prefix + decode(arr, loose, prefix.length)
28
+ }
7
29
 
8
30
  // https://encoding.spec.whatwg.org/#utf-8-decoder
9
31
  // We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
@@ -27,7 +49,7 @@ export function decode(arr, loose, start = 0) {
27
49
  const byte = arr[i]
28
50
  if (byte < 0x80) {
29
51
  tmp[ti++] = byte
30
- // ascii fast path is in ../utf8.js, this is called only on non-ascii input
52
+ // ascii fast path is in decodeFast(), this is called only on non-ascii input
31
53
  // so we don't unroll this anymore
32
54
  } else if (byte < 0xc2) {
33
55
  if (!loose) throw new TypeError(E_STRICT)
package/hex.js CHANGED
@@ -1,19 +1,17 @@
1
- import { assertUint8 } from './assert.js'
2
1
  import { typedView } from './array.js'
3
- import { skipWeb } from './fallback/_utils.js'
2
+ import { assertU8 } from './fallback/_utils.js'
4
3
  import * as js from './fallback/hex.js'
5
4
 
6
5
  const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
7
6
 
8
7
  export function toHex(arr) {
9
- assertUint8(arr)
8
+ assertU8(arr)
10
9
  if (arr.length === 0) return ''
11
- if (!skipWeb && webHex && arr.toHex === webHex) return arr.toHex()
10
+ if (webHex && arr.toHex === webHex) return arr.toHex()
12
11
  return js.toHex(arr)
13
12
  }
14
13
 
15
14
  // Unlike Buffer.from(), throws on invalid input
16
- export const fromHex =
17
- !skipWeb && Uint8Array.fromHex
18
- ? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
19
- : (str, format = 'uint8') => typedView(js.fromHex(str), format)
15
+ export const fromHex = Uint8Array.fromHex
16
+ ? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
17
+ : (str, format = 'uint8') => typedView(js.fromHex(str), format)
package/hex.node.js CHANGED
@@ -1,6 +1,5 @@
1
- import { assertUint8 } from './assert.js'
2
1
  import { typedView } from './array.js'
3
- import { E_STRING } from './fallback/_utils.js'
2
+ import { assertU8, E_STRING } from './fallback/_utils.js'
4
3
  import { E_HEX } from './fallback/hex.js'
5
4
 
6
5
  if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
@@ -9,7 +8,7 @@ const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
9
8
  const denoBug = Buffer.from('ag', 'hex').length > 0
10
9
 
11
10
  export function toHex(arr) {
12
- assertUint8(arr)
11
+ assertU8(arr)
13
12
  if (arr.length === 0) return ''
14
13
  if (webHex && arr.toHex === webHex) return arr.toHex()
15
14
  if (arr.constructor === Buffer && Buffer.isBuffer(arr)) return arr.hexSlice(0, arr.byteLength)
package/multi-byte.js CHANGED
@@ -1,11 +1,11 @@
1
- import { assertUint8 } from './assert.js'
1
+ import { assertU8 } from './fallback/_utils.js'
2
2
  import { multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
3
3
 
4
4
  export function createMultibyteDecoder(encoding, loose = false) {
5
5
  const jsDecoder = multibyteDecoder(encoding, loose) // asserts
6
6
  let streaming = false
7
7
  return (arr, stream = false) => {
8
- assertUint8(arr)
8
+ assertU8(arr)
9
9
  if (!streaming && arr.byteLength === 0) return ''
10
10
  streaming = stream
11
11
  return jsDecoder(arr, stream)
@@ -1,5 +1,5 @@
1
- import { assertUint8 } from './assert.js'
2
- import { isDeno, toBuf } from './fallback/_utils.js'
1
+ import { assertU8, toBuf } from './fallback/_utils.js'
2
+ import { isDeno } from './fallback/platform.js'
3
3
  import { isAsciiSuperset, multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
4
4
  import { isAscii } from 'node:buffer'
5
5
 
@@ -8,7 +8,7 @@ export function createMultibyteDecoder(encoding, loose = false) {
8
8
  let streaming = false
9
9
  const asciiSuperset = isAsciiSuperset(encoding)
10
10
  return (arr, stream = false) => {
11
- assertUint8(arr)
11
+ assertU8(arr)
12
12
  if (!streaming) {
13
13
  if (arr.byteLength === 0) return ''
14
14
  if (asciiSuperset && isAscii(arr)) {
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@exodus/bytes",
3
- "version": "1.11.0",
3
+ "version": "1.13.0",
4
4
  "description": "Various operations on Uint8Array data",
5
5
  "keywords": [
6
6
  "encoding",
7
- "uint8array",
8
- "textdecoder",
9
- "textencoder",
7
+ "Uint8Array",
8
+ "TextDecoder",
9
+ "TextEncoder",
10
10
  "utf8",
11
11
  "utf16",
12
12
  "hex",
@@ -27,10 +27,14 @@
27
27
  "test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
28
28
  "test:hermes": "exodus-test --engine=hermes:bundle",
29
29
  "test:quickjs": "exodus-test --engine=quickjs:bundle",
30
- "test:xs": "EXODUS_TEST_IGNORE='tests/whatwg.browser.test.js' exodus-test --engine=xs:bundle",
30
+ "test:xs": "exodus-test --engine=xs:bundle",
31
31
  "test:engine262": "exodus-test --engine=engine262:bundle",
32
+ "test:graaljs": "exodus-test --engine=graaljs:bundle",
33
+ "test:escargot": "exodus-test --engine=escargot:bundle",
34
+ "test:boa": "exodus-test --engine=boa:bundle",
32
35
  "test:deno": "exodus-test --engine=deno:pure",
33
36
  "test:bun": "exodus-test --engine=bun:pure",
37
+ "test:workerd": "exodus-test --engine=workerd:bundle",
34
38
  "test:electron:bundle": "exodus-test --engine=electron:bundle",
35
39
  "test:electron:as-node": "exodus-test --engine=electron-as-node:test",
36
40
  "test:chrome:puppeteer": "exodus-test --engine=chrome:puppeteer",
@@ -72,6 +76,9 @@
72
76
  "/fallback/hex.js",
73
77
  "/fallback/latin1.js",
74
78
  "/fallback/percent.js",
79
+ "/fallback/platform.js",
80
+ "/fallback/platform.browser.js",
81
+ "/fallback/platform.native.js",
75
82
  "/fallback/multi-byte.encodings.cjs",
76
83
  "/fallback/multi-byte.encodings.json",
77
84
  "/fallback/multi-byte.js",
@@ -80,6 +87,9 @@
80
87
  "/fallback/single-byte.js",
81
88
  "/fallback/utf16.js",
82
89
  "/fallback/utf8.js",
90
+ "/fallback/utf8.auto.js",
91
+ "/fallback/utf8.auto.browser.js",
92
+ "/fallback/utf8.auto.native.js",
83
93
  "/array.js",
84
94
  "/array.d.ts",
85
95
  "/assert.js",
@@ -117,6 +127,8 @@
117
127
  "/single-byte.node.js",
118
128
  "/utf16.js",
119
129
  "/utf16.d.ts",
130
+ "/utf16.browser.js",
131
+ "/utf16.native.js",
120
132
  "/utf16.node.js",
121
133
  "/utf8.js",
122
134
  "/utf8.d.ts",
@@ -196,6 +208,8 @@
196
208
  "./utf16.js": {
197
209
  "types": "./utf16.d.ts",
198
210
  "node": "./utf16.node.js",
211
+ "react-native": "./utf16.native.js",
212
+ "browser": "./utf16.browser.js",
199
213
  "default": "./utf16.js"
200
214
  },
201
215
  "./utf8.js": {
@@ -212,8 +226,16 @@
212
226
  "default": "./wif.js"
213
227
  }
214
228
  },
229
+ "browser": {
230
+ "./utf16.js": "./utf16.browser.js",
231
+ "./fallback/platform.js": "./fallback/platform.browser.js",
232
+ "./fallback/utf8.auto.js": "./fallback/utf8.auto.browser.js"
233
+ },
215
234
  "react-native": {
216
- "./encoding-browser.js": "./encoding-browser.native.js"
235
+ "./encoding-browser.js": "./encoding-browser.native.js",
236
+ "./utf16.js": "./utf16.native.js",
237
+ "./fallback/platform.js": "./fallback/platform.native.js",
238
+ "./fallback/utf8.auto.js": "./fallback/utf8.auto.native.js"
217
239
  },
218
240
  "peerDependencies": {
219
241
  "@noble/hashes": "^1.8.0 || ^2.0.0"
@@ -228,7 +250,7 @@
228
250
  "@exodus/crypto": "^1.0.0-rc.30",
229
251
  "@exodus/eslint-config": "^5.24.0",
230
252
  "@exodus/prettier": "^1.0.0",
231
- "@exodus/test": "^1.0.0-rc.109",
253
+ "@exodus/test": "1.0.0-rc.115",
232
254
  "@hexagon/base64": "^2.0.4",
233
255
  "@noble/hashes": "^2.0.1",
234
256
  "@oslojs/encoding": "^1.1.0",
@@ -250,7 +272,7 @@
250
272
  "decode-utf8": "^1.0.1",
251
273
  "electron": "36.5.0",
252
274
  "encode-utf8": "^2.0.0",
253
- "esbuild": "^0.27.2",
275
+ "esbuild": "^0.27.3",
254
276
  "eslint": "^8.44.0",
255
277
  "fast-base64-decode": "^2.0.0",
256
278
  "fast-base64-encode": "^1.0.0",
@@ -265,7 +287,8 @@
265
287
  "uint8array-tools": "^0.0.9",
266
288
  "utf8": "^3.0.0",
267
289
  "web-streams-polyfill": "^4.2.0",
268
- "wif": "^5.0.0"
290
+ "wif": "^5.0.0",
291
+ "workerd": "^1.20260206.0"
269
292
  },
270
293
  "prettier": "@exodus/prettier",
271
294
  "packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"
package/single-byte.js CHANGED
@@ -1,5 +1,5 @@
1
- import { assertUint8 } from './assert.js'
2
- import { canDecoders, nativeEncoder, skipWeb, E_STRING } from './fallback/_utils.js'
1
+ import { assertU8, E_STRING } from './fallback/_utils.js'
2
+ import { nativeDecoderLatin1, nativeEncoder } from './fallback/platform.js'
3
3
  import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
4
4
  import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
5
5
 
@@ -39,12 +39,12 @@ export function createSinglebyteDecoder(encoding, loose = false) {
39
39
  if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
40
40
  assertEncoding(encoding)
41
41
 
42
- if (canDecoders && shouldUseNative(encoding)) {
42
+ if (nativeDecoderLatin1 && shouldUseNative(encoding)) {
43
43
  // In try, as not all encodings might be implemented in all engines which have native TextDecoder
44
44
  try {
45
45
  const decoder = new TextDecoder(encoding, { fatal: !loose })
46
46
  return (arr) => {
47
- assertUint8(arr)
47
+ assertU8(arr)
48
48
  if (arr.byteLength === 0) return ''
49
49
  return decoder.decode(arr)
50
50
  }
@@ -53,7 +53,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
53
53
 
54
54
  const jsDecoder = encodingDecoder(encoding)
55
55
  return (arr) => {
56
- assertUint8(arr)
56
+ assertU8(arr)
57
57
  if (arr.byteLength === 0) return ''
58
58
  return jsDecoder(arr, loose)
59
59
  }
@@ -88,7 +88,7 @@ function encode(s, m) {
88
88
  }
89
89
 
90
90
  // fromBase64+btoa path is faster on everything where fromBase64 is fast
91
- const useLatin1btoa = Uint8Array.fromBase64 && btoa && !skipWeb
91
+ const useLatin1btoa = Uint8Array.fromBase64 && btoa
92
92
 
93
93
  export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
94
94
  // TODO: replacement, truncate (replacement will need varying length)
@@ -1,6 +1,6 @@
1
- import { assertUint8 } from './assert.js'
2
1
  import { isAscii } from 'node:buffer'
3
- import { isDeno, isLE, toBuf, E_STRING } from './fallback/_utils.js'
2
+ import { assertU8, toBuf, E_STRING } from './fallback/_utils.js'
3
+ import { isDeno, isLE } from './fallback/platform.js'
4
4
  import { asciiPrefix } from './fallback/latin1.js'
5
5
  import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
6
6
 
@@ -26,7 +26,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
26
26
  if (isDeno) {
27
27
  const jsDecoder = encodingDecoder(encoding) // asserts
28
28
  return (arr) => {
29
- assertUint8(arr)
29
+ assertU8(arr)
30
30
  if (arr.byteLength === 0) return ''
31
31
  if (isAscii(arr)) return toBuf(arr).toString()
32
32
  return jsDecoder(arr, loose) // somewhy faster on Deno anyway, TODO: optimize?
@@ -37,7 +37,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
37
37
  const latin1path = encoding === 'windows-1252'
38
38
  const { incomplete, mapper } = encodingMapper(encoding) // asserts
39
39
  return (arr) => {
40
- assertUint8(arr)
40
+ assertU8(arr)
41
41
  if (arr.byteLength === 0) return ''
42
42
  if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice
43
43
 
@@ -0,0 +1,8 @@
1
+ // We trust browsers to always have correct TextDecoder for utf-16le/utf-16be with ignoreBOM without streaming
2
+
3
+ import { encodeApi, decodeApiDecoders } from './fallback/utf16.js'
4
+
5
+ export const utf16fromString = (str, format = 'uint16') => encodeApi(str, false, format)
6
+ export const utf16fromStringLoose = (str, format = 'uint16') => encodeApi(str, true, format)
7
+ export const utf16toString = (arr, format = 'uint16') => decodeApiDecoders(arr, false, format)
8
+ export const utf16toStringLoose = (arr, format = 'uint16') => decodeApiDecoders(arr, true, format)
package/utf16.js CHANGED
@@ -1,75 +1 @@
1
- import * as js from './fallback/utf16.js'
2
- import { canDecoders, isLE, E_STRING } from './fallback/_utils.js'
3
-
4
- const { TextDecoder } = globalThis // Buffer is optional
5
- const ignoreBOM = true
6
- const decoderFatalLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM, fatal: true }) : null
7
- const decoderLooseLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM }) : null
8
- const decoderFatalBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM, fatal: true }) : null
9
- const decoderLooseBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM }) : null
10
- const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE
11
- const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE
12
- const { isWellFormed, toWellFormed } = String.prototype
13
-
14
- const { E_STRICT, E_STRICT_UNICODE } = js
15
-
16
- // Unlike utf8, operates on Uint16Arrays by default
17
-
18
- const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
19
-
20
- function encode(str, loose = false, format = 'uint16') {
21
- if (typeof str !== 'string') throw new TypeError(E_STRING)
22
- if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
23
- throw new TypeError('Unknown format')
24
- }
25
-
26
- const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
27
-
28
- // On v8 and SpiderMonkey, check via isWellFormed is faster than js
29
- // On JSC, check during loop is faster than isWellFormed
30
- // If isWellFormed is available, we skip check during decoding and recheck after
31
- // If isWellFormed is unavailable, we check in js during decoding
32
- if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
33
- const u16 = js.encode(str, loose, !loose && isWellFormed, shouldSwap)
34
-
35
- if (format === 'uint8-le' || format === 'uint8-be') return to8(u16) // Already swapped
36
- if (format === 'uint16') return u16
37
- /* c8 ignore next */
38
- throw new Error('Unreachable')
39
- }
40
-
41
- function decode(input, loose = false, format = 'uint16') {
42
- let u16
43
- switch (format) {
44
- case 'uint16':
45
- if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
46
- if (canDecoders) return loose ? decoderLoose16.decode(input) : decoderFatal16.decode(input)
47
- u16 = input
48
- break
49
- case 'uint8-le':
50
- if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
51
- if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
52
- if (canDecoders) return loose ? decoderLooseLE.decode(input) : decoderFatalLE.decode(input)
53
- u16 = js.to16input(input, true)
54
- break
55
- case 'uint8-be':
56
- if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
57
- if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
58
- if (canDecoders) return loose ? decoderLooseBE.decode(input) : decoderFatalBE.decode(input)
59
- u16 = js.to16input(input, false)
60
- break
61
- default:
62
- throw new TypeError('Unknown format')
63
- }
64
-
65
- const str = js.decode(u16, loose, (!loose && isWellFormed) || (loose && toWellFormed))
66
- if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT)
67
- if (loose && toWellFormed) return toWellFormed.call(str)
68
-
69
- return str
70
- }
71
-
72
- export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
73
- export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
74
- export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
75
- export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
1
+ export * from './utf16.native.js'
@@ -0,0 +1,22 @@
1
+ import { encodeApi, decodeApiDecoders, decodeApiJS } from './fallback/utf16.js'
2
+ import { nativeDecoder } from './fallback/platform.native.js'
3
+
4
+ function checkDecoders() {
5
+ // Not all barebone engines with TextDecoder support something except utf-8
6
+ // Also workerd specifically has a broken utf-16le implementation
7
+ if (!nativeDecoder) return false
8
+ try {
9
+ const a = new TextDecoder('utf-16le').decode(Uint8Array.of(1, 2, 3, 0xd8))
10
+ const b = new TextDecoder('utf-16be').decode(Uint8Array.of(2, 1, 0xd8, 3))
11
+ return a === b && a === '\u0201\uFFFD'
12
+ } catch {}
13
+
14
+ return false
15
+ }
16
+
17
+ const decode = checkDecoders() ? decodeApiDecoders : decodeApiJS
18
+
19
+ export const utf16fromString = (str, format = 'uint16') => encodeApi(str, false, format)
20
+ export const utf16fromStringLoose = (str, format = 'uint16') => encodeApi(str, true, format)
21
+ export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
22
+ export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
package/utf16.node.js CHANGED
@@ -1,5 +1,6 @@
1
- import { isDeno, isLE, E_STRING } from './fallback/_utils.js'
2
- import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf16.js'
1
+ import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
2
+ import { isDeno, isLE } from './fallback/platform.js'
3
+ import { E_STRICT, decodeApiDecoders } from './fallback/utf16.js'
3
4
 
4
5
  if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
5
6
 
@@ -48,7 +49,7 @@ function decodeNode(input, loose = false, format = 'uint16') {
48
49
  if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
49
50
  ble = swapped(input, !isLE)
50
51
  } else if (format === 'uint8-le' || format === 'uint8-be') {
51
- if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
52
+ assertU8(input)
52
53
  if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
53
54
  ble = swapped(input, format === 'uint8-be')
54
55
  } else {
@@ -61,23 +62,7 @@ function decodeNode(input, loose = false, format = 'uint16') {
61
62
  throw new TypeError(E_STRICT)
62
63
  }
63
64
 
64
- function decodeDecoder(input, loose = false, format = 'uint16') {
65
- let encoding
66
- if (format === 'uint16') {
67
- if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
68
- encoding = isLE ? 'utf-16le' : 'utf-16be'
69
- } else if (format === 'uint8-le' || format === 'uint8-be') {
70
- if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
71
- if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
72
- encoding = format === 'uint8-le' ? 'utf-16le' : 'utf-16be'
73
- } else {
74
- throw new TypeError('Unknown format')
75
- }
76
-
77
- return new TextDecoder(encoding, { ignoreBOM: true, fatal: !loose }).decode(input) // TODO: cache decoder?
78
- }
79
-
80
- const decode = isDeno ? decodeDecoder : decodeNode
65
+ const decode = isDeno ? decodeApiDecoders : decodeNode
81
66
 
82
67
  export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
83
68
  export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
package/utf8.js CHANGED
@@ -1,10 +1,8 @@
1
- import { assertUint8 } from './assert.js'
2
1
  import { typedView } from './array.js'
3
- import { isHermes, nativeDecoder, nativeEncoder, E_STRING } from './fallback/_utils.js'
4
- import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
5
- import * as js from './fallback/utf8.js'
2
+ import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
3
+ import { nativeDecoder, nativeEncoder } from './fallback/platform.js'
4
+ import * as js from './fallback/utf8.auto.js'
6
5
 
7
- const { TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
8
6
  // ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
9
7
  // We don't want to strip anything unexpectedly
10
8
  const decoderLoose = nativeDecoder
@@ -13,10 +11,6 @@ const decoderFatal = nativeDecoder
13
11
  : null
14
12
  const { isWellFormed } = String.prototype
15
13
 
16
- const { E_STRICT, E_STRICT_UNICODE } = js
17
-
18
- const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
19
-
20
14
  function deLoose(str, loose, res) {
21
15
  if (loose || str.length === res.length) return res // length is equal only for ascii, which is automatically fine
22
16
  if (isWellFormed) {
@@ -35,7 +29,7 @@ function deLoose(str, loose, res) {
35
29
  start = pos + 1
36
30
  if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
37
31
  // Found a replacement char in output, need to recheck if we encoded the input correctly
38
- if (!nativeDecoder && str.length < 1e7) {
32
+ if (js.decodeFast && !nativeDecoder && str.length < 1e7) {
39
33
  // This is ~2x faster than decode in Hermes
40
34
  try {
41
35
  if (encodeURI(str) !== null) return res // guard against optimizing out
@@ -51,32 +45,19 @@ function deLoose(str, loose, res) {
51
45
  function encode(str, loose = false) {
52
46
  if (typeof str !== 'string') throw new TypeError(E_STRING)
53
47
  if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
54
- if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
48
+ if (nativeEncoder || !js.encode) return deLoose(str, loose, nativeEncoder.encode(str))
55
49
  // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
56
50
  return js.encode(str, loose)
57
51
  }
58
52
 
59
53
  function decode(arr, loose = false) {
60
- assertUint8(arr)
54
+ assertU8(arr)
61
55
  if (arr.byteLength === 0) return ''
62
- if (nativeDecoder) return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
63
-
64
- // Fast path for ASCII prefix, this is faster than all alternatives below
65
- const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
66
- if (prefix.length === arr.length) return prefix
67
-
68
- // This codepath gives a ~3x perf boost on Hermes
69
- if (shouldUseEscapePath && escape && decodeURIComponent) {
70
- const o = escape(decodeLatin1(arr, prefix.length, arr.length))
71
- try {
72
- return prefix + decodeURIComponent(o) // Latin1 to utf8
73
- } catch {
74
- if (!loose) throw new TypeError(E_STRICT)
75
- // Ok, we have to use manual implementation for loose decoder
76
- }
56
+ if (nativeDecoder || !js.decodeFast) {
57
+ return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
77
58
  }
78
59
 
79
- return prefix + js.decode(arr, loose, prefix.length)
60
+ return js.decodeFast(arr, loose)
80
61
  }
81
62
 
82
63
  export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
package/utf8.node.js CHANGED
@@ -1,7 +1,6 @@
1
- import { assertUint8 } from './assert.js'
2
1
  import { typedView } from './array.js'
3
- import { E_STRING } from './fallback/_utils.js'
4
- import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf8.js'
2
+ import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
3
+ import { E_STRICT } from './fallback/utf8.js'
5
4
  import { isAscii } from 'node:buffer'
6
5
 
7
6
  if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
@@ -40,7 +39,7 @@ function encode(str, loose = false) {
40
39
  }
41
40
 
42
41
  function decode(arr, loose = false) {
43
- assertUint8(arr)
42
+ assertU8(arr)
44
43
  const byteLength = arr.byteLength
45
44
  if (byteLength === 0) return ''
46
45
  if (byteLength > 0x6_00 && !(isDeno && loose) && isAscii(arr)) {