@exodus/bytes 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # `@exodus/bytes`
2
2
 
3
+ [![](https://flat.badgen.net/npm/v/@exodus/bytes)](https://npmjs.org/package/@exodus/bytes)
4
+ [![](https://flat.badgen.net/github/release/ExodusOSS/bytes?icon=github)](https://github.com/ExodusOSS/bytes/releases)
5
+ [![](https://flat.badgen.net/npm/dm/@exodus/bytes)](https://www.npmcharts.com/compare/@exodus/bytes?minimal=true)
6
+ [![](https://flat.badgen.net/npm/license/@exodus/bytes)](https://github.com/ExodusOSS/bytes/blob/HEAD/LICENSE)
7
+
3
8
  `Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, `utf8`, `utf16`, `bech32` and `wif`
4
9
 
5
10
  And a [`TextEncoder` / `TextDecoder` polyfill](#textencoder--textdecoder-polyfill)
@@ -38,7 +43,7 @@ Less than half the bundle size of [text-encoding](https://npmjs.com/text-encodin
38
43
  Also [much faster](#fast) than all of those.
39
44
 
40
45
  > [!TIP]
41
- > See also the [lite version](#lite-version) to get this down to 9 KiB gzipped.
46
+ > See also the [lite version](#lite-version) to get this down to 10 KiB gzipped.
42
47
 
43
48
  Spec compliant, passing WPT and covered with extra tests.\
44
49
  Moreover, tests for this library uncovered [bugs in all major implementations](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit).\
@@ -81,9 +86,9 @@ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
81
86
  import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
82
87
  ```
83
88
 
84
- This reduces the bundle size 10x:\
85
- from 90 KiB gzipped for `@exodus/bytes/encoding.js` to 9 KiB gzipped for `@exodus/bytes/encoding-lite.js`.\
86
- (For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped).
89
+ This reduces the bundle size 9x:\
90
+ from 90 KiB gzipped for `@exodus/bytes/encoding.js` to 10 KiB gzipped for `@exodus/bytes/encoding-lite.js`.\
91
+ (For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped).
87
92
 
88
93
  It still supports `utf-8`, `utf-16le`, `utf-16be` and all single-byte encodings specified by the spec,
89
94
  the only difference is support for legacy multi-byte encodings.
@@ -125,18 +130,50 @@ import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js
125
130
  ```js
126
131
  import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
127
132
  import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
133
+ import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
128
134
  ```
129
135
 
130
- Decode the legacy single-byte encodings according to the [Encoding standard](https://encoding.spec.whatwg.org/)
131
- ([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings) and
132
- [§14.5](https://encoding.spec.whatwg.org/#x-user-defined)).
136
+ Decode / encode the legacy single-byte encodings according to the
137
+ [Encoding standard](https://encoding.spec.whatwg.org/)
138
+ ([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings),
139
+ [§14.5](https://encoding.spec.whatwg.org/#x-user-defined)),
140
+ and [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859) `iso-8859-*` mappings.
133
141
 
134
- Supports all single-byte encodings listed in the standard:
142
+ Supports all single-byte encodings listed in the WHATWG Encoding standard:
135
143
  `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
136
144
  `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
137
145
  `macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
138
146
  `windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
139
147
 
148
+ Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
149
+ [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859)
150
+ (and all other `iso-8859-*` encodings there as they match WHATWG).
151
+
152
+ > [!NOTE]
153
+ > While all `iso-8859-*` encodings supported by the [WHATWG Encoding standard](https://encoding.spec.whatwg.org/) match
154
+ > [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859), the WHATWG Encoding spec doesn't support
155
+ > `iso-8859-1`, `iso-8859-9`, `iso-8859-11`, and instead maps them as labels to `windows-1252`, `windows-1254`, `windows-874`.\
156
+ > `createSinglebyteDecoder()` (unlike `TextDecoder` or `legacyHookDecode()`) does not do such mapping,
157
+ > so its results will differ from `TextDecoder` for those encoding names.
158
+
159
+ ```js
160
+ > new TextDecoder('iso-8859-1').encoding
161
+ 'windows-1252'
162
+ > new TextDecoder('iso-8859-9').encoding
163
+ 'windows-1254'
164
+ > new TextDecoder('iso-8859-11').encoding
165
+ 'windows-874'
166
+ > new TextDecoder('iso-8859-9').decode(Uint8Array.of(0x80, 0x81, 0xd0))
167
+ '€\x81Ğ' // this is actually decoded according to windows-1254 per TextDecoder spec
168
+ > createSinglebyteDecoder('iso-8859-9')(Uint8Array.of(0x80, 0x81, 0xd0))
169
+ '\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
170
+ ```
171
+
172
+ All WHATWG Encoding spec [`windows-*` encodings](https://encoding.spec.whatwg.org/#windows-874) are supersets of
173
+ corresponding [unicode.org encodings](https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/), meaning that
174
+ they encode/decode all the old valid (non-replacement) strings / byte sequences identically, but can also support
175
+ a wider range of inputs.
176
+
140
177
  ##### `createSinglebyteDecoder(encoding, loose = false)`
141
178
 
142
179
  Create a decoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
@@ -152,12 +189,35 @@ Returns a function `encode(string)` that encodes a string to bytes.
152
189
  In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
153
190
  not be encoded in the target encoding.
154
191
 
192
+ ##### `latin1toString(arr)`
193
+
194
+ Decode `iso-8859-1` bytes to a string.
195
+
196
+ There is no loose variant for this encoding, all bytes can be decoded.
197
+
198
+ Same as:
199
+ ```js
200
+ const latin1toString = createSinglebyteDecoder('iso-8859-1')
201
+ ```
202
+
203
+ Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as
204
+ those alias to `new TextDecoder('windows-1252')`.
205
+
206
+ ##### `latin1fromString(string)`
207
+
208
+ Encode a string to `iso-8859-1` bytes.
209
+
210
+ Will throw on non well-formed strings or any codepoints which could not be encoded in `iso-8859-1`.
211
+
212
+ Same as:
213
+ ```js
214
+ const latin1fromString = createSinglebyteEncoder('iso-8859-1', { mode: 'fatal' })
215
+ ```
216
+
155
217
  ##### `windows1252toString(arr)`
156
218
 
157
219
  Decode `windows-1252` bytes to a string.
158
220
 
159
- Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
160
-
161
221
  There is no loose variant for this encoding, all bytes can be decoded.
162
222
 
163
223
  Same as:
@@ -169,8 +229,6 @@ const windows1252toString = createSinglebyteDecoder('windows-1252')
169
229
 
170
230
  Encode a string to `windows-1252` bytes.
171
231
 
172
- Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
173
-
174
232
  Will throw on non well-formed strings or any codepoints which could not be encoded in `windows-1252`.
175
233
 
176
234
  Same as:
@@ -212,6 +270,8 @@ import { fromBigInt, toBigInt } from '@exodus/bytes/bigint.js'
212
270
 
213
271
  ### `@exodus/bytes/hex.js`
214
272
 
273
+ Implements Base16 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
274
+
215
275
  ```js
216
276
  import { fromHex, toHex } from '@exodus/bytes/hex.js'
217
277
  ```
@@ -221,6 +281,8 @@ import { fromHex, toHex } from '@exodus/bytes/hex.js'
221
281
 
222
282
  ### `@exodus/bytes/base64.js`
223
283
 
284
+ Implements Base64 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
285
+
224
286
  ```js
225
287
  import { fromBase64, toBase64 } from '@exodus/bytes/base64.js'
226
288
  import { fromBase64url, toBase64url } from '@exodus/bytes/base64.js'
@@ -235,6 +297,8 @@ import { fromBase64any } from '@exodus/bytes/base64.js'
235
297
 
236
298
  ### `@exodus/bytes/base32.js`
237
299
 
300
+ Implements Base32 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
301
+
238
302
  ```js
239
303
  import { fromBase32, toBase32 } from '@exodus/bytes/base32.js'
240
304
  import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
@@ -247,10 +311,12 @@ import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
247
311
 
248
312
  ### `@exodus/bytes/bech32.js`
249
313
 
314
+ Implements [BIP-0173](https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki#specification) and [BIP-0350](https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki#specification).
315
+
250
316
  ```js
251
317
  import { fromBech32, toBech32 } from '@exodus/bytes/bech32.js'
252
- import { fromBech32m, toBech32m } from '@exodus/bytes/base32.js'
253
- import { getPrefix } from '@exodus/bytes/base32.js'
318
+ import { fromBech32m, toBech32m } from '@exodus/bytes/bech32.js'
319
+ import { getPrefix } from '@exodus/bytes/bech32.js'
254
320
  ```
255
321
 
256
322
  ##### `getPrefix(str, limit = 90)`
@@ -282,7 +348,7 @@ import { fromBase58checkSync, toBase58checkSync } from '@exodus/bytes/base58chec
282
348
  import { makeBase58check } from '@exodus/bytes/base58check.js'
283
349
  ```
284
350
 
285
- On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/package/@exodus/crypto) to be installed.
351
+ On non-Node.js, requires peer dependency [@noble/hashes](https://www.npmjs.com/package/@noble/hashes) to be installed.
286
352
 
287
353
  ##### `async fromBase58check(str, format = 'uint8')`
288
354
  ##### `async toBase58check(arr)`
@@ -297,7 +363,7 @@ import { fromWifString, toWifString } from '@exodus/bytes/wif.js'
297
363
  import { fromWifStringSync, toWifStringSync } from '@exodus/bytes/wif.js'
298
364
  ```
299
365
 
300
- On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/package/@exodus/crypto) to be installed.
366
+ On non-Node.js, requires peer dependency [@noble/hashes](https://www.npmjs.com/package/@noble/hashes) to be installed.
301
367
 
302
368
  ##### `async fromWifString(string, version)`
303
369
  ##### `fromWifStringSync(string, version)`
@@ -466,6 +532,27 @@ true
466
532
  '%'
467
533
  ```
468
534
 
535
+ ### `@exodus/bytes/encoding-browser.js`
536
+
537
+ ```js
538
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-browser.js'
539
+ import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-browser.js' // Requires Streams
540
+
541
+ // Hooks for standards
542
+ import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-browser.js'
543
+ ```
544
+
545
+ Same as `@exodus/bytes/encoding.js`, but in browsers instead of polyfilling just uses whatever the
546
+ browser provides, drastically reducing the bundle size (to less than 2 KiB gzipped).
547
+
548
+ Under non-browser engines (Node.js, React Native, etc.) a full polyfill is used as those platforms
549
+ do not provide sufficiently complete / non-buggy `TextDecoder` APIs.
550
+
551
+ > [!NOTE]
552
+ > Implementations in browsers [have bugs](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit),
553
+ > but they are fixing them and the expected update window is short.\
554
+ > If you want to circumvent browser bugs, use full `@exodus/bytes/encoding.js` import.
555
+
469
556
  ## License
470
557
 
471
558
  [MIT](./LICENSE)
package/array.d.ts CHANGED
@@ -21,4 +21,3 @@ export type OutputFormat = 'uint8' | 'buffer';
21
21
  export function typedView(arr: ArrayBufferView, format: 'uint8'): Uint8Array;
22
22
  export function typedView(arr: ArrayBufferView, format: 'buffer'): Buffer;
23
23
  export function typedView(arr: ArrayBufferView, format: OutputFormat): Uint8Array | Buffer;
24
-
package/base32.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { assertEmptyRest } from './assert.js'
2
2
  import { typedView } from './array.js'
3
+ import { E_STRING } from './fallback/_utils.js'
3
4
  import * as js from './fallback/base32.js'
4
5
 
5
6
  // See https://datatracker.ietf.org/doc/html/rfc4648
@@ -25,7 +26,7 @@ export function fromBase32hex(str, options) {
25
26
  }
26
27
 
27
28
  function fromBase32common(str, isBase32Hex, padding, format, rest) {
28
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
29
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
29
30
  if (rest !== null) assertEmptyRest(rest)
30
31
 
31
32
  if (padding === true) {
package/base58.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { typedView } from './array.js'
2
2
  import { assertUint8 } from './assert.js'
3
- import { nativeDecoder, nativeEncoder, isHermes } from './fallback/_utils.js'
3
+ import { nativeDecoder, nativeEncoder, isHermes, E_STRING } from './fallback/_utils.js'
4
4
  import { encodeAscii, decodeAscii } from './fallback/latin1.js'
5
5
 
6
6
  const alphabet58 = [...'123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz']
@@ -122,7 +122,7 @@ function toBase58core(arr, alphabet, codes) {
122
122
  }
123
123
 
124
124
  function fromBase58core(str, alphabet, codes, format = 'uint8') {
125
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
125
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
126
126
  const length = str.length
127
127
  if (length === 0) return typedView(new Uint8Array(), format)
128
128
 
@@ -207,7 +207,7 @@ function fromBase58core(str, alphabet, codes, format = 'uint8') {
207
207
  }
208
208
 
209
209
  at = k + 1
210
- if (c !== 0 || at < zeros) throw new Error('Unexpected') // unreachable
210
+ if (c !== 0 || at < zeros) /* c8 ignore next */ throw new Error('Unexpected') // unreachable
211
211
  }
212
212
  }
213
213
 
package/base58check.js CHANGED
@@ -1,11 +1,10 @@
1
- import { hashSync } from '@exodus/crypto/hash' // eslint-disable-line @exodus/import/no-deprecated
1
+ import { sha256 } from '@noble/hashes/sha2.js'
2
2
  import { makeBase58check } from './fallback/base58check.js'
3
3
 
4
4
  // Note: while API is async, we use synchronous hashing (hash256sync) for now until we improve webcrypto perf for hash256
5
5
  // Inputs to base58 are typically very small, and that makes a difference
6
6
 
7
7
  // eslint-disable-next-line @exodus/import/no-deprecated
8
- const sha256 = (x) => hashSync('sha256', x, 'uint8')
9
8
  const hash256sync = (x) => sha256(sha256(x))
10
9
  const hash256 = hash256sync // See note at the top
11
10
  const {
package/base64.d.ts CHANGED
@@ -73,4 +73,3 @@ export function fromBase64url(str: string, options: FromBase64Options & { format
73
73
  */
74
74
  export function fromBase64any(str: string, options?: FromBase64Options): Uint8ArrayBuffer;
75
75
  export function fromBase64any(str: string, options: FromBase64Options & { format: 'buffer' }): Buffer;
76
-
package/base64.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { assertUint8, assertEmptyRest } from './assert.js'
2
2
  import { typedView } from './array.js'
3
- import { isHermes, skipWeb } from './fallback/_utils.js'
3
+ import { isHermes, skipWeb, E_STRING } from './fallback/_utils.js'
4
4
  import { decodeLatin1, encodeLatin1 } from './fallback/latin1.js'
5
5
  import * as js from './fallback/base64.js'
6
6
 
@@ -79,7 +79,7 @@ export function fromBase64any(str, { format = 'uint8', padding = 'both', ...rest
79
79
  }
80
80
 
81
81
  function fromBase64common(str, isBase64url, padding, format, rest) {
82
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
82
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
83
83
  if (rest !== null) assertEmptyRest(rest)
84
84
  const auto = padding === 'both' ? str.endsWith('=') : undefined
85
85
  // Older JSC supporting Uint8Array.fromBase64 lacks proper checks
package/bech32.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { assertUint8 } from './assert.js'
2
- import { nativeEncoder } from './fallback/_utils.js'
2
+ import { nativeEncoder, E_STRING } from './fallback/_utils.js'
3
3
  import { decodeAscii, encodeAscii, encodeLatin1 } from './fallback/latin1.js'
4
4
 
5
5
  const alphabet = [...'qpzry9x8gf2tvdw0s3jn54khce6mua7l']
@@ -12,7 +12,6 @@ const E_MIXED = 'Mixed-case string'
12
12
  const E_PADDING = 'Padding is invalid'
13
13
  const E_CHECKSUM = 'Invalid checksum'
14
14
  const E_CHARACTER = 'Non-bech32 character'
15
- const E_STRING = 'Input is not a string'
16
15
 
17
16
  // nativeEncoder path uses encodeAscii which asserts ascii, otherwise we have 0-255 bytes from encodeLatin1
18
17
  const c2x = new Int8Array(nativeEncoder ? 128 : 256).fill(-1)
@@ -0,0 +1,29 @@
1
+ import {
2
+ fromSource,
3
+ getBOMEncoding,
4
+ normalizeEncoding,
5
+ E_ENCODING,
6
+ } from './fallback/encoding.api.js'
7
+ import labels from './fallback/encoding.labels.js'
8
+
9
+ // Lite-weight version which re-exports existing implementations on browsers,
10
+ // while still being aliased to the full impl in RN and Node.js
11
+
12
+ // WARNING: Note that browsers have bugs (which hopefully will get fixed soon)
13
+
14
+ const { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream } = globalThis
15
+
16
+ export { normalizeEncoding, getBOMEncoding, labelToName } from './fallback/encoding.api.js'
17
+ export { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream }
18
+
19
+ // https://encoding.spec.whatwg.org/#decode
20
+ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
21
+ let u8 = fromSource(input)
22
+ const bomEncoding = getBOMEncoding(u8)
23
+ if (bomEncoding) u8 = u8.subarray(bomEncoding === 'utf-8' ? 3 : 2)
24
+ const enc = bomEncoding ?? normalizeEncoding(fallbackEncoding) // "the byte order mark is more authoritative than anything else"
25
+ if (enc === 'utf-8') return new TextDecoder('utf-8', { ignoreBOM: true }).decode(u8) // fast path
26
+ if (enc === 'replacement') return u8.byteLength > 0 ? '\uFFFD' : ''
27
+ if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)
28
+ return new TextDecoder(enc, { ignoreBOM: true }).decode(u8)
29
+ }
@@ -0,0 +1 @@
1
+ export * from './encoding.js'
@@ -0,0 +1 @@
1
+ export * from './encoding.js'
@@ -0,0 +1 @@
1
+ export * from './encoding.js'
@@ -0,0 +1 @@
1
+ export * from './encoding.js'
package/encoding.d.ts ADDED
@@ -0,0 +1,58 @@
1
+ /// <reference types="node" />
2
+
3
+ /**
4
+ * Converts an encoding label to its name, as an ASCII-lowercased string
5
+ * @param label - The encoding label to normalize
6
+ * @returns The normalized encoding name, or null if invalid
7
+ */
8
+ export function normalizeEncoding(label: string): string | null;
9
+
10
+ /**
11
+ * Implements BOM sniff (https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
12
+ * @param input - The bytes to check for BOM
13
+ * @returns The encoding ('utf-8', 'utf-16le', 'utf-16be'), or null if no BOM found
14
+ */
15
+ export function getBOMEncoding(
16
+ input: ArrayBufferLike | ArrayBufferView
17
+ ): 'utf-8' | 'utf-16le' | 'utf-16be' | null;
18
+
19
+ /**
20
+ * Implements decode (https://encoding.spec.whatwg.org/#decode) legacy hook.
21
+ * @param input - The bytes to decode
22
+ * @param fallbackEncoding - The encoding to use if no BOM detected (default: 'utf-8')
23
+ * @returns The decoded string
24
+ */
25
+ export function legacyHookDecode(
26
+ input: ArrayBufferLike | ArrayBufferView,
27
+ fallbackEncoding?: string
28
+ ): string;
29
+
30
+ /**
31
+ * Converts an encoding label to its name, as a case-sensitive string.
32
+ * @param label - The encoding label
33
+ * @returns The proper case encoding name, or null if invalid
34
+ */
35
+ export function labelToName(label: string): string | null;
36
+
37
+ /**
38
+ * Text decoder for decoding bytes to strings in various encodings
39
+ * Supports strict and lossy modes
40
+ */
41
+ export const TextDecoder: typeof globalThis.TextDecoder;
42
+
43
+ /**
44
+ * Text encoder for encoding strings to UTF-8 bytes
45
+ */
46
+ export const TextEncoder: typeof globalThis.TextEncoder;
47
+
48
+ /**
49
+ * Transform stream wrapper for TextDecoder
50
+ * Decodes chunks of bytes to strings
51
+ */
52
+ export const TextDecoderStream: typeof globalThis.TextDecoderStream;
53
+
54
+ /**
55
+ * Transform stream wrapper for TextEncoder
56
+ * Encodes chunks of strings to UTF-8 bytes
57
+ */
58
+ export const TextEncoderStream: typeof globalThis.TextEncoderStream;
@@ -47,6 +47,7 @@ function shouldSkipBuiltins() {
47
47
  return /firefox/i.test(g.navigator.userAgent || '') // as simple as we can
48
48
  }
49
49
 
50
+ /* c8 ignore next */
50
51
  return false // eslint-disable-line no-unreachable
51
52
  }
52
53
 
@@ -128,3 +129,5 @@ export const toBuf = (x) =>
128
129
  x.byteLength <= 64 && x.BYTES_PER_ELEMENT === 1
129
130
  ? Buffer.from(x)
130
131
  : Buffer.from(x.buffer, x.byteOffset, x.byteLength)
132
+
133
+ export const E_STRING = 'Input is not a string'
@@ -0,0 +1,81 @@
1
+ import labels from './encoding.labels.js'
2
+
3
+ let labelsMap
4
+
5
+ export const E_ENCODING = 'Unknown encoding'
6
+
7
+ // Warning: unlike whatwg-encoding, returns lowercased labels
8
+ // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
9
+ // https://encoding.spec.whatwg.org/#names-and-labels
10
+ export function normalizeEncoding(label) {
11
+ // fast path
12
+ if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
13
+ if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
14
+ // full map
15
+ if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
16
+ const low = `${label}`.trim().toLowerCase()
17
+ if (Object.hasOwn(labels, low)) return low
18
+ if (!labelsMap) {
19
+ labelsMap = new Map()
20
+ for (const [label, aliases] of Object.entries(labels)) {
21
+ for (const alias of aliases) labelsMap.set(alias, label)
22
+ }
23
+ }
24
+
25
+ const mapped = labelsMap.get(low)
26
+ if (mapped) return mapped
27
+ return null
28
+ }
29
+
30
+ // TODO: make this more strict against Symbol.toStringTag
31
+ // Is not very significant though, anything faking Symbol.toStringTag could as well override
32
+ // prototypes, which is not something we protect against
33
+
34
+ function isAnyArrayBuffer(x) {
35
+ if (x instanceof ArrayBuffer) return true
36
+ if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
37
+ if (!x || typeof x.byteLength !== 'number') return false
38
+ const s = Object.prototype.toString.call(x)
39
+ return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
40
+ }
41
+
42
+ export function fromSource(x) {
43
+ if (x instanceof Uint8Array) return x
44
+ if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
45
+ if (isAnyArrayBuffer(x)) {
46
+ if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
47
+ // Old engines without .detached, try-catch
48
+ try {
49
+ return new Uint8Array(x)
50
+ } catch {
51
+ return new Uint8Array()
52
+ }
53
+ }
54
+
55
+ throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
56
+ }
57
+
58
+ // Warning: unlike whatwg-encoding, returns lowercased labels
59
+ // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
60
+ export function getBOMEncoding(input) {
61
+ const u8 = fromSource(input) // asserts
62
+ if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
63
+ if (u8.length < 2) return null
64
+ if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
65
+ if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
66
+ return null
67
+ }
68
+
69
+ const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
70
+
71
+ // Unlike normalizeEncoding, case-sensitive
72
+ // https://encoding.spec.whatwg.org/#names-and-labels
73
+ export function labelToName(label) {
74
+ const enc = normalizeEncoding(label)
75
+ if (enc === 'utf-8') return 'UTF-8' // fast path
76
+ if (!enc) return enc
77
+ if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
78
+ if (enc === 'big5') return 'Big5'
79
+ if (enc === 'shift_jis') return 'Shift_JIS'
80
+ return enc
81
+ }
@@ -5,14 +5,15 @@ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
5
5
  import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
6
6
  import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
7
7
  import labels from './encoding.labels.js'
8
+ import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
8
9
  import { unfinishedBytes } from './encoding.util.js'
9
10
 
10
- const E_OPTIONS = 'The "options" argument must be of type object'
11
- const E_ENCODING = 'Unknown encoding'
12
- const replacementChar = '\uFFFD'
11
+ export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
13
12
 
13
+ const E_OPTIONS = 'The "options" argument must be of type object'
14
14
  const E_MULTI =
15
15
  'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
16
+ const replacementChar = '\uFFFD'
16
17
  const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
17
18
  let createMultibyteDecoder
18
19
 
@@ -20,67 +21,14 @@ export function setMultibyteDecoder(createDecoder) {
20
21
  createMultibyteDecoder = createDecoder
21
22
  }
22
23
 
23
- let labelsMap
24
-
25
- // Warning: unlike whatwg-encoding, returns lowercased labels
26
- // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
27
- // https://encoding.spec.whatwg.org/#names-and-labels
28
- export function normalizeEncoding(label) {
29
- // fast path
30
- if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
31
- if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
32
- // full map
33
- if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
34
- const low = `${label}`.trim().toLowerCase()
35
- if (Object.hasOwn(labels, low)) return low
36
- if (!labelsMap) {
37
- labelsMap = new Map()
38
- for (const [label, aliases] of Object.entries(labels)) {
39
- for (const alias of aliases) labelsMap.set(alias, label)
40
- }
41
- }
42
-
43
- const mapped = labelsMap.get(low)
44
- if (mapped) return mapped
45
- return null
46
- }
47
-
48
24
  const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
49
25
 
50
- // TODO: make this more strict against Symbol.toStringTag
51
- // Is not very significant though, anything faking Symbol.toStringTag could as well override
52
- // prototypes, which is not something we protect against
53
-
54
- function isAnyArrayBuffer(x) {
55
- if (x instanceof ArrayBuffer) return true
56
- if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
57
- if (!x || typeof x.byteLength !== 'number') return false
58
- const s = Object.prototype.toString.call(x)
59
- return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
60
- }
61
-
62
26
  function isAnyUint8Array(x) {
63
27
  if (x instanceof Uint8Array) return true
64
28
  if (!x || !ArrayBuffer.isView(x) || x.BYTES_PER_ELEMENT !== 1) return false
65
29
  return Object.prototype.toString.call(x) === '[object Uint8Array]'
66
30
  }
67
31
 
68
- const fromSource = (x) => {
69
- if (x instanceof Uint8Array) return x
70
- if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
71
- if (isAnyArrayBuffer(x)) {
72
- if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
73
- // Old engines without .detached, try-catch
74
- try {
75
- return new Uint8Array(x)
76
- } catch {
77
- return new Uint8Array()
78
- }
79
- }
80
-
81
- throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
82
- }
83
-
84
32
  function unicodeDecoder(encoding, loose) {
85
33
  if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
86
34
  const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
@@ -215,6 +163,7 @@ export class TextDecoder {
215
163
  return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
216
164
  }
217
165
 
166
+ /* c8 ignore next */
218
167
  throw new Error('Unreachable')
219
168
  }
220
169
  }
@@ -341,17 +290,6 @@ export class TextEncoderStream {
341
290
  }
342
291
  }
343
292
 
344
- // Warning: unlike whatwg-encoding, returns lowercased labels
345
- // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
346
- export function getBOMEncoding(input) {
347
- const u8 = fromSource(input) // asserts
348
- if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
349
- if (u8.length < 2) return null
350
- if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
351
- if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
352
- return null
353
- }
354
-
355
293
  // https://encoding.spec.whatwg.org/#decode
356
294
  // Warning: encoding sniffed from BOM takes preference over the supplied one
357
295
  // Warning: lossy, performs replacement, no option of throwing
@@ -368,7 +306,7 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
368
306
  let suffix = ''
369
307
  if (u8.byteLength % 2 !== 0) {
370
308
  suffix = replacementChar
371
- u8 = u8.subarray(0, -1)
309
+ u8 = u8.subarray(0, -unfinishedBytes(u8, u8.byteLength, enc))
372
310
  }
373
311
 
374
312
  return utf16toStringLoose(u8, enc === 'utf-16le' ? 'uint8-le' : 'uint8-be') + suffix
@@ -387,17 +325,3 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
387
325
 
388
326
  return createSinglebyteDecoder(enc, true)(u8)
389
327
  }
390
-
391
- const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
392
-
393
- // Unlike normalizeEncoding, case-sensitive
394
- // https://encoding.spec.whatwg.org/#names-and-labels
395
- export function labelToName(label) {
396
- const enc = normalizeEncoding(label)
397
- if (enc === 'utf-8') return 'UTF-8' // fast path
398
- if (!enc) return enc
399
- if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
400
- if (enc === 'big5') return 'Big5'
401
- if (enc === 'shift_jis') return 'Shift_JIS'
402
- return enc
403
- }