@exodus/bytes 1.6.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # `@exodus/bytes`
2
2
 
3
+ [![](https://flat.badgen.net/npm/v/@exodus/bytes)](https://npmjs.org/package/@exodus/bytes)
4
+ ![](https://flat.badgen.net/npm/dm/@exodus/bytes)
5
+ [![](https://flat.badgen.net/npm/license/@exodus/bytes)](https://github.com/ExodusOSS/bytes/blob/HEAD/LICENSE)
6
+
3
7
  `Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, `utf8`, `utf16`, `bech32` and `wif`
4
8
 
5
9
  And a [`TextEncoder` / `TextDecoder` polyfill](#textencoder--textdecoder-polyfill)
@@ -31,13 +35,14 @@ See [Performance](./Performance.md) for more info
31
35
 
32
36
  ```js
33
37
  import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
38
+ import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
34
39
  ```
35
40
 
36
41
  Less than half the bundle size of [text-encoding](https://npmjs.com/text-encoding), [whatwg-encoding](https://npmjs.com/whatwg-encoding) or [iconv-lite](https://npmjs.com/iconv-lite) (gzipped or not).\
37
42
  Also [much faster](#fast) than all of those.
38
43
 
39
44
  > [!TIP]
40
- > See also the [lite version](#lite-version) to get this down to 9 KiB gzipped.
45
+ > See also the [lite version](#lite-version) to get this down to 10 KiB gzipped.
41
46
 
42
47
  Spec compliant, passing WPT and covered with extra tests.\
43
48
  Moreover, tests for this library uncovered [bugs in all major implementations](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit).\
@@ -77,11 +82,12 @@ _These are only provided as a compatibility layer, prefer hardened APIs instead
77
82
  If you don't need support for legacy multi-byte encodings, you can use the lite import:
78
83
  ```js
79
84
  import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
85
+ import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
80
86
  ```
81
87
 
82
- This reduces the bundle size 10x:\
83
- from 90 KiB gzipped for `@exodus/bytes/encoding.js` to 9 KiB gzipped for `@exodus/bytes/encoding-lite.js`.\
84
- (For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped).
88
+ This reduces the bundle size 9x:\
89
+ from 90 KiB gzipped for `@exodus/bytes/encoding.js` to 10 KiB gzipped for `@exodus/bytes/encoding-lite.js`.\
90
+ (For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped).
85
91
 
86
92
  It still supports `utf-8`, `utf-16le`, `utf-16be` and all single-byte encodings specified by the spec,
87
93
  the only difference is support for legacy multi-byte encodings.
@@ -121,32 +127,90 @@ import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js
121
127
  ### `@exodus/bytes/single-byte.js`
122
128
 
123
129
  ```js
124
- import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
125
- import { windows1252toString } from '@exodus/bytes/single-byte.js'
130
+ import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
131
+ import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
126
132
  ```
127
133
 
128
- Decode the legacy single-byte encodings according to the [Encoding standard](https://encoding.spec.whatwg.org/)
129
- ([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings) and
130
- [§14.5](https://encoding.spec.whatwg.org/#x-user-defined)).
134
+ Decode / encode the legacy single-byte encodings according to the
135
+ [Encoding standard](https://encoding.spec.whatwg.org/)
136
+ ([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings),
137
+ [§14.5](https://encoding.spec.whatwg.org/#x-user-defined)),
138
+ and [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859) `iso-8859-*` mappings.
131
139
 
132
- Supports all single-byte encodings listed in the standard:
140
+ Supports all single-byte encodings listed in the WHATWG Encoding standard:
133
141
  `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
134
142
  `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
135
143
  `macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
136
144
  `windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
137
145
 
146
+ Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
147
+ [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859)
148
+ (and all other `iso-8859-*` encodings there as they match WHATWG).
149
+
150
+ > [!NOTE]
151
+ > While all `iso-8859-*` encodings supported by the [WHATWG Encoding standard](https://encoding.spec.whatwg.org/) match
152
+ > [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859), the WHATWG Encoding spec doesn't support
153
+ > `iso-8859-1`, `iso-8859-9`, `iso-8859-11`, and instead maps them as labels to `windows-1252`, `windows-1254`, `windows-874`.\
154
+ > `createSinglebyteDecoder()` (unlike `TextDecoder` or `legacyHookDecode()`) does not do such mapping,
155
+ > so its results will differ from `TextDecoder` for those encoding names.
156
+
157
+ ```js
158
+ > new TextDecoder('iso-8859-1').encoding
159
+ 'windows-1252'
160
+ > new TextDecoder('iso-8859-9').encoding
161
+ 'windows-1254'
162
+ > new TextDecoder('iso-8859-11').encoding
163
+ 'windows-874'
164
+ > new TextDecoder('iso-8859-9').decode(Uint8Array.of(0x80, 0x81, 0xd0))
165
+ '€\x81Ğ' // this is actually decoded according to windows-1254 per TextDecoder spec
166
+ > createSinglebyteDecoder('iso-8859-9')(Uint8Array.of(0x80, 0x81, 0xd0))
167
+ '\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
168
+ ```
169
+
138
170
  ##### `createSinglebyteDecoder(encoding, loose = false)`
139
171
 
140
- Create a decoder for a supported one-byte `encoding`, given it's lowercased name `encoding`.
172
+ Create a decoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
141
173
 
142
174
  Returns a function `decode(arr)` that decodes bytes to a string.
143
175
 
176
+ ##### `createSinglebyteEncoder(encoding, { mode = 'fatal' })`
177
+
178
+ Create an encoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
179
+
180
+ Returns a function `encode(string)` that encodes a string to bytes.
181
+
182
+ In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
183
+ not be encoded in the target encoding.
184
+
185
+ ##### `latin1toString(arr)`
186
+
187
+ Decode `iso-8859-1` bytes to a string.
188
+
189
+ There is no loose variant for this encoding, all bytes can be decoded.
190
+
191
+ Same as:
192
+ ```js
193
+ const latin1toString = createSinglebyteDecoder('iso-8859-1')
194
+ ```
195
+
196
+ Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as
197
+ those alias to `new TextDecoder('windows-1252')`.
198
+
199
+ ##### `latin1fromString(string)`
200
+
201
+ Encode a string to `iso-8859-1` bytes.
202
+
203
+ Will throw on non well-formed strings or any codepoints which could not be encoded in `iso-8859-1`.
204
+
205
+ Same as:
206
+ ```js
207
+ const latin1fromString = createSinglebyteEncoder('iso-8859-1', { mode: 'fatal' })
208
+ ```
209
+
144
210
  ##### `windows1252toString(arr)`
145
211
 
146
212
  Decode `windows-1252` bytes to a string.
147
213
 
148
- Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
149
-
150
214
  There is no loose variant for this encoding, all bytes can be decoded.
151
215
 
152
216
  Same as:
@@ -154,6 +218,17 @@ Same as:
154
218
  const windows1252toString = createSinglebyteDecoder('windows-1252')
155
219
  ```
156
220
 
221
+ ##### `windows1252fromString(string)`
222
+
223
+ Encode a string to `windows-1252` bytes.
224
+
225
+ Will throw on non well-formed strings or any codepoints which could not be encoded in `windows-1252`.
226
+
227
+ Same as:
228
+ ```js
229
+ const windows1252fromString = createSinglebyteEncoder('windows-1252', { mode: 'fatal' })
230
+ ```
231
+
157
232
  ### `@exodus/bytes/multi-byte.js`
158
233
 
159
234
  ```js
@@ -171,7 +246,7 @@ Supports all legacy multi-byte encodings listed in the standard:
171
246
 
172
247
  ##### `createMultibyteDecoder(encoding, loose = false)`
173
248
 
174
- Create a decoder for a supported legacy multi-byte `encoding`, given it's lowercased name `encoding`.
249
+ Create a decoder for a supported legacy multi-byte `encoding`, given its lowercased name `encoding`.
175
250
 
176
251
  Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
177
252
 
@@ -188,6 +263,8 @@ import { fromBigInt, toBigInt } from '@exodus/bytes/bigint.js'
188
263
 
189
264
  ### `@exodus/bytes/hex.js`
190
265
 
266
+ Implements Base16 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
267
+
191
268
  ```js
192
269
  import { fromHex, toHex } from '@exodus/bytes/hex.js'
193
270
  ```
@@ -197,6 +274,8 @@ import { fromHex, toHex } from '@exodus/bytes/hex.js'
197
274
 
198
275
  ### `@exodus/bytes/base64.js`
199
276
 
277
+ Implements Base64 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
278
+
200
279
  ```js
201
280
  import { fromBase64, toBase64 } from '@exodus/bytes/base64.js'
202
281
  import { fromBase64url, toBase64url } from '@exodus/bytes/base64.js'
@@ -211,6 +290,8 @@ import { fromBase64any } from '@exodus/bytes/base64.js'
211
290
 
212
291
  ### `@exodus/bytes/base32.js`
213
292
 
293
+ Implements Base32 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
294
+
214
295
  ```js
215
296
  import { fromBase32, toBase32 } from '@exodus/bytes/base32.js'
216
297
  import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
@@ -223,6 +304,8 @@ import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
223
304
 
224
305
  ### `@exodus/bytes/bech32.js`
225
306
 
307
+ Implements [BIP-0173](https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki#specification) and [BIP-0350](https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki#specification).
308
+
226
309
  ```js
227
310
  import { fromBech32, toBech32 } from '@exodus/bytes/bech32.js'
228
311
  import { fromBech32m, toBech32m } from '@exodus/bytes/bech32.js'
@@ -284,6 +367,7 @@ On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/
284
367
 
285
368
  ```js
286
369
  import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
370
+ import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
287
371
 
288
372
  // Hooks for standards
289
373
  import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js'
@@ -291,7 +375,9 @@ import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from
291
375
 
292
376
  Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
293
377
  [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
294
- [TextEncoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
378
+ [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
379
+ [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream),
380
+ [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream),
295
381
  some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
296
382
 
297
383
  #### `new TextDecoder(label = 'utf-8', { fatal = false, ignoreBOM = false })`
@@ -300,7 +386,21 @@ some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
300
386
 
301
387
  #### `new TextEncoder()`
302
388
 
303
- [TextEncoder](https://encoding.spec.whatwg.org/#interface-textdecoder) implementation/polyfill.
389
+ [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder) implementation/polyfill.
390
+
391
+ #### `new TextDecoderStream(label = 'utf-8', { fatal = false, ignoreBOM = false })`
392
+
393
+ [TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream) implementation/polyfill.
394
+
395
+ Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
396
+ [polyfilled](https://npmjs.com/package/web-streams-polyfill).
397
+
398
+ #### `new TextEncoderStream()`
399
+
400
+ [TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream) implementation/polyfill.
401
+
402
+ Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
403
+ [polyfilled](https://npmjs.com/package/web-streams-polyfill).
304
404
 
305
405
  #### `labelToName(label)`
306
406
 
@@ -370,6 +470,7 @@ new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
370
470
 
371
471
  ```js
372
472
  import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
473
+ import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
373
474
 
374
475
  // Hooks for standards
375
476
  import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-lite.js'
@@ -393,7 +494,9 @@ To avoid inconsistencies, the exported classes and methods are exactly the same
393
494
  > lite = require('@exodus/bytes/encoding-lite.js')
394
495
  [Module: null prototype] {
395
496
  TextDecoder: [class TextDecoder],
497
+ TextDecoderStream: [class TextDecoderStream],
396
498
  TextEncoder: [class TextEncoder],
499
+ TextEncoderStream: [class TextEncoderStream],
397
500
  getBOMEncoding: [Function: getBOMEncoding],
398
501
  labelToName: [Function: labelToName],
399
502
  legacyHookDecode: [Function: legacyHookDecode],
@@ -406,7 +509,9 @@ Error: Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encod
406
509
  > full = require('@exodus/bytes/encoding.js')
407
510
  [Module: null prototype] {
408
511
  TextDecoder: [class TextDecoder],
512
+ TextDecoderStream: [class TextDecoderStream],
409
513
  TextEncoder: [class TextEncoder],
514
+ TextEncoderStream: [class TextEncoderStream],
410
515
  getBOMEncoding: [Function: getBOMEncoding],
411
516
  labelToName: [Function: labelToName],
412
517
  legacyHookDecode: [Function: legacyHookDecode],
package/base32.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { assertEmptyRest } from './assert.js'
2
2
  import { typedView } from './array.js'
3
+ import { E_STRING } from './fallback/_utils.js'
3
4
  import * as js from './fallback/base32.js'
4
5
 
5
6
  // See https://datatracker.ietf.org/doc/html/rfc4648
@@ -25,7 +26,7 @@ export function fromBase32hex(str, options) {
25
26
  }
26
27
 
27
28
  function fromBase32common(str, isBase32Hex, padding, format, rest) {
28
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
29
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
29
30
  if (rest !== null) assertEmptyRest(rest)
30
31
 
31
32
  if (padding === true) {
package/base58.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { typedView } from './array.js'
2
2
  import { assertUint8 } from './assert.js'
3
- import { nativeDecoder, nativeEncoder, isHermes } from './fallback/_utils.js'
3
+ import { nativeDecoder, nativeEncoder, isHermes, E_STRING } from './fallback/_utils.js'
4
4
  import { encodeAscii, decodeAscii } from './fallback/latin1.js'
5
5
 
6
6
  const alphabet58 = [...'123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz']
@@ -122,7 +122,7 @@ function toBase58core(arr, alphabet, codes) {
122
122
  }
123
123
 
124
124
  function fromBase58core(str, alphabet, codes, format = 'uint8') {
125
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
125
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
126
126
  const length = str.length
127
127
  if (length === 0) return typedView(new Uint8Array(), format)
128
128
 
package/base64.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { assertUint8, assertEmptyRest } from './assert.js'
2
2
  import { typedView } from './array.js'
3
- import { isHermes, skipWeb } from './fallback/_utils.js'
3
+ import { isHermes, skipWeb, E_STRING } from './fallback/_utils.js'
4
4
  import { decodeLatin1, encodeLatin1 } from './fallback/latin1.js'
5
5
  import * as js from './fallback/base64.js'
6
6
 
@@ -79,7 +79,7 @@ export function fromBase64any(str, { format = 'uint8', padding = 'both', ...rest
79
79
  }
80
80
 
81
81
  function fromBase64common(str, isBase64url, padding, format, rest) {
82
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
82
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
83
83
  if (rest !== null) assertEmptyRest(rest)
84
84
  const auto = padding === 'both' ? str.endsWith('=') : undefined
85
85
  // Older JSC supporting Uint8Array.fromBase64 lacks proper checks
package/bech32.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { assertUint8 } from './assert.js'
2
- import { nativeEncoder } from './fallback/_utils.js'
2
+ import { nativeEncoder, E_STRING } from './fallback/_utils.js'
3
3
  import { decodeAscii, encodeAscii, encodeLatin1 } from './fallback/latin1.js'
4
4
 
5
5
  const alphabet = [...'qpzry9x8gf2tvdw0s3jn54khce6mua7l']
@@ -12,7 +12,6 @@ const E_MIXED = 'Mixed-case string'
12
12
  const E_PADDING = 'Padding is invalid'
13
13
  const E_CHECKSUM = 'Invalid checksum'
14
14
  const E_CHARACTER = 'Non-bech32 character'
15
- const E_STRING = 'Input is not a string'
16
15
 
17
16
  // nativeEncoder path uses encodeAscii which asserts ascii, otherwise we have 0-255 bytes from encodeLatin1
18
17
  const c2x = new Int8Array(nativeEncoder ? 128 : 256).fill(-1)
@@ -179,6 +178,9 @@ function assertDecodeArgs(str, limit) {
179
178
  if (typeof limit !== 'number' || str.length < 8 || !(str.length <= limit)) throw new Error(E_SIZE)
180
179
  }
181
180
 
181
+ // this is instant on 8-bit strings
182
+ const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
183
+
182
184
  function fromBech32enc(str, limit, encoding) {
183
185
  assertDecodeArgs(str, limit)
184
186
  const lower = str.toLowerCase()
@@ -195,6 +197,7 @@ function fromBech32enc(str, limit, encoding) {
195
197
  if (wordsLength < 0) throw new Error(E_SIZE)
196
198
  const bytesLength = (wordsLength * 5) >> 3
197
199
  const slice = str.slice(split + 1)
200
+ if (!nativeEncoder && NON_LATIN.test(slice)) throw new SyntaxError(E_CHARACTER) // otherwise can't use encodeLatin1
198
201
  const c = nativeEncoder ? encodeAscii(slice, E_CHARACTER) : encodeLatin1(slice) // suboptimal, but only affects non-Hermes barebones
199
202
  const bytes = new Uint8Array(bytesLength)
200
203
 
@@ -0,0 +1 @@
1
+ export * from './encoding.js'
package/encoding-lite.js CHANGED
@@ -1,6 +1,8 @@
1
1
  export {
2
2
  TextDecoder,
3
3
  TextEncoder,
4
+ TextDecoderStream,
5
+ TextEncoderStream,
4
6
  normalizeEncoding,
5
7
  getBOMEncoding,
6
8
  labelToName,
package/encoding.d.ts ADDED
@@ -0,0 +1,58 @@
1
+ /// <reference types="node" />
2
+
3
+ /**
4
+ * Converts an encoding label to its name, as an ASCII-lowercased string
5
+ * @param label - The encoding label to normalize
6
+ * @returns The normalized encoding name, or null if invalid
7
+ */
8
+ export function normalizeEncoding(label: string): string | null;
9
+
10
+ /**
11
+ * Implements BOM sniff (https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
12
+ * @param input - The bytes to check for BOM
13
+ * @returns The encoding ('utf-8', 'utf-16le', 'utf-16be'), or null if no BOM found
14
+ */
15
+ export function getBOMEncoding(
16
+ input: ArrayBufferLike | ArrayBufferView
17
+ ): 'utf-8' | 'utf-16le' | 'utf-16be' | null;
18
+
19
+ /**
20
+ * Implements decode (https://encoding.spec.whatwg.org/#decode) legacy hook.
21
+ * @param input - The bytes to decode
22
+ * @param fallbackEncoding - The encoding to use if no BOM detected (default: 'utf-8')
23
+ * @returns The decoded string
24
+ */
25
+ export function legacyHookDecode(
26
+ input: ArrayBufferLike | ArrayBufferView,
27
+ fallbackEncoding?: string
28
+ ): string;
29
+
30
+ /**
31
+ * Converts an encoding label to its name, as a case-sensitive string.
32
+ * @param label - The encoding label
33
+ * @returns The proper case encoding name, or null if invalid
34
+ */
35
+ export function labelToName(label: string): string | null;
36
+
37
+ /**
38
+ * Text decoder for decoding bytes to strings in various encodings
39
+ * Supports strict and lossy modes
40
+ */
41
+ export const TextDecoder: typeof globalThis.TextDecoder;
42
+
43
+ /**
44
+ * Text encoder for encoding strings to UTF-8 bytes
45
+ */
46
+ export const TextEncoder: typeof globalThis.TextEncoder;
47
+
48
+ /**
49
+ * Transform stream wrapper for TextDecoder
50
+ * Decodes chunks of bytes to strings
51
+ */
52
+ export const TextDecoderStream: typeof globalThis.TextDecoderStream;
53
+
54
+ /**
55
+ * Transform stream wrapper for TextEncoder
56
+ * Encodes chunks of strings to UTF-8 bytes
57
+ */
58
+ export const TextEncoderStream: typeof globalThis.TextEncoderStream;
package/encoding.js CHANGED
@@ -6,6 +6,8 @@ setMultibyteDecoder(createMultibyteDecoder)
6
6
  export {
7
7
  TextDecoder,
8
8
  TextEncoder,
9
+ TextDecoderStream,
10
+ TextEncoderStream,
9
11
  normalizeEncoding,
10
12
  getBOMEncoding,
11
13
  labelToName,
@@ -5,14 +5,8 @@ export const isHermes = Boolean(globalThis.HermesInternal)
5
5
  export const isDeno = Boolean(globalThis.Deno)
6
6
  export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
7
7
 
8
- let isNative = (x) => {
9
- if (!x) return false
10
- if (haveNativeBuffer) return true // we consider Node.js TextDecoder/TextEncoder native
11
- const s = `${x}`
12
- // See https://github.com/facebook/hermes/pull/1855#issuecomment-3659386410
13
- return s.includes('[native code]') || s.includes(`[bytecode]`) // Static Hermes has [bytecode] for contrib, which includes TextEncoder/TextDecoder
14
- }
15
-
8
+ // We consider Node.js TextDecoder/TextEncoder native
9
+ let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
16
10
  if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
17
11
 
18
12
  export const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
@@ -134,3 +128,5 @@ export const toBuf = (x) =>
134
128
  x.byteLength <= 64 && x.BYTES_PER_ELEMENT === 1
135
129
  ? Buffer.from(x)
136
130
  : Buffer.from(x.buffer, x.byteOffset, x.byteLength)
131
+
132
+ export const E_STRING = 'Input is not a string'
@@ -68,7 +68,16 @@ function isAnyUint8Array(x) {
68
68
  const fromSource = (x) => {
69
69
  if (x instanceof Uint8Array) return x
70
70
  if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
71
- if (isAnyArrayBuffer(x)) return new Uint8Array(x)
71
+ if (isAnyArrayBuffer(x)) {
72
+ if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
73
+ // Old engines without .detached, try-catch
74
+ try {
75
+ return new Uint8Array(x)
76
+ } catch {
77
+ return new Uint8Array()
78
+ }
79
+ }
80
+
72
81
  throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
73
82
  }
74
83
 
@@ -259,6 +268,79 @@ export class TextEncoder {
259
268
  }
260
269
  }
261
270
 
271
+ const E_NO_STREAMS = 'TransformStream global not present in the environment'
272
+
273
+ // https://encoding.spec.whatwg.org/#interface-textdecoderstream
274
+ export class TextDecoderStream {
275
+ constructor(encoding = 'utf-8', options = {}) {
276
+ if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
277
+ const decoder = new TextDecoder(encoding, options)
278
+ const transform = new TransformStream({
279
+ transform: (chunk, controller) => {
280
+ const value = decoder.decode(fromSource(chunk), { stream: true })
281
+ if (value) controller.enqueue(value)
282
+ },
283
+ flush: (controller) => {
284
+ // https://streams.spec.whatwg.org/#dom-transformer-flush
285
+ const value = decoder.decode()
286
+ if (value) controller.enqueue(value)
287
+ // No need to call .terminate() (Node.js is wrong)
288
+ },
289
+ })
290
+
291
+ define(this, 'encoding', decoder.encoding)
292
+ define(this, 'fatal', decoder.fatal)
293
+ define(this, 'ignoreBOM', decoder.ignoreBOM)
294
+ define(this, 'readable', transform.readable)
295
+ define(this, 'writable', transform.writable)
296
+ }
297
+
298
+ get [Symbol.toStringTag]() {
299
+ return 'TextDecoderStream'
300
+ }
301
+ }
302
+
303
+ // https://encoding.spec.whatwg.org/#interface-textencoderstream
304
+ // Only UTF-8 per spec
305
+ export class TextEncoderStream {
306
+ constructor() {
307
+ if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
308
+ let lead
309
+ const transform = new TransformStream({
310
+ // https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk
311
+ // Not identical in code, but reuses loose mode to have identical behavior
312
+ transform: (chunk, controller) => {
313
+ let s = String(chunk) // DOMString, might contain unpaired surrogates
314
+ if (s.length === 0) return
315
+ if (lead) {
316
+ s = lead + s
317
+ lead = null
318
+ }
319
+
320
+ const last = s.charCodeAt(s.length - 1) // Can't come from previous lead due to length check
321
+ if ((last & 0xfc_00) === 0xd8_00) {
322
+ lead = s[s.length - 1]
323
+ s = s.slice(0, -1)
324
+ }
325
+
326
+ if (s) controller.enqueue(utf8fromStringLoose(s))
327
+ },
328
+ // https://encoding.spec.whatwg.org/#encode-and-flush
329
+ flush: (controller) => {
330
+ if (lead) controller.enqueue(Uint8Array.of(0xef, 0xbf, 0xbd))
331
+ },
332
+ })
333
+
334
+ define(this, 'encoding', 'utf-8')
335
+ define(this, 'readable', transform.readable)
336
+ define(this, 'writable', transform.writable)
337
+ }
338
+
339
+ get [Symbol.toStringTag]() {
340
+ return 'TextEncoderStream'
341
+ }
342
+ }
343
+
262
344
  // Warning: unlike whatwg-encoding, returns lowercased labels
263
345
  // Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
264
346
  export function getBOMEncoding(input) {
package/fallback/hex.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { assertUint8 } from '../assert.js'
2
- import { nativeDecoder, nativeEncoder, decode2string } from './_utils.js'
2
+ import { nativeDecoder, nativeEncoder, decode2string, E_STRING } from './_utils.js'
3
3
  import { encodeAscii, decodeAscii } from './latin1.js'
4
4
 
5
5
  let hexArray // array of 256 bytes converted to two-char hex strings
@@ -52,7 +52,7 @@ export function toHex(arr) {
52
52
  }
53
53
 
54
54
  export function fromHex(str) {
55
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
55
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
56
56
  if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
57
57
 
58
58
  const length = str.length / 2 // this helps Hermes in loops
@@ -6,12 +6,19 @@ import {
6
6
  isHermes,
7
7
  isDeno,
8
8
  isLE,
9
+ skipWeb,
9
10
  } from './_utils.js'
10
11
 
12
+ const { atob } = globalThis
13
+ const { toBase64: web64 } = Uint8Array.prototype
14
+
11
15
  // See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
12
16
  // On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
13
17
  const maxFunctionArgs = 0x20_00
14
18
 
19
+ // toBase64+atob path is faster on everything where fromBase64 is fast
20
+ const useLatin1atob = web64 && atob && !skipWeb
21
+
15
22
  export function asciiPrefix(arr) {
16
23
  let p = 0 // verified ascii bytes
17
24
  const length = arr.length
@@ -46,6 +53,18 @@ export function decodeLatin1(arr, start = 0, stop = arr.length) {
46
53
  stop |= 0
47
54
  const total = stop - start
48
55
  if (total === 0) return ''
56
+
57
+ if (
58
+ useLatin1atob &&
59
+ total >= 256 &&
60
+ total < 1e8 &&
61
+ arr.toBase64 === web64 &&
62
+ arr.BYTES_PER_ELEMENT === 1
63
+ ) {
64
+ const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
65
+ return atob(sliced.toBase64())
66
+ }
67
+
49
68
  if (total > maxFunctionArgs) {
50
69
  let prefix = ''
51
70
  for (let i = start; i < stop; ) {
@@ -107,8 +126,23 @@ export const encodeCharcodes = isHermes
107
126
  return arr
108
127
  }
109
128
 
129
+ export function encodeAsciiPrefix(x, s) {
130
+ let i = 0
131
+ for (const len3 = s.length - 3; i < len3; i += 4) {
132
+ const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore
133
+ if ((x0 | x1 | x2 | x3) >= 128) break
134
+ x[i] = x0
135
+ x[i + 1] = x1
136
+ x[i + 2] = x2
137
+ x[i + 3] = x3
138
+ }
139
+
140
+ return i
141
+ }
142
+
110
143
  /* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
111
144
 
145
+ // Warning: can be used only on checked strings, converts strings to 8-bit
112
146
  export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
113
147
 
114
148
  // Expects nativeEncoder to be present
@@ -474,6 +474,7 @@ const mappers = {
474
474
  export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path
475
475
 
476
476
  export function multibyteDecoder(enc, loose = false) {
477
+ if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
477
478
  if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
478
479
 
479
480
  // Input is assumed to be typechecked already
@@ -8,54 +8,75 @@ const h = (x) => new Array(x).fill(r)
8
8
 
9
9
  // Common ranges
10
10
 
11
- // prettier-ignore
12
- const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
13
- // prettier-ignore
14
- const k8b = [-30,1,21,-18,1,15,-17,18,-13,...e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
15
- const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
16
- const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
17
- // prettier-ignore
18
- const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,...p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,...p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
19
- const i0 = e(33)
20
11
  // prettier-ignore
21
12
  const i2 = [-40,-147,1,64,-62,117,-51,-63,69,-67,79,-77,79,-77,1,64,2,51,4,-116,1,124,-122,1,129,22,-148,150,-148,1,133,-131,118,-116,1,33,-31,86,-51,-32,38,-36,48,-46,48,-46,1,33,2,51,4,-85,1,93,-91,1,98,22,-117,119,-117,1,102,374]
22
- const i4a = [-75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1]
23
- const i4b = [34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1]
24
- const i7 = [721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r]
25
- const i8 = [...e(26), r, r, 6692, 1, r]
13
+ const i4a = [-75, -63, e(5), 104, -34, -67, 79, -77, 75, -73, 1]
14
+ const i4b = [34, -32, e(5), 73, -34, -36, 48, -46, 44, -42, 1]
15
+ const i7 = [721, 1, 1, -719, 721, -719, 721, e(19), r, 2, e(43), r]
16
+ const i8 = [e(26), r, r, 6692, 1, r]
17
+ const i9 = [79, -77, e(11), 84, 46, -127, e(16), 48, -46, e(11), 53, 46]
18
+ const iB = [3425, e(57), h(4), 5, e(28), h(4)]
19
+ const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
20
+ const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
26
21
  const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
27
22
  const w8 = [8072, 1, 3, 1, 5, -15, 1]
28
- const w1 = [...w8, -7480, 7750, -8129, 7897, -7911, -182]
29
- const w3 = [...w8, -8060, 8330, -8328, 8096, -8094]
23
+ const w1 = [w8, -7480, 7750, -8129, 7897, -7911, -182]
24
+ const w3 = [w8, -8060, 8330, -8328, 8096, -8094]
30
25
  const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
26
+ // prettier-ignore
27
+ const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
28
+ // prettier-ignore
29
+ const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
30
+ // prettier-ignore
31
+ const k8b = [-30,1,21,-18,1,15,-17,18,-13,e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
31
32
 
32
33
  // prettier-ignore
33
- export default {
34
- ibm866: [913,...e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,...e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
35
- 'iso-8859-10': [...i0,100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,...i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,...e(5),...i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
36
- 'iso-8859-13': [...i0,8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,...p3,7835],
37
- 'iso-8859-14': [...i0,7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,...e(15),165,-163,...e(5),7572,-7570,...e(5),153,-151,...e(16),134,-132,...e(5),7541,-7539,...e(5),122],
38
- 'iso-8859-15': [...i0,1,1,1,8201,-8199,187,-185,186,-184,...e(10),202,-200,1,1,199,-197,1,1,151,1,37],
39
- 'iso-8859-16': [...i0,100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,...e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,...e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
40
- 'iso-8859-2': [...i0,100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,...i2],
41
- 'iso-8859-3': [...i0,134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,...e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,...e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
42
- 'iso-8859-4': [...i0,100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,...i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,...i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
43
- 'iso-8859-5': [...i0,865,...e(11),-863,865,...e(65),7367,-7365,...e(11),-949,951,1],
44
- 'iso-8859-6': [...i0,r,r,r,4,...h(7),1384,-1375,...h(13),1390,r,r,r,4,r,2,...e(25),r,r,r,r,r,6,...e(18),...h(13)],
45
- 'iso-8859-7': [...i0,8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,...i7],
46
- 'iso-8859-8': [...i0,r,2,...e(7),46,-44,...e(14),62,-60,1,1,1,...h(32),8025,-6727,...i8],
47
- 'koi8-r': [...k8a,8450,...e(14),-8544,8545,...e(10),-9411,933,...k8b,-28,...k8b],
48
- 'koi8-u': [...k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,...k8b,-28,...k8b],
49
- macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,...m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
50
- 'windows-1250': [...w0,-7888,7897,-7903,10,25,-4,-233,...w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,...i2],
51
- 'windows-1251': [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,...e(63)],
52
- 'windows-1252': [...p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,...w1,225,-6],
53
- 'windows-1253': [...p1,-8089,8104,-8102,8111,-8109,1,1,1,1,...w3,1,1,1,1,741,1,-739,1,1,1,1,1,1,r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,...i7],
54
- 'windows-1254': [...p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,...w1,1,218,-216,...e(47),79,-77,...e(11),84,46,-127,...e(16),48,-46,...e(11),53,46],
55
- 'windows-1255': [...p1,-7515,7530,-8102,8111,-8109,1,1,1,1,...w8,-7480,7750,-8328,8096,-8094,...e(7),8199,-8197,1,1,1,1,46,-44,...e(14),62,-60,1,1,1,1,1265,...e(19),45,1,1,1,1,...h(7),-36,...i8],
56
- 'windows-1256': [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,...e(7),1557,-1555,...e(14),1378,-1376,1,1,1,1377,162,-160,...e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
57
- 'windows-1257': [...w0,-8102,8111,-8109,28,543,-527,-40,...w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,...e(5),...p3,347],
58
- 'windows-1258': [...p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,...w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,...e(34),64,-62,...e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,1,1,1,1,1,1,211,340,-548,1,1,1,33,-31,...e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,1,1,1,1,1,1,180,7931],
59
- 'windows-874': [8237,-8235,1,1,1,8098,-8096,...e(10),...w8,-8060,...e(8),3425,...e(57),r,r,r,r,5,...e(28),r,r,r,r],
60
- 'x-mac-cyrillic': [913,...e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,...m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,...e(30),7262]
34
+ const maps = {
35
+ ibm866: [913,e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
36
+ 'koi8-r': [k8a,8450,e(14),-8544,8545,e(10),-9411,933,k8b,-28,k8b],
37
+ 'koi8-u': [k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,k8b,-28,k8b],
38
+ 'x-mac-cyrillic': [913,e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,e(30),7262],
39
+ macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
40
+ 'windows-874': [8237,-8235,1,1,1,8098,-8096,e(10),w8,-8060,e(8),iB],
61
41
  }
42
+
43
+ // windows-1250 - windows-1258
44
+ // prettier-ignore
45
+ ;[
46
+ [w0,-7888,7897,-7903,10,25,-4,-233,w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,i2],
47
+ [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,e(63)],
48
+ [p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,w1,225,-6],
49
+ [p1,-8089,8104,-8102,8111,-8109,1,1,1,1,w3,1,1,1,1,741,1,-739,e(6),r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,i7],
50
+ [p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,w1,1,218,-216,e(47),i9],
51
+ [p1,-7515,7530,-8102,8111,-8109,1,1,1,1,w8,-7480,7750,-8328,8096,-8094,e(7),8199,-8197,1,1,1,1,46,-44,e(14),62,-60,1,1,1,1,1265,e(19),45,1,1,1,1,h(7),-36,i8],
52
+ [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,e(7),1557,-1555,e(14),1378,-1376,1,1,1,1377,162,-160,e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
53
+ [w0,-8102,8111,-8109,28,543,-527,-40,w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,e(5),p3,347],
54
+ [p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,e(34),64,-62,e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,e(6),211,340,-548,1,1,1,33,-31,e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,e(6),180,7931],
55
+ ].forEach((m, i) => {
56
+ maps[`windows-${i + 1250}`] = m
57
+ });
58
+
59
+ // iso-8859-1 - iso-8859-16
60
+ // prettier-ignore
61
+ ;[
62
+ [], // Actual Latin1 / Unicode subset, non-WHATWG, which maps iso-8859-1 to windows-1252
63
+ [100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,i2],
64
+ [134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
65
+ [100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
66
+ [865,e(11),-863,865,e(65),7367,-7365,e(11),-949,951,1],
67
+ [r,r,r,4,h(7),1384,-1375,h(13),1390,r,r,r,4,r,2,e(25),h(5),6,e(18),h(13)],
68
+ [8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,i7],
69
+ [r,2,e(7),46,-44,e(14),62,-60,1,1,1,h(32),8025,-6727,i8],
70
+ [e(47),i9], // non-WHATWG, which maps iso-8859-9 to windows-1254
71
+ [100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,e(5),i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
72
+ iB, // non-WHATWG, which maps iso-8859-11 to windows-874
73
+ null, // no 12
74
+ [8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,p3,7835],
75
+ [7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,e(15),165,-163,e(5),7572,-7570,e(5),153,-151,e(16),134,-132,e(5),7541,-7539,e(5),122],
76
+ [1,1,1,8201,-8199,187,-185,186,-184,e(10),202,-200,1,1,199,-197,1,1,151,1,37],
77
+ [100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
78
+ ].forEach((m, i) => {
79
+ if (m) maps[`iso-8859-${i + 1}`] = [e(33), m]
80
+ })
81
+
82
+ export default maps
@@ -1,6 +1,6 @@
1
1
  import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
2
2
  import encodings from './single-byte.encodings.js'
3
- import { decode2string } from './_utils.js'
3
+ import { decode2string, nativeDecoder } from './_utils.js'
4
4
 
5
5
  export const E_STRICT = 'Input is not well-formed for this encoding'
6
6
  const xUserDefined = 'x-user-defined'
@@ -18,11 +18,13 @@ function getEncoding(encoding) {
18
18
  if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
19
19
  if (encoding === iso8i) encoding = 'iso-8859-8'
20
20
  let prev = 127
21
- return encodings[encoding].map((x) => (x === r ? x : (prev += x))) // eslint-disable-line no-return-assign
21
+ const enc = encodings[encoding].flat().flat().flat() // max depth is 3, rechecked by tests
22
+ return enc.map((x) => (x === r ? x : (prev += x))) // eslint-disable-line no-return-assign
22
23
  }
23
24
 
24
25
  const mappers = new Map()
25
26
  const decoders = new Map()
27
+ const encmaps = new Map()
26
28
 
27
29
  // Used only on Node.js, no reason to optimize for anything else
28
30
  // E.g. avoiding .from and filling zero-initialized arr manually is faster on Hermes, but we avoid this codepath on Hermes completely
@@ -31,7 +33,7 @@ export function encodingMapper(encoding) {
31
33
  if (cached) return cached
32
34
 
33
35
  const codes = getEncoding(encoding)
34
- const incomplete = codes.includes(0xff_fd)
36
+ const incomplete = codes.includes(r)
35
37
  let map
36
38
  const mapper = (arr, start = 0) => {
37
39
  if (!map) {
@@ -63,10 +65,12 @@ export function encodingMapper(encoding) {
63
65
  export function encodingDecoder(encoding) {
64
66
  const cached = decoders.get(encoding)
65
67
  if (cached) return cached
68
+ const isLatin1 = encoding === 'iso-8859-1'
69
+ if (isLatin1 && !nativeDecoder) return (arr, loose = false) => decodeLatin1(arr) // native decoder is faster for ascii below
66
70
 
67
71
  let strings
68
72
  const codes = getEncoding(encoding)
69
- const incomplete = codes.includes(0xff_fd)
73
+ const incomplete = codes.includes(r)
70
74
  const decoder = (arr, loose = false) => {
71
75
  if (!strings) {
72
76
  const allCodes = Array.from({ length: 128 }, (_, i) => i).concat(codes)
@@ -76,6 +80,7 @@ export function encodingDecoder(encoding) {
76
80
 
77
81
  const prefixLen = asciiPrefix(arr)
78
82
  if (prefixLen === arr.length) return decodeAscii(arr)
83
+ if (isLatin1) return decodeLatin1(arr) // TODO: check if decodeAscii with subarray is faster for small prefixes too
79
84
  const prefix = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
80
85
  const suffix = decode2string(arr, prefix.length, arr.length, strings)
81
86
  if (!loose && incomplete && suffix.includes('\uFFFD')) throw new TypeError(E_STRICT)
@@ -85,3 +90,21 @@ export function encodingDecoder(encoding) {
85
90
  decoders.set(encoding, decoder)
86
91
  return decoder
87
92
  }
93
+
94
/**
 * Returns the lookup table used to encode strings into the given single-byte encoding,
 * building it on first use and caching it in `encmaps` afterwards.
 *
 * The table is a Uint8Array indexed by UTF-16 code unit. Indices 0-127 are written as
 * identity, and every code returned by `getEncoding` for byte `128 + i` points back to that
 * byte. Entries that stay 0 at a non-zero index, and indices past the end of the table, have
 * no byte assigned.
 *
 * @param {string} encoding - encoding name, passed as-is to `getEncoding`
 * @returns {Uint8Array} code unit -> byte table
 */
export function encodeMap(encoding) {
  const known = encmaps.get(encoding)
  if (known) return known

  const upper = getEncoding(encoding)
  // Tables shorter than 128 entries are padded so that the remaining bytes map to themselves
  for (let n = upper.length; n < 128; n++) upper.push(128 + n)

  // The table has to reach the highest code present; entries equal to the `r` marker are not counted
  let top = 128
  for (const cp of upper) {
    if (cp !== r && cp > top) top = cp
  }

  const table = new Uint8Array(top + 1) // < 10 KiB for all except macintosh, 63 KiB for macintosh
  for (let low = 0, high = 128; low < 128; low++, high++) {
    table[low] = low
    const cp = upper[low]
    if (cp !== r) table[cp] = high
  }

  encmaps.set(encoding, table)
  return table
}
package/fallback/utf8.js CHANGED
@@ -1,3 +1,5 @@
1
+ import { encodeAsciiPrefix } from './latin1.js'
2
+
1
3
  export const E_STRICT = 'Input is not well-formed utf8'
2
4
  export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
3
5
 
@@ -152,9 +154,10 @@ export function encode(string, loose) {
152
154
  const length = string.length
153
155
  let small = true
154
156
  let bytes = new Uint8Array(length) // assume ascii
155
- let p = 0
156
157
 
157
- for (let i = 0; i < length; i++) {
158
+ let i = encodeAsciiPrefix(bytes, string)
159
+ let p = i
160
+ for (; i < length; i++) {
158
161
  let code = string.charCodeAt(i)
159
162
  if (code < 0x80) {
160
163
  bytes[p++] = code
package/hex.node.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { assertUint8 } from './assert.js'
2
2
  import { typedView } from './array.js'
3
+ import { E_STRING } from './fallback/_utils.js'
3
4
  import { E_HEX } from './fallback/hex.js'
4
5
 
5
6
  if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
@@ -19,7 +20,7 @@ export function toHex(arr) {
19
20
  export const fromHex = Uint8Array.fromHex
20
21
  ? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
21
22
  : (str, format = 'uint8') => {
22
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
23
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
23
24
  if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
24
25
  if (denoBug && /[^\dA-Fa-f]/.test(str)) throw new SyntaxError(E_HEX)
25
26
  const buf = Buffer.from(str, 'hex') // will stop on first non-hex character, so we can just validate length
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exodus/bytes",
3
- "version": "1.6.0",
3
+ "version": "1.8.0",
4
4
  "description": "Various operations on Uint8Array data",
5
5
  "scripts": {
6
6
  "lint": "eslint .",
@@ -72,7 +72,9 @@
72
72
  "/bech32.js",
73
73
  "/bigint.js",
74
74
  "/encoding.js",
75
+ "/encoding.d.ts",
75
76
  "/encoding-lite.js",
77
+ "/encoding-lite.d.ts",
76
78
  "/hex.js",
77
79
  "/hex.d.ts",
78
80
  "/hex.node.js",
@@ -117,8 +119,14 @@
117
119
  "node": "./single-byte.node.js",
118
120
  "default": "./single-byte.js"
119
121
  },
120
- "./encoding.js": "./encoding.js",
121
- "./encoding-lite.js": "./encoding-lite.js",
122
+ "./encoding.js": {
123
+ "types": "./encoding.d.ts",
124
+ "default": "./encoding.js"
125
+ },
126
+ "./encoding-lite.js": {
127
+ "types": "./encoding-lite.d.ts",
128
+ "default": "./encoding-lite.js"
129
+ },
122
130
  "./utf16.js": {
123
131
  "node": "./utf16.node.js",
124
132
  "default": "./utf16.js"
@@ -145,6 +153,7 @@
145
153
  "@exodus/prettier": "^1.0.0",
146
154
  "@exodus/test": "^1.0.0-rc.109",
147
155
  "@noble/hashes": "^2.0.1",
156
+ "@petamoriken/float16": "^3.9.3",
148
157
  "@scure/base": "^1.2.6",
149
158
  "@stablelib/base64": "^2.0.1",
150
159
  "@stablelib/hex": "^2.0.1",
@@ -172,7 +181,7 @@
172
181
  "typescript": "^5.9.3",
173
182
  "uint8array-tools": "^0.0.9",
174
183
  "utf8": "^3.0.0",
175
- "whatwg-encoding": "^3.1.1",
184
+ "web-streams-polyfill": "^4.2.0",
176
185
  "wif": "^5.0.0"
177
186
  },
178
187
  "prettier": "@exodus/prettier",
package/single-byte.js CHANGED
@@ -1,15 +1,17 @@
1
1
  import { assertUint8 } from './assert.js'
2
- import { canDecoders } from './fallback/_utils.js'
3
- import { assertEncoding, encodingDecoder } from './fallback/single-byte.js'
2
+ import { canDecoders, nativeEncoder, isHermes, skipWeb, E_STRING } from './fallback/_utils.js'
3
+ import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
4
+ import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
4
5
 
5
- const { TextDecoder } = globalThis
6
+ const { TextDecoder, btoa } = globalThis
6
7
 
7
8
  let windows1252works
8
9
 
9
10
  // prettier-ignore
10
11
  const skipNative = new Set([
11
- 'iso-8859-16', // iso-8859-16 is somehow broken in WebKit, at least on CI
12
+ 'iso-8859-1', 'iso-8859-9', 'iso-8859-11', // non-WHATWG
12
13
  'iso-8859-6', 'iso-8859-8', 'iso-8859-8-i', // slow in all 3 engines
14
+ 'iso-8859-16', // iso-8859-16 is somehow broken in WebKit, at least on CI
13
15
  ])
14
16
 
15
17
  function shouldUseNative(enc) {
@@ -34,6 +36,7 @@ function shouldUseNative(enc) {
34
36
  }
35
37
 
36
38
  export function createSinglebyteDecoder(encoding, loose = false) {
39
+ if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
37
40
  assertEncoding(encoding)
38
41
 
39
42
  if (canDecoders && shouldUseNative(encoding)) {
@@ -56,4 +59,79 @@ export function createSinglebyteDecoder(encoding, loose = false) {
56
59
  }
57
60
  }
58
61
 
62
+ const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
63
+
64
/**
 * Translates every UTF-16 code unit of `s` through the lookup table `m` into a new Uint8Array.
 *
 * A falsy table entry for a non-zero code unit means the character can not be encoded.
 * That failure is reported in two ways, and callers have to handle both: the unrolled loop
 * throws TypeError(E_STRICT), while the tail loop returns null.
 *
 * @param {string} s - input string
 * @param {Uint8Array} m - code unit -> byte table, as produced by encodeMap
 * @returns {Uint8Array|null} encoded bytes, or null when the tail loop meets an unmapped code unit
 */
function encode(s, m) {
  const total = s.length
  const out = new Uint8Array(total)

  // The ASCII prefix copy is only used when there is no native encoder
  let pos = 0
  if (!nativeEncoder) pos = encodeAsciiPrefix(out, s)

  // Four code units per step; this unrolled loop is skipped on Hermes
  if (!isHermes) {
    const limit = total - 3
    while (pos < limit) {
      const u0 = s.charCodeAt(pos)
      const u1 = s.charCodeAt(pos + 1)
      const u2 = s.charCodeAt(pos + 2)
      const u3 = s.charCodeAt(pos + 3)
      const b0 = m[u0]
      const b1 = m[u1]
      const b2 = m[u2]
      const b3 = m[u3]
      // A zero byte is valid only for code unit 0
      const unmapped = (!b0 && u0) || (!b1 && u1) || (!b2 && u2) || (!b3 && u3)
      if (unmapped) throw new TypeError(E_STRICT)

      out[pos] = b0
      out[pos + 1] = b1
      out[pos + 2] = b2
      out[pos + 3] = b3
      pos += 4
    }
  }

  // Remaining code units (all of them on Hermes), one at a time
  while (pos < total) {
    const unit = s.charCodeAt(pos)
    const byte = m[unit]
    if (!byte && unit) return null
    out[pos] = byte
    pos++
  }

  return out
}
91
+
92
+ // fromBase64+btoa path is faster on everything where fromBase64 is fast
93
+ const useLatin1btoa = Uint8Array.fromBase64 && btoa && !skipWeb
94
+
95
/**
 * Creates a function that encodes strings into the given single-byte encoding.
 *
 * @param {string} encoding - encoding name; checked by encodeMap
 * @param {{ mode?: string }} [options] - only `mode: 'fatal'` is supported
 * @returns {(s: string) => Uint8Array} encoder; throws TypeError(E_STRING) on non-string input
 *   and TypeError(E_STRICT) when the input can not be represented in the encoding
 * @throws {Error} when `mode` is anything other than 'fatal'
 */
export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
  // TODO: replacement, truncate (replacement will need varying length)
  if (mode !== 'fatal') throw new Error('Unsupported mode')
  const m = encodeMap(encoding) // asserts

  // No single-byte encoder produces surrogate pairs, so any surrogate is invalid
  // This needs special treatment only to decide how many replacement chars to output, one or two
  // Not much use in running isWellFormed, most likely cause of error is unmapped chars, not surrogate pairs

  if (encoding === 'iso-8859-1') {
    return (s) => {
      if (typeof s !== 'string') throw new TypeError(E_STRING)

      // max limit is to not produce base64 strings that are too long
      const viaBase64 = useLatin1btoa && s.length >= 1024 && s.length < 1e8
      if (viaBase64) {
        try {
          return Uint8Array.fromBase64(btoa(s)) // fails on non-latin1
        } catch {
          throw new TypeError(E_STRICT)
        }
      }

      if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
      return encodeLatin1(s)
    }
  }

  return (s) => {
    if (typeof s !== 'string') throw new TypeError(E_STRING)

    // Instead of an ASCII regex check, encode optimistically - this is faster
    // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
    if (nativeEncoder && !NON_LATIN.test(s)) {
      try {
        return encodeAscii(s, E_STRICT)
      } catch {
        // Deliberately ignored: the optimistic attempt failed, the table-based path below takes over
      }
    }

    const bytes = encode(s, m)
    if (bytes) return bytes
    throw new TypeError(E_STRICT)
  }
}
133
+
134
+ export const latin1toString = createSinglebyteDecoder('iso-8859-1')
135
+ export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
59
136
  export const windows1252toString = createSinglebyteDecoder('windows-1252')
137
+ export const windows1252fromString = createSinglebyteEncoder('windows-1252')
@@ -1,8 +1,8 @@
1
1
  import { assertUint8 } from './assert.js'
2
2
  import { isAscii } from 'node:buffer'
3
- import { isDeno, isLE, toBuf } from './fallback/_utils.js'
3
+ import { isDeno, isLE, toBuf, E_STRING } from './fallback/_utils.js'
4
4
  import { asciiPrefix } from './fallback/latin1.js'
5
- import { encodingMapper, encodingDecoder, E_STRICT } from './fallback/single-byte.js'
5
+ import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
6
6
 
7
7
  function latin1Prefix(arr, start) {
8
8
  let p = start | 0
@@ -22,7 +22,7 @@ function latin1Prefix(arr, start) {
22
22
  }
23
23
 
24
24
  export function createSinglebyteDecoder(encoding, loose = false) {
25
- const latin1path = encoding === 'windows-1252'
25
+ if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
26
26
  if (isDeno) {
27
27
  const jsDecoder = encodingDecoder(encoding) // asserts
28
28
  return (arr) => {
@@ -33,11 +33,13 @@ export function createSinglebyteDecoder(encoding, loose = false) {
33
33
  }
34
34
  }
35
35
 
36
+ const isLatin1 = encoding === 'iso-8859-1'
37
+ const latin1path = encoding === 'windows-1252'
36
38
  const { incomplete, mapper } = encodingMapper(encoding) // asserts
37
39
  return (arr) => {
38
40
  assertUint8(arr)
39
41
  if (arr.byteLength === 0) return ''
40
- if (isAscii(arr)) return toBuf(arr).latin1Slice(0, arr.byteLength) // .latin1Slice is faster than .asciiSlice
42
+ if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice
41
43
 
42
44
  // Node.js TextDecoder is broken, so we can't use it. It's also slow anyway
43
45
 
@@ -57,4 +59,59 @@ export function createSinglebyteDecoder(encoding, loose = false) {
57
59
  }
58
60
  }
59
61
 
62
+ const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
63
+
64
/**
 * Creates a function that encodes strings into the given single-byte encoding (Buffer-based variant).
 *
 * @param {string} encoding - encoding name; checked by encodeMap
 * @param {{ mode?: string }} [options] - only `mode: 'fatal'` is supported
 * @returns {(s: string) => Uint8Array} encoder; throws TypeError(E_STRING) on non-string input
 *   and TypeError(E_STRICT) when the input can not be represented in the encoding
 * @throws {Error} when `mode` is anything other than 'fatal'
 */
export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
  // TODO: replacement, truncate (replacement will need varying length)
  if (mode !== 'fatal') throw new Error('Unsupported mode')
  const m = encodeMap(encoding) // asserts

  // Plain Uint8Array view over the same memory as a Buffer
  const asBytes = (buf) => new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength)

  if (encoding === 'iso-8859-1') {
    return (s) => {
      if (typeof s !== 'string') throw new TypeError(E_STRING)
      if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
      return asBytes(Buffer.from(s, 'latin1'))
    }
  }

  return (s) => {
    if (typeof s !== 'string') throw new TypeError(E_STRING)

    // Instead of an ASCII regex check, encode optimistically - this is faster
    // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
    if (!NON_LATIN.test(s)) {
      const ascii = Buffer.from(s, 'utf8') // ascii/latin1 coerces, we need to check
      if (ascii.length === s.length) return asBytes(ascii)
    }

    const total = s.length
    const raw = Buffer.from(s, 'utf-16le') // aligned
    if (!isLE) raw.swap16()
    // Code units of the string; mapped bytes are written back in place over them
    const units = new Uint16Array(raw.buffer, raw.byteOffset, raw.byteLength / 2)

    let pos = 0
    const limit = total - 3
    while (pos < limit) {
      const u0 = units[pos]
      const u1 = units[pos + 1]
      const u2 = units[pos + 2]
      const u3 = units[pos + 3]
      const b0 = m[u0]
      const b1 = m[u1]
      const b2 = m[u2]
      const b3 = m[u3]
      // Cheap combined check first; the detailed one runs only when some byte is falsy
      const allTruthy = b0 && b1 && b2 && b3
      if (!allTruthy && ((!b0 && u0) || (!b1 && u1) || (!b2 && u2) || (!b3 && u3))) {
        throw new TypeError(E_STRICT)
      }

      units[pos] = b0
      units[pos + 1] = b1
      units[pos + 2] = b2
      units[pos + 3] = b3
      pos += 4
    }

    while (pos < total) {
      const unit = units[pos]
      const byte = m[unit]
      if (!byte && unit) throw new TypeError(E_STRICT)
      units[pos] = byte
      pos++
    }

    // Element-wise copy: every 16-bit slot now holds a byte value
    return new Uint8Array(units)
  }
}
113
+
114
+ export const latin1toString = createSinglebyteDecoder('iso-8859-1')
115
+ export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
60
116
  export const windows1252toString = createSinglebyteDecoder('windows-1252')
117
+ export const windows1252fromString = createSinglebyteEncoder('windows-1252')
package/utf16.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import * as js from './fallback/utf16.js'
2
- import { canDecoders, isLE } from './fallback/_utils.js'
2
+ import { canDecoders, isLE, E_STRING } from './fallback/_utils.js'
3
3
 
4
4
  const { TextDecoder } = globalThis // Buffer is optional
5
5
  const ignoreBOM = true
@@ -18,7 +18,7 @@ const { E_STRICT, E_STRICT_UNICODE } = js
18
18
  const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
19
19
 
20
20
  function encode(str, loose = false, format = 'uint16') {
21
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
21
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
22
22
  if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
23
23
  throw new TypeError('Unknown format')
24
24
  }
package/utf16.node.js CHANGED
@@ -1,4 +1,4 @@
1
- import { isDeno, isLE } from './fallback/_utils.js'
1
+ import { isDeno, isLE, E_STRING } from './fallback/_utils.js'
2
2
  import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf16.js'
3
3
 
4
4
  if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
@@ -9,7 +9,7 @@ const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
9
9
  // Unlike utf8, operates on Uint16Arrays by default
10
10
 
11
11
  function encode(str, loose = false, format = 'uint16') {
12
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
12
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
13
13
  if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
14
14
  throw new TypeError('Unknown format')
15
15
  }
package/utf8.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { assertUint8 } from './assert.js'
2
2
  import { typedView } from './array.js'
3
- import { isHermes, nativeDecoder, nativeEncoder } from './fallback/_utils.js'
3
+ import { isHermes, nativeDecoder, nativeEncoder, E_STRING } from './fallback/_utils.js'
4
4
  import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
5
5
  import * as js from './fallback/utf8.js'
6
6
 
@@ -44,7 +44,7 @@ function deLoose(str, loose, res) {
44
44
  }
45
45
 
46
46
  function encode(str, loose = false) {
47
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
47
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
48
48
  if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
49
49
  if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
50
50
  // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
package/utf8.node.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { assertUint8 } from './assert.js'
2
2
  import { typedView } from './array.js'
3
+ import { E_STRING } from './fallback/_utils.js'
3
4
  import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf8.js'
4
5
  import { isAscii } from 'node:buffer'
5
6
 
@@ -17,7 +18,7 @@ try {
17
18
  }
18
19
 
19
20
  function encode(str, loose = false) {
20
- if (typeof str !== 'string') throw new TypeError('Input is not a string')
21
+ if (typeof str !== 'string') throw new TypeError(E_STRING)
21
22
  const strLength = str.length
22
23
  if (strLength === 0) return new Uint8Array() // faster than Uint8Array.of
23
24
  let res