@exodus/bytes 1.7.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +103 -16
- package/array.d.ts +0 -1
- package/base32.js +2 -1
- package/base58.js +3 -3
- package/base58check.js +1 -2
- package/base64.d.ts +0 -1
- package/base64.js +2 -2
- package/bech32.js +1 -2
- package/encoding-browser.browser.js +29 -0
- package/encoding-browser.d.ts +1 -0
- package/encoding-browser.js +1 -0
- package/encoding-browser.native.js +1 -0
- package/encoding-lite.d.ts +1 -0
- package/encoding.d.ts +58 -0
- package/fallback/_utils.js +3 -0
- package/fallback/encoding.api.js +81 -0
- package/fallback/encoding.js +6 -82
- package/fallback/hex.js +2 -2
- package/fallback/latin1.js +34 -0
- package/fallback/multi-byte.js +456 -71
- package/fallback/multi-byte.table.js +20 -15
- package/fallback/single-byte.encodings.js +64 -43
- package/fallback/single-byte.js +7 -3
- package/fallback/utf16.js +45 -26
- package/fallback/utf8.js +6 -3
- package/hex.d.ts +0 -1
- package/hex.node.js +2 -1
- package/index.d.ts +43 -0
- package/index.js +5 -0
- package/multi-byte.js +7 -1
- package/multi-byte.node.js +7 -1
- package/package.json +42 -6
- package/single-byte.js +39 -6
- package/single-byte.node.js +42 -30
- package/utf16.js +3 -2
- package/utf16.node.js +8 -4
- package/utf8.js +2 -2
- package/utf8.node.js +2 -1
package/README.md
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# `@exodus/bytes`
|
|
2
2
|
|
|
3
|
+
[](https://npmjs.org/package/@exodus/bytes)
|
|
4
|
+
[](https://github.com/ExodusOSS/bytes/releases)
|
|
5
|
+
[](https://www.npmcharts.com/compare/@exodus/bytes?minimal=true)
|
|
6
|
+
[](https://github.com/ExodusOSS/bytes/blob/HEAD/LICENSE)
|
|
7
|
+
|
|
3
8
|
`Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, `utf8`, `utf16`, `bech32` and `wif`
|
|
4
9
|
|
|
5
10
|
And a [`TextEncoder` / `TextDecoder` polyfill](#textencoder--textdecoder-polyfill)
|
|
@@ -38,7 +43,7 @@ Less than half the bundle size of [text-encoding](https://npmjs.com/text-encodin
|
|
|
38
43
|
Also [much faster](#fast) than all of those.
|
|
39
44
|
|
|
40
45
|
> [!TIP]
|
|
41
|
-
> See also the [lite version](#lite-version) to get this down to
|
|
46
|
+
> See also the [lite version](#lite-version) to get this down to 10 KiB gzipped.
|
|
42
47
|
|
|
43
48
|
Spec compliant, passing WPT and covered with extra tests.\
|
|
44
49
|
Moreover, tests for this library uncovered [bugs in all major implementations](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit).\
|
|
@@ -81,9 +86,9 @@ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
|
|
|
81
86
|
import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
|
|
82
87
|
```
|
|
83
88
|
|
|
84
|
-
This reduces the bundle size
|
|
85
|
-
from 90 KiB gzipped for `@exodus/bytes/encoding.js` to
|
|
86
|
-
(For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped)
|
|
89
|
+
This reduces the bundle size 9x:\
|
|
90
|
+
from 90 KiB gzipped for `@exodus/bytes/encoding.js` to 10 KiB gzipped for `@exodus/bytes/encoding-lite.js`.\
|
|
91
|
+
(For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped.)
|
|
87
92
|
|
|
88
93
|
It still supports `utf-8`, `utf-16le`, `utf-16be` and all single-byte encodings specified by the spec,
|
|
89
94
|
the only difference is support for legacy multi-byte encodings.
|
|
@@ -125,18 +130,50 @@ import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js
|
|
|
125
130
|
```js
|
|
126
131
|
import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
|
|
127
132
|
import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
|
|
133
|
+
import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
|
|
128
134
|
```
|
|
129
135
|
|
|
130
|
-
Decode the legacy single-byte encodings according to the
|
|
131
|
-
|
|
132
|
-
[§
|
|
136
|
+
Decode / encode the legacy single-byte encodings according to the
|
|
137
|
+
[Encoding standard](https://encoding.spec.whatwg.org/)
|
|
138
|
+
([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings),
|
|
139
|
+
[§14.5](https://encoding.spec.whatwg.org/#x-user-defined)),
|
|
140
|
+
and [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859) `iso-8859-*` mappings.
|
|
133
141
|
|
|
134
|
-
Supports all single-byte encodings listed in the standard:
|
|
142
|
+
Supports all single-byte encodings listed in the WHATWG Encoding standard:
|
|
135
143
|
`ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
|
|
136
144
|
`iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
|
|
137
145
|
`macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
|
|
138
146
|
`windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
|
|
139
147
|
|
|
148
|
+
Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
|
|
149
|
+
[unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859)
|
|
150
|
+
(and all other `iso-8859-*` encodings there as they match WHATWG).
|
|
151
|
+
|
|
152
|
+
> [!NOTE]
|
|
153
|
+
> While all `iso-8859-*` encodings supported by the [WHATWG Encoding standard](https://encoding.spec.whatwg.org/) match
|
|
154
|
+
> [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859), the WHATWG Encoding spec doesn't support
|
|
155
|
+
> `iso-8859-1`, `iso-8859-9`, `iso-8859-11`, and instead maps them as labels to `windows-1252`, `windows-1254`, `windows-874`.\
|
|
156
|
+
> `createSinglebyteDecoder()` (unlike `TextDecoder` or `legacyHookDecode()`) does not do such mapping,
|
|
157
|
+
> so its results will differ from `TextDecoder` for those encoding names.
|
|
158
|
+
|
|
159
|
+
```js
|
|
160
|
+
> new TextDecoder('iso-8859-1').encoding
|
|
161
|
+
'windows-1252'
|
|
162
|
+
> new TextDecoder('iso-8859-9').encoding
|
|
163
|
+
'windows-1254'
|
|
164
|
+
> new TextDecoder('iso-8859-11').encoding
|
|
165
|
+
'windows-874'
|
|
166
|
+
> new TextDecoder('iso-8859-9').decode(Uint8Array.of(0x80, 0x81, 0xd0))
|
|
167
|
+
'€\x81Ğ' // this is actually decoded according to windows-1254 per TextDecoder spec
|
|
168
|
+
> createSinglebyteDecoder('iso-8859-9')(Uint8Array.of(0x80, 0x81, 0xd0))
|
|
169
|
+
'\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
All WHATWG Encoding spec [`windows-*` encodings](https://encoding.spec.whatwg.org/#windows-874) are supersets of
|
|
173
|
+
corresponding [unicode.org encodings](https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/), meaning that
|
|
174
|
+
they encode/decode all the old valid (non-replacement) strings / byte sequences identically, but can also support
|
|
175
|
+
a wider range of inputs.
|
|
176
|
+
|
|
140
177
|
##### `createSinglebyteDecoder(encoding, loose = false)`
|
|
141
178
|
|
|
142
179
|
Create a decoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
|
|
@@ -152,12 +189,35 @@ Returns a function `encode(string)` that encodes a string to bytes.
|
|
|
152
189
|
In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
|
|
153
190
|
not be encoded in the target encoding.
|
|
154
191
|
|
|
192
|
+
##### `latin1toString(arr)`
|
|
193
|
+
|
|
194
|
+
Decode `iso-8859-1` bytes to a string.
|
|
195
|
+
|
|
196
|
+
There is no loose variant for this encoding, all bytes can be decoded.
|
|
197
|
+
|
|
198
|
+
Same as:
|
|
199
|
+
```js
|
|
200
|
+
const latin1toString = createSinglebyteDecoder('iso-8859-1')
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as
|
|
204
|
+
those alias to `new TextDecoder('windows-1252')`.
|
|
205
|
+
|
|
206
|
+
##### `latin1fromString(string)`
|
|
207
|
+
|
|
208
|
+
Encode a string to `iso-8859-1` bytes.
|
|
209
|
+
|
|
210
|
+
Will throw on non well-formed strings or any codepoints which could not be encoded in `iso-8859-1`.
|
|
211
|
+
|
|
212
|
+
Same as:
|
|
213
|
+
```js
|
|
214
|
+
const latin1fromString = createSinglebyteEncoder('iso-8859-1', { mode: 'fatal' })
|
|
215
|
+
```
|
|
216
|
+
|
|
155
217
|
##### `windows1252toString(arr)`
|
|
156
218
|
|
|
157
219
|
Decode `windows-1252` bytes to a string.
|
|
158
220
|
|
|
159
|
-
Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
|
|
160
|
-
|
|
161
221
|
There is no loose variant for this encoding, all bytes can be decoded.
|
|
162
222
|
|
|
163
223
|
Same as:
|
|
@@ -169,8 +229,6 @@ const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
|
169
229
|
|
|
170
230
|
Encode a string to `windows-1252` bytes.
|
|
171
231
|
|
|
172
|
-
Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
|
|
173
|
-
|
|
174
232
|
Will throw on non well-formed strings or any codepoints which could not be encoded in `windows-1252`.
|
|
175
233
|
|
|
176
234
|
Same as:
|
|
@@ -212,6 +270,8 @@ import { fromBigInt, toBigInt } from '@exodus/bytes/bigint.js'
|
|
|
212
270
|
|
|
213
271
|
### `@exodus/bytes/hex.js`
|
|
214
272
|
|
|
273
|
+
Implements Base16 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
|
|
274
|
+
|
|
215
275
|
```js
|
|
216
276
|
import { fromHex, toHex } from '@exodus/bytes/hex.js'
|
|
217
277
|
```
|
|
@@ -221,6 +281,8 @@ import { fromHex, toHex } from '@exodus/bytes/hex.js'
|
|
|
221
281
|
|
|
222
282
|
### `@exodus/bytes/base64.js`
|
|
223
283
|
|
|
284
|
+
Implements Base64 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
|
|
285
|
+
|
|
224
286
|
```js
|
|
225
287
|
import { fromBase64, toBase64 } from '@exodus/bytes/base64.js'
|
|
226
288
|
import { fromBase64url, toBase64url } from '@exodus/bytes/base64.js'
|
|
@@ -235,6 +297,8 @@ import { fromBase64any } from '@exodus/bytes/base64.js'
|
|
|
235
297
|
|
|
236
298
|
### `@exodus/bytes/base32.js`
|
|
237
299
|
|
|
300
|
+
Implements Base32 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
|
|
301
|
+
|
|
238
302
|
```js
|
|
239
303
|
import { fromBase32, toBase32 } from '@exodus/bytes/base32.js'
|
|
240
304
|
import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
|
|
@@ -247,10 +311,12 @@ import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
|
|
|
247
311
|
|
|
248
312
|
### `@exodus/bytes/bech32.js`
|
|
249
313
|
|
|
314
|
+
Implements [BIP-0173](https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki#specification) and [BIP-0350](https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki#specification).
|
|
315
|
+
|
|
250
316
|
```js
|
|
251
317
|
import { fromBech32, toBech32 } from '@exodus/bytes/bech32.js'
|
|
252
|
-
import { fromBech32m, toBech32m } from '@exodus/bytes/
|
|
253
|
-
import { getPrefix } from '@exodus/bytes/
|
|
318
|
+
import { fromBech32m, toBech32m } from '@exodus/bytes/bech32.js'
|
|
319
|
+
import { getPrefix } from '@exodus/bytes/bech32.js'
|
|
254
320
|
```
|
|
255
321
|
|
|
256
322
|
##### `getPrefix(str, limit = 90)`
|
|
@@ -282,7 +348,7 @@ import { fromBase58checkSync, toBase58checkSync } from '@exodus/bytes/base58chec
|
|
|
282
348
|
import { makeBase58check } from '@exodus/bytes/base58check.js'
|
|
283
349
|
```
|
|
284
350
|
|
|
285
|
-
On non-Node.js, requires peer dependency [@
|
|
351
|
+
On non-Node.js, requires peer dependency [@noble/hashes](https://www.npmjs.com/package/@noble/hashes) to be installed.
|
|
286
352
|
|
|
287
353
|
##### `async fromBase58check(str, format = 'uint8')`
|
|
288
354
|
##### `async toBase58check(arr)`
|
|
@@ -297,7 +363,7 @@ import { fromWifString, toWifString } from '@exodus/bytes/wif.js'
|
|
|
297
363
|
import { fromWifStringSync, toWifStringSync } from '@exodus/bytes/wif.js'
|
|
298
364
|
```
|
|
299
365
|
|
|
300
|
-
On non-Node.js, requires peer dependency [@
|
|
366
|
+
On non-Node.js, requires peer dependency [@noble/hashes](https://www.npmjs.com/package/@noble/hashes) to be installed.
|
|
301
367
|
|
|
302
368
|
##### `async fromWifString(string, version)`
|
|
303
369
|
##### `fromWifStringSync(string, version)`
|
|
@@ -466,6 +532,27 @@ true
|
|
|
466
532
|
'%'
|
|
467
533
|
```
|
|
468
534
|
|
|
535
|
+
### `@exodus/bytes/encoding-browser.js`
|
|
536
|
+
|
|
537
|
+
```js
|
|
538
|
+
import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-browser.js'
|
|
539
|
+
import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-browser.js' // Requires Streams
|
|
540
|
+
|
|
541
|
+
// Hooks for standards
|
|
542
|
+
import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-browser.js'
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
Same as `@exodus/bytes/encoding.js`, but in browsers, instead of polyfilling, it just uses whatever the
|
|
546
|
+
browser provides, drastically reducing the bundle size (to less than 2 KiB gzipped).
|
|
547
|
+
|
|
548
|
+
Under non-browser engines (Node.js, React Native, etc.) a full polyfill is used as those platforms
|
|
549
|
+
do not provide sufficiently complete / non-buggy `TextDecoder` APIs.
|
|
550
|
+
|
|
551
|
+
> [!NOTE]
|
|
552
|
+
> Implementations in browsers [have bugs](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit),
|
|
553
|
+
> but they are fixing them and the expected update window is short.\
|
|
554
|
+
> If you want to circumvent browser bugs, use the full `@exodus/bytes/encoding.js` import.
|
|
555
|
+
|
|
469
556
|
## License
|
|
470
557
|
|
|
471
558
|
[MIT](./LICENSE)
|
package/array.d.ts
CHANGED
|
@@ -21,4 +21,3 @@ export type OutputFormat = 'uint8' | 'buffer';
|
|
|
21
21
|
export function typedView(arr: ArrayBufferView, format: 'uint8'): Uint8Array;
|
|
22
22
|
export function typedView(arr: ArrayBufferView, format: 'buffer'): Buffer;
|
|
23
23
|
export function typedView(arr: ArrayBufferView, format: OutputFormat): Uint8Array | Buffer;
|
|
24
|
-
|
package/base32.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { assertEmptyRest } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
|
+
import { E_STRING } from './fallback/_utils.js'
|
|
3
4
|
import * as js from './fallback/base32.js'
|
|
4
5
|
|
|
5
6
|
// See https://datatracker.ietf.org/doc/html/rfc4648
|
|
@@ -25,7 +26,7 @@ export function fromBase32hex(str, options) {
|
|
|
25
26
|
}
|
|
26
27
|
|
|
27
28
|
function fromBase32common(str, isBase32Hex, padding, format, rest) {
|
|
28
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
29
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
29
30
|
if (rest !== null) assertEmptyRest(rest)
|
|
30
31
|
|
|
31
32
|
if (padding === true) {
|
package/base58.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { typedView } from './array.js'
|
|
2
2
|
import { assertUint8 } from './assert.js'
|
|
3
|
-
import { nativeDecoder, nativeEncoder, isHermes } from './fallback/_utils.js'
|
|
3
|
+
import { nativeDecoder, nativeEncoder, isHermes, E_STRING } from './fallback/_utils.js'
|
|
4
4
|
import { encodeAscii, decodeAscii } from './fallback/latin1.js'
|
|
5
5
|
|
|
6
6
|
const alphabet58 = [...'123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz']
|
|
@@ -122,7 +122,7 @@ function toBase58core(arr, alphabet, codes) {
|
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
function fromBase58core(str, alphabet, codes, format = 'uint8') {
|
|
125
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
125
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
126
126
|
const length = str.length
|
|
127
127
|
if (length === 0) return typedView(new Uint8Array(), format)
|
|
128
128
|
|
|
@@ -207,7 +207,7 @@ function fromBase58core(str, alphabet, codes, format = 'uint8') {
|
|
|
207
207
|
}
|
|
208
208
|
|
|
209
209
|
at = k + 1
|
|
210
|
-
if (c !== 0 || at < zeros) throw new Error('Unexpected') // unreachable
|
|
210
|
+
if (c !== 0 || at < zeros) /* c8 ignore next */ throw new Error('Unexpected') // unreachable
|
|
211
211
|
}
|
|
212
212
|
}
|
|
213
213
|
|
package/base58check.js
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { sha256 } from '@noble/hashes/sha2.js'
|
|
2
2
|
import { makeBase58check } from './fallback/base58check.js'
|
|
3
3
|
|
|
4
4
|
// Note: while API is async, we use a synchronous sha256 for now until we improve webcrypto perf for hash256
|
|
5
5
|
// Inputs to base58 are typically very small, and that makes a difference
|
|
6
6
|
|
|
7
7
|
// eslint-disable-next-line @exodus/import/no-deprecated
|
|
8
|
-
const sha256 = (x) => hashSync('sha256', x, 'uint8')
|
|
9
8
|
const hash256sync = (x) => sha256(sha256(x))
|
|
10
9
|
const hash256 = hash256sync // See note at the top
|
|
11
10
|
const {
|
package/base64.d.ts
CHANGED
|
@@ -73,4 +73,3 @@ export function fromBase64url(str: string, options: FromBase64Options & { format
|
|
|
73
73
|
*/
|
|
74
74
|
export function fromBase64any(str: string, options?: FromBase64Options): Uint8ArrayBuffer;
|
|
75
75
|
export function fromBase64any(str: string, options: FromBase64Options & { format: 'buffer' }): Buffer;
|
|
76
|
-
|
package/base64.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { assertUint8, assertEmptyRest } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
|
-
import { isHermes, skipWeb } from './fallback/_utils.js'
|
|
3
|
+
import { isHermes, skipWeb, E_STRING } from './fallback/_utils.js'
|
|
4
4
|
import { decodeLatin1, encodeLatin1 } from './fallback/latin1.js'
|
|
5
5
|
import * as js from './fallback/base64.js'
|
|
6
6
|
|
|
@@ -79,7 +79,7 @@ export function fromBase64any(str, { format = 'uint8', padding = 'both', ...rest
|
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
function fromBase64common(str, isBase64url, padding, format, rest) {
|
|
82
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
82
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
83
83
|
if (rest !== null) assertEmptyRest(rest)
|
|
84
84
|
const auto = padding === 'both' ? str.endsWith('=') : undefined
|
|
85
85
|
// Older JSC supporting Uint8Array.fromBase64 lacks proper checks
|
package/bech32.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { assertUint8 } from './assert.js'
|
|
2
|
-
import { nativeEncoder } from './fallback/_utils.js'
|
|
2
|
+
import { nativeEncoder, E_STRING } from './fallback/_utils.js'
|
|
3
3
|
import { decodeAscii, encodeAscii, encodeLatin1 } from './fallback/latin1.js'
|
|
4
4
|
|
|
5
5
|
const alphabet = [...'qpzry9x8gf2tvdw0s3jn54khce6mua7l']
|
|
@@ -12,7 +12,6 @@ const E_MIXED = 'Mixed-case string'
|
|
|
12
12
|
const E_PADDING = 'Padding is invalid'
|
|
13
13
|
const E_CHECKSUM = 'Invalid checksum'
|
|
14
14
|
const E_CHARACTER = 'Non-bech32 character'
|
|
15
|
-
const E_STRING = 'Input is not a string'
|
|
16
15
|
|
|
17
16
|
// nativeEncoder path uses encodeAscii which asserts ascii, otherwise we have 0-255 bytes from encodeLatin1
|
|
18
17
|
const c2x = new Int8Array(nativeEncoder ? 128 : 256).fill(-1)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import {
|
|
2
|
+
fromSource,
|
|
3
|
+
getBOMEncoding,
|
|
4
|
+
normalizeEncoding,
|
|
5
|
+
E_ENCODING,
|
|
6
|
+
} from './fallback/encoding.api.js'
|
|
7
|
+
import labels from './fallback/encoding.labels.js'
|
|
8
|
+
|
|
9
|
+
// Lightweight version which re-exports existing implementations on browsers,
|
|
10
|
+
// while still being aliased to the full impl in RN and Node.js
|
|
11
|
+
|
|
12
|
+
// WARNING: Note that browsers have bugs (which hopefully will get fixed soon)
|
|
13
|
+
|
|
14
|
+
const { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream } = globalThis
|
|
15
|
+
|
|
16
|
+
export { normalizeEncoding, getBOMEncoding, labelToName } from './fallback/encoding.api.js'
|
|
17
|
+
export { TextDecoder, TextEncoder, TextDecoderStream, TextEncoderStream }
|
|
18
|
+
|
|
19
|
+
// https://encoding.spec.whatwg.org/#decode
|
|
20
|
+
export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
21
|
+
let u8 = fromSource(input)
|
|
22
|
+
const bomEncoding = getBOMEncoding(u8)
|
|
23
|
+
if (bomEncoding) u8 = u8.subarray(bomEncoding === 'utf-8' ? 3 : 2)
|
|
24
|
+
const enc = bomEncoding ?? normalizeEncoding(fallbackEncoding) // "the byte order mark is more authoritative than anything else"
|
|
25
|
+
if (enc === 'utf-8') return new TextDecoder('utf-8', { ignoreBOM: true }).decode(u8) // fast path
|
|
26
|
+
if (enc === 'replacement') return u8.byteLength > 0 ? '\uFFFD' : ''
|
|
27
|
+
if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)
|
|
28
|
+
return new TextDecoder(enc, { ignoreBOM: true }).decode(u8)
|
|
29
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './encoding.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './encoding.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './encoding.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './encoding.js'
|
package/encoding.d.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Converts an encoding label to its name, as an ASCII-lowercased string
|
|
5
|
+
* @param label - The encoding label to normalize
|
|
6
|
+
* @returns The normalized encoding name, or null if invalid
|
|
7
|
+
*/
|
|
8
|
+
export function normalizeEncoding(label: string): string | null;
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Implements BOM sniff (https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
|
|
12
|
+
* @param input - The bytes to check for BOM
|
|
13
|
+
* @returns The encoding ('utf-8', 'utf-16le', 'utf-16be'), or null if no BOM found
|
|
14
|
+
*/
|
|
15
|
+
export function getBOMEncoding(
|
|
16
|
+
input: ArrayBufferLike | ArrayBufferView
|
|
17
|
+
): 'utf-8' | 'utf-16le' | 'utf-16be' | null;
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Implements decode (https://encoding.spec.whatwg.org/#decode) legacy hook.
|
|
21
|
+
* @param input - The bytes to decode
|
|
22
|
+
* @param fallbackEncoding - The encoding to use if no BOM detected (default: 'utf-8')
|
|
23
|
+
* @returns The decoded string
|
|
24
|
+
*/
|
|
25
|
+
export function legacyHookDecode(
|
|
26
|
+
input: ArrayBufferLike | ArrayBufferView,
|
|
27
|
+
fallbackEncoding?: string
|
|
28
|
+
): string;
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Converts an encoding label to its name, as a case-sensitive string.
|
|
32
|
+
* @param label - The encoding label
|
|
33
|
+
* @returns The proper case encoding name, or null if invalid
|
|
34
|
+
*/
|
|
35
|
+
export function labelToName(label: string): string | null;
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Text decoder for decoding bytes to strings in various encodings
|
|
39
|
+
* Supports strict and lossy modes
|
|
40
|
+
*/
|
|
41
|
+
export const TextDecoder: typeof globalThis.TextDecoder;
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Text encoder for encoding strings to UTF-8 bytes
|
|
45
|
+
*/
|
|
46
|
+
export const TextEncoder: typeof globalThis.TextEncoder;
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Transform stream wrapper for TextDecoder
|
|
50
|
+
* Decodes chunks of bytes to strings
|
|
51
|
+
*/
|
|
52
|
+
export const TextDecoderStream: typeof globalThis.TextDecoderStream;
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Transform stream wrapper for TextEncoder
|
|
56
|
+
* Encodes chunks of strings to UTF-8 bytes
|
|
57
|
+
*/
|
|
58
|
+
export const TextEncoderStream: typeof globalThis.TextEncoderStream;
|
package/fallback/_utils.js
CHANGED
|
@@ -47,6 +47,7 @@ function shouldSkipBuiltins() {
|
|
|
47
47
|
return /firefox/i.test(g.navigator.userAgent || '') // as simple as we can
|
|
48
48
|
}
|
|
49
49
|
|
|
50
|
+
/* c8 ignore next */
|
|
50
51
|
return false // eslint-disable-line no-unreachable
|
|
51
52
|
}
|
|
52
53
|
|
|
@@ -128,3 +129,5 @@ export const toBuf = (x) =>
|
|
|
128
129
|
x.byteLength <= 64 && x.BYTES_PER_ELEMENT === 1
|
|
129
130
|
? Buffer.from(x)
|
|
130
131
|
: Buffer.from(x.buffer, x.byteOffset, x.byteLength)
|
|
132
|
+
|
|
133
|
+
export const E_STRING = 'Input is not a string'
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import labels from './encoding.labels.js'
|
|
2
|
+
|
|
3
|
+
let labelsMap
|
|
4
|
+
|
|
5
|
+
export const E_ENCODING = 'Unknown encoding'
|
|
6
|
+
|
|
7
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
8
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
9
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
10
|
+
export function normalizeEncoding(label) {
|
|
11
|
+
// fast path
|
|
12
|
+
if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
|
|
13
|
+
if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
|
|
14
|
+
// full map
|
|
15
|
+
if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
|
|
16
|
+
const low = `${label}`.trim().toLowerCase()
|
|
17
|
+
if (Object.hasOwn(labels, low)) return low
|
|
18
|
+
if (!labelsMap) {
|
|
19
|
+
labelsMap = new Map()
|
|
20
|
+
for (const [label, aliases] of Object.entries(labels)) {
|
|
21
|
+
for (const alias of aliases) labelsMap.set(alias, label)
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
const mapped = labelsMap.get(low)
|
|
26
|
+
if (mapped) return mapped
|
|
27
|
+
return null
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// TODO: make this more strict against Symbol.toStringTag
|
|
31
|
+
// Is not very significant though, anything faking Symbol.toStringTag could as well override
|
|
32
|
+
// prototypes, which is not something we protect against
|
|
33
|
+
|
|
34
|
+
function isAnyArrayBuffer(x) {
|
|
35
|
+
if (x instanceof ArrayBuffer) return true
|
|
36
|
+
if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
|
|
37
|
+
if (!x || typeof x.byteLength !== 'number') return false
|
|
38
|
+
const s = Object.prototype.toString.call(x)
|
|
39
|
+
return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export function fromSource(x) {
|
|
43
|
+
if (x instanceof Uint8Array) return x
|
|
44
|
+
if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
|
|
45
|
+
if (isAnyArrayBuffer(x)) {
|
|
46
|
+
if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
|
|
47
|
+
// Old engines without .detached, try-catch
|
|
48
|
+
try {
|
|
49
|
+
return new Uint8Array(x)
|
|
50
|
+
} catch {
|
|
51
|
+
return new Uint8Array()
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
59
|
+
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
60
|
+
export function getBOMEncoding(input) {
|
|
61
|
+
const u8 = fromSource(input) // asserts
|
|
62
|
+
if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
|
|
63
|
+
if (u8.length < 2) return null
|
|
64
|
+
if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
|
|
65
|
+
if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
|
|
66
|
+
return null
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
|
|
70
|
+
|
|
71
|
+
// Unlike normalizeEncoding, case-sensitive
|
|
72
|
+
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
73
|
+
export function labelToName(label) {
|
|
74
|
+
const enc = normalizeEncoding(label)
|
|
75
|
+
if (enc === 'utf-8') return 'UTF-8' // fast path
|
|
76
|
+
if (!enc) return enc
|
|
77
|
+
if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
|
|
78
|
+
if (enc === 'big5') return 'Big5'
|
|
79
|
+
if (enc === 'shift_jis') return 'Shift_JIS'
|
|
80
|
+
return enc
|
|
81
|
+
}
|
package/fallback/encoding.js
CHANGED
|
@@ -5,14 +5,15 @@ import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
|
|
|
5
5
|
import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
|
|
6
6
|
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
|
|
7
7
|
import labels from './encoding.labels.js'
|
|
8
|
+
import { fromSource, getBOMEncoding, normalizeEncoding, E_ENCODING } from './encoding.api.js'
|
|
8
9
|
import { unfinishedBytes } from './encoding.util.js'
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
const E_ENCODING = 'Unknown encoding'
|
|
12
|
-
const replacementChar = '\uFFFD'
|
|
11
|
+
export { labelToName, getBOMEncoding, normalizeEncoding } from './encoding.api.js'
|
|
13
12
|
|
|
13
|
+
const E_OPTIONS = 'The "options" argument must be of type object'
|
|
14
14
|
const E_MULTI =
|
|
15
15
|
'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
|
|
16
|
+
const replacementChar = '\uFFFD'
|
|
16
17
|
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
|
|
17
18
|
let createMultibyteDecoder
|
|
18
19
|
|
|
@@ -20,67 +21,14 @@ export function setMultibyteDecoder(createDecoder) {
|
|
|
20
21
|
createMultibyteDecoder = createDecoder
|
|
21
22
|
}
|
|
22
23
|
|
|
23
|
-
let labelsMap
|
|
24
|
-
|
|
25
|
-
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
26
|
-
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
27
|
-
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
28
|
-
export function normalizeEncoding(label) {
|
|
29
|
-
// fast path
|
|
30
|
-
if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
|
|
31
|
-
if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
|
|
32
|
-
// full map
|
|
33
|
-
if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
|
|
34
|
-
const low = `${label}`.trim().toLowerCase()
|
|
35
|
-
if (Object.hasOwn(labels, low)) return low
|
|
36
|
-
if (!labelsMap) {
|
|
37
|
-
labelsMap = new Map()
|
|
38
|
-
for (const [label, aliases] of Object.entries(labels)) {
|
|
39
|
-
for (const alias of aliases) labelsMap.set(alias, label)
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
const mapped = labelsMap.get(low)
|
|
44
|
-
if (mapped) return mapped
|
|
45
|
-
return null
|
|
46
|
-
}
|
|
47
|
-
|
|
48
24
|
const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
|
|
49
25
|
|
|
50
|
-
// TODO: make this more strict against Symbol.toStringTag
|
|
51
|
-
// Is not very significant though, anything faking Symbol.toStringTag could as well override
|
|
52
|
-
// prototypes, which is not something we protect against
|
|
53
|
-
|
|
54
|
-
function isAnyArrayBuffer(x) {
|
|
55
|
-
if (x instanceof ArrayBuffer) return true
|
|
56
|
-
if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
|
|
57
|
-
if (!x || typeof x.byteLength !== 'number') return false
|
|
58
|
-
const s = Object.prototype.toString.call(x)
|
|
59
|
-
return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
|
|
60
|
-
}
|
|
61
|
-
|
|
62
26
|
function isAnyUint8Array(x) {
|
|
63
27
|
if (x instanceof Uint8Array) return true
|
|
64
28
|
if (!x || !ArrayBuffer.isView(x) || x.BYTES_PER_ELEMENT !== 1) return false
|
|
65
29
|
return Object.prototype.toString.call(x) === '[object Uint8Array]'
|
|
66
30
|
}
|
|
67
31
|
|
|
68
|
-
const fromSource = (x) => {
|
|
69
|
-
if (x instanceof Uint8Array) return x
|
|
70
|
-
if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
|
|
71
|
-
if (isAnyArrayBuffer(x)) {
|
|
72
|
-
if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
|
|
73
|
-
// Old engines without .detached, try-catch
|
|
74
|
-
try {
|
|
75
|
-
return new Uint8Array(x)
|
|
76
|
-
} catch {
|
|
77
|
-
return new Uint8Array()
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
|
|
82
|
-
}
|
|
83
|
-
|
|
84
32
|
function unicodeDecoder(encoding, loose) {
|
|
85
33
|
if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
|
|
86
34
|
const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
|
|
@@ -215,6 +163,7 @@ export class TextDecoder {
|
|
|
215
163
|
return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
|
|
216
164
|
}
|
|
217
165
|
|
|
166
|
+
/* c8 ignore next */
|
|
218
167
|
throw new Error('Unreachable')
|
|
219
168
|
}
|
|
220
169
|
}
|
|
@@ -341,17 +290,6 @@ export class TextEncoderStream {
|
|
|
341
290
|
}
|
|
342
291
|
}
|
|
343
292
|
|
|
344
|
-
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
345
|
-
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
346
|
-
export function getBOMEncoding(input) {
|
|
347
|
-
const u8 = fromSource(input) // asserts
|
|
348
|
-
if (u8.length >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
|
|
349
|
-
if (u8.length < 2) return null
|
|
350
|
-
if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
|
|
351
|
-
if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
|
|
352
|
-
return null
|
|
353
|
-
}
|
|
354
|
-
|
|
355
293
|
// https://encoding.spec.whatwg.org/#decode
|
|
356
294
|
// Warning: encoding sniffed from BOM takes preference over the supplied one
|
|
357
295
|
// Warning: lossy, performs replacement, no option of throwing
|
|
@@ -368,7 +306,7 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
|
368
306
|
let suffix = ''
|
|
369
307
|
if (u8.byteLength % 2 !== 0) {
|
|
370
308
|
suffix = replacementChar
|
|
371
|
-
u8 = u8.subarray(0, -
|
|
309
|
+
u8 = u8.subarray(0, -unfinishedBytes(u8, u8.byteLength, enc))
|
|
372
310
|
}
|
|
373
311
|
|
|
374
312
|
return utf16toStringLoose(u8, enc === 'utf-16le' ? 'uint8-le' : 'uint8-be') + suffix
|
|
@@ -387,17 +325,3 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
|
|
|
387
325
|
|
|
388
326
|
return createSinglebyteDecoder(enc, true)(u8)
|
|
389
327
|
}
|
|
390
|
-
|
|
391
|
-
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
|
|
392
|
-
|
|
393
|
-
// Unlike normalizeEncoding, case-sensitive
|
|
394
|
-
// https://encoding.spec.whatwg.org/#names-and-labels
|
|
395
|
-
export function labelToName(label) {
|
|
396
|
-
const enc = normalizeEncoding(label)
|
|
397
|
-
if (enc === 'utf-8') return 'UTF-8' // fast path
|
|
398
|
-
if (!enc) return enc
|
|
399
|
-
if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
|
|
400
|
-
if (enc === 'big5') return 'Big5'
|
|
401
|
-
if (enc === 'shift_jis') return 'Shift_JIS'
|
|
402
|
-
return enc
|
|
403
|
-
}
|