@exodus/bytes 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -12
- package/base32.js +2 -1
- package/base58.js +2 -2
- package/base64.js +2 -2
- package/bech32.js +1 -2
- package/encoding-lite.d.ts +1 -0
- package/encoding.d.ts +58 -0
- package/fallback/_utils.js +2 -0
- package/fallback/hex.js +2 -2
- package/fallback/latin1.js +33 -0
- package/fallback/single-byte.encodings.js +64 -43
- package/fallback/single-byte.js +6 -2
- package/fallback/utf8.js +5 -2
- package/hex.node.js +2 -1
- package/package.json +11 -4
- package/single-byte.js +41 -6
- package/single-byte.node.js +13 -4
- package/utf16.js +2 -2
- package/utf16.node.js +2 -2
- package/utf8.js +2 -2
- package/utf8.node.js +2 -1
package/README.md
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
# `@exodus/bytes`
|
|
2
2
|
|
|
3
|
+
[](https://npmjs.org/package/@exodus/bytes)
|
|
4
|
+

|
|
5
|
+
[](https://github.com/ExodusOSS/bytes/blob/HEAD/LICENSE)
|
|
6
|
+
|
|
3
7
|
`Uint8Array` conversion to and from `base64`, `base32`, `base58`, `hex`, `utf8`, `utf16`, `bech32` and `wif`
|
|
4
8
|
|
|
5
9
|
And a [`TextEncoder` / `TextDecoder` polyfill](#textencoder--textdecoder-polyfill)
|
|
@@ -38,7 +42,7 @@ Less than half the bundle size of [text-encoding](https://npmjs.com/text-encodin
|
|
|
38
42
|
Also [much faster](#fast) than all of those.
|
|
39
43
|
|
|
40
44
|
> [!TIP]
|
|
41
|
-
> See also the [lite version](#lite-version) to get this down to
|
|
45
|
+
> See also the [lite version](#lite-version) to get this down to 10 KiB gzipped.
|
|
42
46
|
|
|
43
47
|
Spec compliant, passing WPT and covered with extra tests.\
|
|
44
48
|
Moreover, tests for this library uncovered [bugs in all major implementations](https://docs.google.com/spreadsheets/d/1pdEefRG6r9fZy61WHGz0TKSt8cO4ISWqlpBN5KntIvQ/edit).\
|
|
@@ -81,9 +85,9 @@ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
|
|
|
81
85
|
import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
|
|
82
86
|
```
|
|
83
87
|
|
|
84
|
-
This reduces the bundle size
|
|
85
|
-
from 90 KiB gzipped for `@exodus/bytes/encoding.js` to
|
|
86
|
-
(For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped)
|
|
88
|
+
This reduces the bundle size 9x:\
|
|
89
|
+
from 90 KiB gzipped for `@exodus/bytes/encoding.js` to 10 KiB gzipped for `@exodus/bytes/encoding-lite.js`.\
|
|
90
|
+
(For comparison, `text-encoding` module is 190 KiB gzipped, and `iconv-lite` is 194 KiB gzipped.)
|
|
87
91
|
|
|
88
92
|
It still supports `utf-8`, `utf-16le`, `utf-16be` and all single-byte encodings specified by the spec,
|
|
89
93
|
the only difference is that it lacks support for legacy multi-byte encodings.
|
|
@@ -127,16 +131,42 @@ import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/
|
|
|
127
131
|
import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
|
|
128
132
|
```
|
|
129
133
|
|
|
130
|
-
Decode the legacy single-byte encodings according to the
|
|
131
|
-
|
|
132
|
-
[§
|
|
134
|
+
Decode / encode the legacy single-byte encodings according to the
|
|
135
|
+
[Encoding standard](https://encoding.spec.whatwg.org/)
|
|
136
|
+
([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings),
|
|
137
|
+
[§14.5](https://encoding.spec.whatwg.org/#x-user-defined)),
|
|
138
|
+
and [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859) `iso-8859-*` mappings.
|
|
133
139
|
|
|
134
|
-
Supports all single-byte encodings listed in the standard:
|
|
140
|
+
Supports all single-byte encodings listed in the WHATWG Encoding standard:
|
|
135
141
|
`ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
|
|
136
142
|
`iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
|
|
137
143
|
`macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
|
|
138
144
|
`windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
|
|
139
145
|
|
|
146
|
+
Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
|
|
147
|
+
[unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859)
|
|
148
|
+
(and all other `iso-8859-*` encodings there as they match WHATWG).
|
|
149
|
+
|
|
150
|
+
> [!NOTE]
|
|
151
|
+
> While all `iso-8859-*` encodings supported by the [WHATWG Encoding standard](https://encoding.spec.whatwg.org/) match
|
|
152
|
+
> [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859), the WHATWG Encoding spec doesn't support
|
|
153
|
+
> `iso-8859-1`, `iso-8859-9`, `iso-8859-11`, and instead maps them as labels to `windows-1252`, `windows-1254`, `windows-874`.\
|
|
154
|
+
> `createSinglebyteDecoder()` (unlike `TextDecoder` or `legacyHookDecode()`) does not do such mapping,
|
|
155
|
+
> so its results will differ from `TextDecoder` for those encoding names.
|
|
156
|
+
|
|
157
|
+
```js
|
|
158
|
+
> new TextDecoder('iso-8859-1').encoding
|
|
159
|
+
'windows-1252'
|
|
160
|
+
> new TextDecoder('iso-8859-9').encoding
|
|
161
|
+
'windows-1254'
|
|
162
|
+
> new TextDecoder('iso-8859-11').encoding
|
|
163
|
+
'windows-874'
|
|
164
|
+
> new TextDecoder('iso-8859-9').decode(Uint8Array.of(0x80, 0x81, 0xd0))
|
|
165
|
+
'€\x81Ğ' // this is actually decoded according to windows-1254 per TextDecoder spec
|
|
166
|
+
> createSinglebyteDecoder('iso-8859-9')(Uint8Array.of(0x80, 0x81, 0xd0))
|
|
167
|
+
'\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
|
|
168
|
+
```
|
|
169
|
+
|
|
140
170
|
##### `createSinglebyteDecoder(encoding, loose = false)`
|
|
141
171
|
|
|
142
172
|
Create a decoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
|
|
@@ -152,12 +182,35 @@ Returns a function `encode(string)` that encodes a string to bytes.
|
|
|
152
182
|
In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
|
|
153
183
|
not be encoded in the target encoding.
|
|
154
184
|
|
|
185
|
+
##### `latin1toString(arr)`
|
|
186
|
+
|
|
187
|
+
Decode `iso-8859-1` bytes to a string.
|
|
188
|
+
|
|
189
|
+
There is no loose variant for this encoding, all bytes can be decoded.
|
|
190
|
+
|
|
191
|
+
Same as:
|
|
192
|
+
```js
|
|
193
|
+
const latin1toString = createSinglebyteDecoder('iso-8859-1')
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as
|
|
197
|
+
those alias to `new TextDecoder('windows-1252')`.
|
|
198
|
+
|
|
199
|
+
##### `latin1fromString(string)`
|
|
200
|
+
|
|
201
|
+
Encode a string to `iso-8859-1` bytes.
|
|
202
|
+
|
|
203
|
+
Will throw on non well-formed strings or any codepoints which could not be encoded in `iso-8859-1`.
|
|
204
|
+
|
|
205
|
+
Same as:
|
|
206
|
+
```js
|
|
207
|
+
const latin1fromString = createSinglebyteEncoder('iso-8859-1', { mode: 'fatal' })
|
|
208
|
+
```
|
|
209
|
+
|
|
155
210
|
##### `windows1252toString(arr)`
|
|
156
211
|
|
|
157
212
|
Decode `windows-1252` bytes to a string.
|
|
158
213
|
|
|
159
|
-
Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
|
|
160
|
-
|
|
161
214
|
There is no loose variant for this encoding, all bytes can be decoded.
|
|
162
215
|
|
|
163
216
|
Same as:
|
|
@@ -169,8 +222,6 @@ const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
|
169
222
|
|
|
170
223
|
Encode a string to `windows-1252` bytes.
|
|
171
224
|
|
|
172
|
-
Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
|
|
173
|
-
|
|
174
225
|
Will throw on non well-formed strings or any codepoints which could not be encoded in `windows-1252`.
|
|
175
226
|
|
|
176
227
|
Same as:
|
|
@@ -212,6 +263,8 @@ import { fromBigInt, toBigInt } from '@exodus/bytes/bigint.js'
|
|
|
212
263
|
|
|
213
264
|
### `@exodus/bytes/hex.js`
|
|
214
265
|
|
|
266
|
+
Implements Base16 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
|
|
267
|
+
|
|
215
268
|
```js
|
|
216
269
|
import { fromHex, toHex } from '@exodus/bytes/hex.js'
|
|
217
270
|
```
|
|
@@ -221,6 +274,8 @@ import { fromHex, toHex } from '@exodus/bytes/hex.js'
|
|
|
221
274
|
|
|
222
275
|
### `@exodus/bytes/base64.js`
|
|
223
276
|
|
|
277
|
+
Implements Base64 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
|
|
278
|
+
|
|
224
279
|
```js
|
|
225
280
|
import { fromBase64, toBase64 } from '@exodus/bytes/base64.js'
|
|
226
281
|
import { fromBase64url, toBase64url } from '@exodus/bytes/base64.js'
|
|
@@ -235,6 +290,8 @@ import { fromBase64any } from '@exodus/bytes/base64.js'
|
|
|
235
290
|
|
|
236
291
|
### `@exodus/bytes/base32.js`
|
|
237
292
|
|
|
293
|
+
Implements Base32 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
|
|
294
|
+
|
|
238
295
|
```js
|
|
239
296
|
import { fromBase32, toBase32 } from '@exodus/bytes/base32.js'
|
|
240
297
|
import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
|
|
@@ -247,6 +304,8 @@ import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
|
|
|
247
304
|
|
|
248
305
|
### `@exodus/bytes/bech32.js`
|
|
249
306
|
|
|
307
|
+
Implements [BIP-0173](https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki#specification) and [BIP-0350](https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki#specification).
|
|
308
|
+
|
|
250
309
|
```js
|
|
251
310
|
import { fromBech32, toBech32 } from '@exodus/bytes/bech32.js'
|
|
252
311
|
import { fromBech32m, toBech32m } from '@exodus/bytes/bech32.js'
|
package/base32.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { assertEmptyRest } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
|
+
import { E_STRING } from './fallback/_utils.js'
|
|
3
4
|
import * as js from './fallback/base32.js'
|
|
4
5
|
|
|
5
6
|
// See https://datatracker.ietf.org/doc/html/rfc4648
|
|
@@ -25,7 +26,7 @@ export function fromBase32hex(str, options) {
|
|
|
25
26
|
}
|
|
26
27
|
|
|
27
28
|
function fromBase32common(str, isBase32Hex, padding, format, rest) {
|
|
28
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
29
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
29
30
|
if (rest !== null) assertEmptyRest(rest)
|
|
30
31
|
|
|
31
32
|
if (padding === true) {
|
package/base58.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { typedView } from './array.js'
|
|
2
2
|
import { assertUint8 } from './assert.js'
|
|
3
|
-
import { nativeDecoder, nativeEncoder, isHermes } from './fallback/_utils.js'
|
|
3
|
+
import { nativeDecoder, nativeEncoder, isHermes, E_STRING } from './fallback/_utils.js'
|
|
4
4
|
import { encodeAscii, decodeAscii } from './fallback/latin1.js'
|
|
5
5
|
|
|
6
6
|
const alphabet58 = [...'123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz']
|
|
@@ -122,7 +122,7 @@ function toBase58core(arr, alphabet, codes) {
|
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
function fromBase58core(str, alphabet, codes, format = 'uint8') {
|
|
125
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
125
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
126
126
|
const length = str.length
|
|
127
127
|
if (length === 0) return typedView(new Uint8Array(), format)
|
|
128
128
|
|
package/base64.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { assertUint8, assertEmptyRest } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
|
-
import { isHermes, skipWeb } from './fallback/_utils.js'
|
|
3
|
+
import { isHermes, skipWeb, E_STRING } from './fallback/_utils.js'
|
|
4
4
|
import { decodeLatin1, encodeLatin1 } from './fallback/latin1.js'
|
|
5
5
|
import * as js from './fallback/base64.js'
|
|
6
6
|
|
|
@@ -79,7 +79,7 @@ export function fromBase64any(str, { format = 'uint8', padding = 'both', ...rest
|
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
function fromBase64common(str, isBase64url, padding, format, rest) {
|
|
82
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
82
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
83
83
|
if (rest !== null) assertEmptyRest(rest)
|
|
84
84
|
const auto = padding === 'both' ? str.endsWith('=') : undefined
|
|
85
85
|
// Older JSC supporting Uint8Array.fromBase64 lacks proper checks
|
package/bech32.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { assertUint8 } from './assert.js'
|
|
2
|
-
import { nativeEncoder } from './fallback/_utils.js'
|
|
2
|
+
import { nativeEncoder, E_STRING } from './fallback/_utils.js'
|
|
3
3
|
import { decodeAscii, encodeAscii, encodeLatin1 } from './fallback/latin1.js'
|
|
4
4
|
|
|
5
5
|
const alphabet = [...'qpzry9x8gf2tvdw0s3jn54khce6mua7l']
|
|
@@ -12,7 +12,6 @@ const E_MIXED = 'Mixed-case string'
|
|
|
12
12
|
const E_PADDING = 'Padding is invalid'
|
|
13
13
|
const E_CHECKSUM = 'Invalid checksum'
|
|
14
14
|
const E_CHARACTER = 'Non-bech32 character'
|
|
15
|
-
const E_STRING = 'Input is not a string'
|
|
16
15
|
|
|
17
16
|
// nativeEncoder path uses encodeAscii which asserts ascii, otherwise we have 0-255 bytes from encodeLatin1
|
|
18
17
|
const c2x = new Int8Array(nativeEncoder ? 128 : 256).fill(-1)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './encoding.js'
|
package/encoding.d.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Converts an encoding label to its name, as an ASCII-lowercased string
|
|
5
|
+
* @param label - The encoding label to normalize
|
|
6
|
+
* @returns The normalized encoding name, or null if invalid
|
|
7
|
+
*/
|
|
8
|
+
export function normalizeEncoding(label: string): string | null;
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Implements BOM sniff (https://encoding.spec.whatwg.org/#bom-sniff) legacy hook.
|
|
12
|
+
* @param input - The bytes to check for BOM
|
|
13
|
+
* @returns The encoding ('utf-8', 'utf-16le', 'utf-16be'), or null if no BOM found
|
|
14
|
+
*/
|
|
15
|
+
export function getBOMEncoding(
|
|
16
|
+
input: ArrayBufferLike | ArrayBufferView
|
|
17
|
+
): 'utf-8' | 'utf-16le' | 'utf-16be' | null;
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Implements decode (https://encoding.spec.whatwg.org/#decode) legacy hook.
|
|
21
|
+
* @param input - The bytes to decode
|
|
22
|
+
* @param fallbackEncoding - The encoding to use if no BOM detected (default: 'utf-8')
|
|
23
|
+
* @returns The decoded string
|
|
24
|
+
*/
|
|
25
|
+
export function legacyHookDecode(
|
|
26
|
+
input: ArrayBufferLike | ArrayBufferView,
|
|
27
|
+
fallbackEncoding?: string
|
|
28
|
+
): string;
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Converts an encoding label to its name, as a case-sensitive string.
|
|
32
|
+
* @param label - The encoding label
|
|
33
|
+
* @returns The proper case encoding name, or null if invalid
|
|
34
|
+
*/
|
|
35
|
+
export function labelToName(label: string): string | null;
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Text decoder for decoding bytes to strings in various encodings
|
|
39
|
+
* Supports strict and lossy modes
|
|
40
|
+
*/
|
|
41
|
+
export const TextDecoder: typeof globalThis.TextDecoder;
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Text encoder for encoding strings to UTF-8 bytes
|
|
45
|
+
*/
|
|
46
|
+
export const TextEncoder: typeof globalThis.TextEncoder;
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Transform stream wrapper for TextDecoder
|
|
50
|
+
* Decodes chunks of bytes to strings
|
|
51
|
+
*/
|
|
52
|
+
export const TextDecoderStream: typeof globalThis.TextDecoderStream;
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Transform stream wrapper for TextEncoder
|
|
56
|
+
* Encodes chunks of strings to UTF-8 bytes
|
|
57
|
+
*/
|
|
58
|
+
export const TextEncoderStream: typeof globalThis.TextEncoderStream;
|
package/fallback/_utils.js
CHANGED
package/fallback/hex.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { assertUint8 } from '../assert.js'
|
|
2
|
-
import { nativeDecoder, nativeEncoder, decode2string } from './_utils.js'
|
|
2
|
+
import { nativeDecoder, nativeEncoder, decode2string, E_STRING } from './_utils.js'
|
|
3
3
|
import { encodeAscii, decodeAscii } from './latin1.js'
|
|
4
4
|
|
|
5
5
|
let hexArray // array of 256 bytes converted to two-char hex strings
|
|
@@ -52,7 +52,7 @@ export function toHex(arr) {
|
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
export function fromHex(str) {
|
|
55
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
55
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
56
56
|
if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
|
|
57
57
|
|
|
58
58
|
const length = str.length / 2 // this helps Hermes in loops
|
package/fallback/latin1.js
CHANGED
|
@@ -6,12 +6,19 @@ import {
|
|
|
6
6
|
isHermes,
|
|
7
7
|
isDeno,
|
|
8
8
|
isLE,
|
|
9
|
+
skipWeb,
|
|
9
10
|
} from './_utils.js'
|
|
10
11
|
|
|
12
|
+
const { atob } = globalThis
|
|
13
|
+
const { toBase64: web64 } = Uint8Array.prototype
|
|
14
|
+
|
|
11
15
|
// See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
|
|
12
16
|
// On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
|
|
13
17
|
const maxFunctionArgs = 0x20_00
|
|
14
18
|
|
|
19
|
+
// toBase64+atob path is faster on everything where fromBase64 is fast
|
|
20
|
+
const useLatin1atob = web64 && atob && !skipWeb
|
|
21
|
+
|
|
15
22
|
export function asciiPrefix(arr) {
|
|
16
23
|
let p = 0 // verified ascii bytes
|
|
17
24
|
const length = arr.length
|
|
@@ -46,6 +53,18 @@ export function decodeLatin1(arr, start = 0, stop = arr.length) {
|
|
|
46
53
|
stop |= 0
|
|
47
54
|
const total = stop - start
|
|
48
55
|
if (total === 0) return ''
|
|
56
|
+
|
|
57
|
+
if (
|
|
58
|
+
useLatin1atob &&
|
|
59
|
+
total >= 256 &&
|
|
60
|
+
total < 1e8 &&
|
|
61
|
+
arr.toBase64 === web64 &&
|
|
62
|
+
arr.BYTES_PER_ELEMENT === 1
|
|
63
|
+
) {
|
|
64
|
+
const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
|
|
65
|
+
return atob(sliced.toBase64())
|
|
66
|
+
}
|
|
67
|
+
|
|
49
68
|
if (total > maxFunctionArgs) {
|
|
50
69
|
let prefix = ''
|
|
51
70
|
for (let i = start; i < stop; ) {
|
|
@@ -107,6 +126,20 @@ export const encodeCharcodes = isHermes
|
|
|
107
126
|
return arr
|
|
108
127
|
}
|
|
109
128
|
|
|
129
|
+
export function encodeAsciiPrefix(x, s) {
|
|
130
|
+
let i = 0
|
|
131
|
+
for (const len3 = s.length - 3; i < len3; i += 4) {
|
|
132
|
+
const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore
|
|
133
|
+
if ((x0 | x1 | x2 | x3) >= 128) break
|
|
134
|
+
x[i] = x0
|
|
135
|
+
x[i + 1] = x1
|
|
136
|
+
x[i + 2] = x2
|
|
137
|
+
x[i + 3] = x3
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
return i
|
|
141
|
+
}
|
|
142
|
+
|
|
110
143
|
/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
|
|
111
144
|
|
|
112
145
|
// Warning: can be used only on checked strings, converts strings to 8-bit
|
|
@@ -8,54 +8,75 @@ const h = (x) => new Array(x).fill(r)
|
|
|
8
8
|
|
|
9
9
|
// Common ranges
|
|
10
10
|
|
|
11
|
-
// prettier-ignore
|
|
12
|
-
const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
|
|
13
|
-
// prettier-ignore
|
|
14
|
-
const k8b = [-30,1,21,-18,1,15,-17,18,-13,...e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
|
|
15
|
-
const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
|
|
16
|
-
const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
|
|
17
|
-
// prettier-ignore
|
|
18
|
-
const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,...p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,...p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
|
|
19
|
-
const i0 = e(33)
|
|
20
11
|
// prettier-ignore
|
|
21
12
|
const i2 = [-40,-147,1,64,-62,117,-51,-63,69,-67,79,-77,79,-77,1,64,2,51,4,-116,1,124,-122,1,129,22,-148,150,-148,1,133,-131,118,-116,1,33,-31,86,-51,-32,38,-36,48,-46,48,-46,1,33,2,51,4,-85,1,93,-91,1,98,22,-117,119,-117,1,102,374]
|
|
22
|
-
const i4a = [-75, -63,
|
|
23
|
-
const i4b = [34, -32,
|
|
24
|
-
const i7 = [721, 1, 1, -719, 721, -719, 721,
|
|
25
|
-
const i8 = [
|
|
13
|
+
const i4a = [-75, -63, e(5), 104, -34, -67, 79, -77, 75, -73, 1]
|
|
14
|
+
const i4b = [34, -32, e(5), 73, -34, -36, 48, -46, 44, -42, 1]
|
|
15
|
+
const i7 = [721, 1, 1, -719, 721, -719, 721, e(19), r, 2, e(43), r]
|
|
16
|
+
const i8 = [e(26), r, r, 6692, 1, r]
|
|
17
|
+
const i9 = [79, -77, e(11), 84, 46, -127, e(16), 48, -46, e(11), 53, 46]
|
|
18
|
+
const iB = [3425, e(57), h(4), 5, e(28), h(4)]
|
|
19
|
+
const p2 = [-99, 12, 20, -12, 17, 37, -29, 2]
|
|
20
|
+
const p1 = [8237, -8235, 8089, -7816, 7820, 8, -6, 1]
|
|
26
21
|
const w0 = [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104]
|
|
27
22
|
const w8 = [8072, 1, 3, 1, 5, -15, 1]
|
|
28
|
-
const w1 = [
|
|
29
|
-
const w3 = [
|
|
23
|
+
const w1 = [w8, -7480, 7750, -8129, 7897, -7911, -182]
|
|
24
|
+
const w3 = [w8, -8060, 8330, -8328, 8096, -8094]
|
|
30
25
|
const m0 = [8558, -8328, 8374, -66, -8539, 16, 8043, -8070]
|
|
26
|
+
// prettier-ignore
|
|
27
|
+
const p3 = [1,1,65,-63,158,-156,1,1,1,40,30,42,-46,6,-66,1,83,-6,-6,-67,176,p2,-114,121,-119,1,1,155,-49,25,16,-142,159,2,-158,38,42,-46,6,-35,1,52,-6,-6,-36,145,p2,-83,90,-88,1,1,124,-49,25,16,-111,128,2]
|
|
28
|
+
// prettier-ignore
|
|
29
|
+
const k8a = [9345,2,10,4,4,4,4,8,8,8,8,68,4,4,4,4,1,1,1,-627,640,-903,1,46,28,1,-8645,8833,-8817,2,5,64,9305,1,1,-8449]
|
|
30
|
+
// prettier-ignore
|
|
31
|
+
const k8b = [-30,1,21,-18,1,15,-17,18,-13,e(7),16,-15,1,1,1,-13,-4,26,-1,-20,17,5,-4,-2,3]
|
|
31
32
|
|
|
32
33
|
// prettier-ignore
|
|
33
|
-
|
|
34
|
-
ibm866: [913
|
|
35
|
-
'
|
|
36
|
-
'
|
|
37
|
-
'
|
|
38
|
-
|
|
39
|
-
'
|
|
40
|
-
'iso-8859-2': [...i0,100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,...i2],
|
|
41
|
-
'iso-8859-3': [...i0,134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,...e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,...e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
|
|
42
|
-
'iso-8859-4': [...i0,100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,...i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,...i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
|
|
43
|
-
'iso-8859-5': [...i0,865,...e(11),-863,865,...e(65),7367,-7365,...e(11),-949,951,1],
|
|
44
|
-
'iso-8859-6': [...i0,r,r,r,4,...h(7),1384,-1375,...h(13),1390,r,r,r,4,r,2,...e(25),r,r,r,r,r,6,...e(18),...h(13)],
|
|
45
|
-
'iso-8859-7': [...i0,8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,...i7],
|
|
46
|
-
'iso-8859-8': [...i0,r,2,...e(7),46,-44,...e(14),62,-60,1,1,1,...h(32),8025,-6727,...i8],
|
|
47
|
-
'koi8-r': [...k8a,8450,...e(14),-8544,8545,...e(10),-9411,933,...k8b,-28,...k8b],
|
|
48
|
-
'koi8-u': [...k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,...k8b,-28,...k8b],
|
|
49
|
-
macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,...m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
|
|
50
|
-
'windows-1250': [...w0,-7888,7897,-7903,10,25,-4,-233,...w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,...i2],
|
|
51
|
-
'windows-1251': [899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,...e(63)],
|
|
52
|
-
'windows-1252': [...p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,...w1,225,-6],
|
|
53
|
-
'windows-1253': [...p1,-8089,8104,-8102,8111,-8109,1,1,1,1,...w3,1,1,1,1,741,1,-739,1,1,1,1,1,1,r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,...i7],
|
|
54
|
-
'windows-1254': [...p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,...w1,1,218,-216,...e(47),79,-77,...e(11),84,46,-127,...e(16),48,-46,...e(11),53,46],
|
|
55
|
-
'windows-1255': [...p1,-7515,7530,-8102,8111,-8109,1,1,1,1,...w8,-7480,7750,-8328,8096,-8094,...e(7),8199,-8197,1,1,1,1,46,-44,...e(14),62,-60,1,1,1,1,1265,...e(19),45,1,1,1,1,...h(7),-36,...i8],
|
|
56
|
-
'windows-1256': [8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,...e(7),1557,-1555,...e(14),1378,-1376,1,1,1,1377,162,-160,...e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
|
|
57
|
-
'windows-1257': [...w0,-8102,8111,-8109,28,543,-527,-40,...w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,...e(5),...p3,347],
|
|
58
|
-
'windows-1258': [...p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,...w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,...e(34),64,-62,...e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,1,1,1,1,1,1,211,340,-548,1,1,1,33,-31,...e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,1,1,1,1,1,1,180,7931],
|
|
59
|
-
'windows-874': [8237,-8235,1,1,1,8098,-8096,...e(10),...w8,-8060,...e(8),3425,...e(57),r,r,r,r,5,...e(28),r,r,r,r],
|
|
60
|
-
'x-mac-cyrillic': [913,...e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,...m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,...e(30),7262]
|
|
34
|
+
const maps = {
|
|
35
|
+
ibm866: [913,e(47),8530,1,1,-145,34,61,1,-12,-1,14,-18,6,6,-1,-1,-75,4,32,-8,-16,-28,60,34,1,-5,-6,21,-3,-6,-16,28,-5,1,-4,1,-12,-1,-6,1,24,-1,-82,-12,124,-4,8,4,-16,-8512,e(15),-78,80,-77,80,-77,80,-73,80,-942,8553,-8546,8547,-260,-8306,9468,-9472],
|
|
36
|
+
'koi8-r': [k8a,8450,e(14),-8544,8545,e(10),-9411,933,k8b,-28,k8b],
|
|
37
|
+
'koi8-u': [k8a,3,8448,-8446,1,8448,1,1,1,1,-8394,-51,8448,1,1,1,-8544,3,8543,-8541,1,8543,1,1,1,1,-8410,-130,-869,933,k8b,-28,k8b],
|
|
38
|
+
'x-mac-cyrillic': [913,e(31),7153,-8048,992,-1005,4,8059,-8044,848,-856,-5,8313,-7456,80,7694,-7773,80,7627,-8557,8627,1,-7695,-929,988,-137,-4,80,-77,80,-78,80,-79,80,-2,-83,-857,m0,875,80,-79,80,-7,7102,1,8,1,-5,1,-7970,7975,-7184,80,-79,80,7351,-7445,80,-2,-31,e(30),7262],
|
|
39
|
+
macintosh: [69,1,2,2,8,5,6,5,-1,2,2,-1,2,2,2,-1,2,1,2,-1,2,1,2,2,-1,2,2,-1,5,-1,2,1,7972,-8048,-14,1,4,8059,-8044,41,-49,-5,8313,-8302,-12,8632,-8602,18,8518,-8557,8627,1,-8640,16,8525,15,-2,-7759,7787,-8577,16,751,-707,18,-57,-30,11,m0,32,3,18,125,1,7872,1,8,1,-5,1,-7970,9427,-9419,121,7884,104,-115,1,56007,1,-56033,-8042,8035,4,18,-8046,8,-9,10,-3,5,1,1,-3,7,1,63531,-63533,8,1,-2,88,405,22,-557,553,1,1,-546,549,-2,-20],
|
|
40
|
+
'windows-874': [8237,-8235,1,1,1,8098,-8096,e(10),w8,-8060,e(8),iB],
|
|
61
41
|
}
|
|
42
|
+
|
|
43
|
+
// windows-1250 - windows-1258
|
|
44
|
+
// prettier-ignore
|
|
45
|
+
;[
|
|
46
|
+
[w0,-7888,7897,-7903,10,25,-4,-233,w8,-8060,8330,-8129,7897,-7903,10,25,-4,-218,551,17,-407,-157,96,-94,1,1,1,181,-179,1,1,1,205,-203,1,554,-409,-142,1,1,1,1,77,90,-164,130,416,-415,62,i2],
|
|
47
|
+
[899,1,7191,-7111,7115,8,-6,1,139,-124,-7207,7216,-7215,2,-1,4,67,7110,1,3,1,5,-15,1,-8060,8330,-7369,7137,-7136,2,-1,4,-959,878,80,-86,-868,1004,-1002,1,858,-856,859,-857,1,1,1,857,-855,1,853,80,59,-988,1,1,922,7365,-7362,-921,925,-83,80,2,-71,e(63)],
|
|
48
|
+
[p1,-7515,7530,-7888,7897,-7911,-197,240,-238,1,w1,225,-6],
|
|
49
|
+
[p1,-8089,8104,-8102,8111,-8109,1,1,1,1,w3,1,1,1,1,741,1,-739,e(6),r,2,1,1,1,8039,-8037,1,1,1,721,-719,1,1,i7],
|
|
50
|
+
[p1,-7515,7530,-7888,7897,-7911,-197,1,1,1,w1,1,218,-216,e(47),i9],
|
|
51
|
+
[p1,-7515,7530,-8102,8111,-8109,1,1,1,1,w8,-7480,7750,-8328,8096,-8094,e(7),8199,-8197,1,1,1,1,46,-44,e(14),62,-60,1,1,1,1,1265,e(19),45,1,1,1,1,h(7),-36,i8],
|
|
52
|
+
[8237,-6702,6556,-7816,7820,8,-6,1,-7515,7530,-6583,6592,-7911,1332,18,-16,39,6505,1,3,1,5,-15,1,-6507,6777,-6801,6569,-7911,7865,1,-6483,-1562,1388,-1386,e(7),1557,-1555,e(14),1378,-1376,1,1,1,1377,162,-160,e(21),-1375,1376,1,1,1,6,1,1,1,-1379,1380,-1378,1379,1,1,1,-1377,1,1,1,1,1374,1,-1372,1,1372,1,1,1,-1370,1371,1,-1369,1370,-1368,1369,-1367,1,7954,1,-6461],
|
|
53
|
+
[w0,-8102,8111,-8109,28,543,-527,-40,w3,19,556,-572,1,r,2,1,1,r,2,1,49,-47,173,-171,1,1,1,24,-22,e(5),p3,347],
|
|
54
|
+
[p1,-7515,7530,-8102,8111,-7911,-197,1,1,1,w8,-7480,7750,-8328,8096,-7911,-182,1,218,-216,e(34),64,-62,e(7),565,-563,1,1,65,-63,568,-566,1,204,-202,e(6),211,340,-548,1,1,1,33,-31,e(7),534,-532,1,1,34,-32,562,-560,1,173,-171,e(6),180,7931],
|
|
55
|
+
].forEach((m, i) => {
|
|
56
|
+
maps[`windows-${i + 1250}`] = m
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
// iso-8859-1 - iso-8859-16
|
|
60
|
+
// prettier-ignore
|
|
61
|
+
;[
|
|
62
|
+
[], // Actual Latin1 / Unicode subset, non-WHATWG, which maps iso-8859-1 to windows-1252
|
|
63
|
+
[100,468,-407,-157,153,29,-179,1,184,-2,6,21,-204,208,-2,-203,85,470,-409,-142,138,29,364,-527,169,-2,6,21,355,-351,-2,i2],
|
|
64
|
+
[134,434,-565,1,r,128,-125,1,136,46,-64,22,-135,r,206,-203,119,-117,1,1,1,112,-110,1,121,46,-64,22,-120,r,191,-188,1,1,r,2,70,-2,-65,e(8),r,2,1,1,1,76,-74,1,69,-67,1,1,1,144,-16,-125,1,1,1,r,2,39,-2,-34,e(8),r,2,1,1,1,45,-43,1,38,-36,1,1,1,113,-16,380],
|
|
65
|
+
[100,52,30,-178,132,19,-148,1,184,-78,16,68,-185,208,-206,1,85,470,-388,-163,117,19,395,-527,169,-78,16,68,-29,52,-51,i4a,92,-26,53,7,-22,-98,1,1,1,1,154,-152,1,1,140,2,-139,i4b,61,-26,53,7,-22,-67,1,1,1,1,123,-121,1,1,109,2,366],
|
|
66
|
+
[865,e(11),-863,865,e(65),7367,-7365,e(11),-949,951,1],
|
|
67
|
+
[r,r,r,4,h(7),1384,-1375,h(13),1390,r,r,r,4,r,2,e(25),h(5),6,e(18),h(13)],
|
|
68
|
+
[8056,1,-8054,8201,3,-8201,1,1,1,721,-719,1,1,r,8040,-8037,1,1,1,721,1,1,-719,i7],
|
|
69
|
+
[r,2,e(7),46,-44,e(14),62,-60,1,1,1,h(32),8025,-6727,i8],
|
|
70
|
+
[e(47),i9], // non-WHATWG, which maps iso-8859-9 to windows-1254
|
|
71
|
+
[100,14,16,8,-2,14,-143,148,-43,80,6,23,-208,189,-32,-154,85,14,16,8,-2,14,-128,133,-43,80,6,23,7831,-7850,-32,i4a,1,1,117,7,-121,1,1,1,146,-144,154,-152,e(5),i4b,1,1,86,7,-90,1,1,1,115,-113,123,-121,1,1,1,1,58],
|
|
72
|
+
iB, // non-WHATWG, which maps iso-8859-11 to windows-874
|
|
73
|
+
null, // no 12
|
|
74
|
+
[8061,-8059,1,1,8058,-8056,1,49,-47,173,-171,1,1,1,24,-22,1,1,1,8041,-8039,p3,7835],
|
|
75
|
+
[7522,1,-7520,103,1,7423,-7523,7641,-7639,7641,-119,231,-7749,1,202,7334,1,-7423,1,7455,1,-7563,7584,43,-42,44,-35,147,-111,1,-36,-7585,e(15),165,-163,e(5),7572,-7570,e(5),153,-151,e(16),134,-132,e(5),7541,-7539,e(5),122],
|
|
76
|
+
[1,1,1,8201,-8199,187,-185,186,-184,e(10),202,-200,1,1,199,-197,1,1,151,1,37],
|
|
77
|
+
[100,1,60,8043,-142,-7870,-185,186,-184,367,-365,206,-204,205,1,-203,1,91,54,59,7840,-8039,1,199,-113,268,-350,151,1,37,4,-188,1,1,64,-62,66,-64,e(9),65,51,-113,1,1,124,-122,132,22,-151,1,1,1,60,258,-315,1,1,1,33,-31,35,-33,e(9),34,51,-82,1,1,93,-91,101,22,-120,1,1,1,29,258],
|
|
78
|
+
].forEach((m, i) => {
|
|
79
|
+
if (m) maps[`iso-8859-${i + 1}`] = [e(33), m]
|
|
80
|
+
})
|
|
81
|
+
|
|
82
|
+
export default maps
|
package/fallback/single-byte.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
|
|
2
2
|
import encodings from './single-byte.encodings.js'
|
|
3
|
-
import { decode2string } from './_utils.js'
|
|
3
|
+
import { decode2string, nativeDecoder } from './_utils.js'
|
|
4
4
|
|
|
5
5
|
export const E_STRICT = 'Input is not well-formed for this encoding'
|
|
6
6
|
const xUserDefined = 'x-user-defined'
|
|
@@ -18,7 +18,8 @@ function getEncoding(encoding) {
|
|
|
18
18
|
if (encoding === xUserDefined) return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
|
|
19
19
|
if (encoding === iso8i) encoding = 'iso-8859-8'
|
|
20
20
|
let prev = 127
|
|
21
|
-
|
|
21
|
+
const enc = encodings[encoding].flat().flat().flat() // max depth is 3, rechecked by tests
|
|
22
|
+
return enc.map((x) => (x === r ? x : (prev += x))) // eslint-disable-line no-return-assign
|
|
22
23
|
}
|
|
23
24
|
|
|
24
25
|
const mappers = new Map()
|
|
@@ -64,6 +65,8 @@ export function encodingMapper(encoding) {
|
|
|
64
65
|
export function encodingDecoder(encoding) {
|
|
65
66
|
const cached = decoders.get(encoding)
|
|
66
67
|
if (cached) return cached
|
|
68
|
+
const isLatin1 = encoding === 'iso-8859-1'
|
|
69
|
+
if (isLatin1 && !nativeDecoder) return (arr, loose = false) => decodeLatin1(arr) // native decoder is faster for ascii below
|
|
67
70
|
|
|
68
71
|
let strings
|
|
69
72
|
const codes = getEncoding(encoding)
|
|
@@ -77,6 +80,7 @@ export function encodingDecoder(encoding) {
|
|
|
77
80
|
|
|
78
81
|
const prefixLen = asciiPrefix(arr)
|
|
79
82
|
if (prefixLen === arr.length) return decodeAscii(arr)
|
|
83
|
+
if (isLatin1) return decodeLatin1(arr) // TODO: check if decodeAscii with subarray is faster for small prefixes too
|
|
80
84
|
const prefix = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
|
|
81
85
|
const suffix = decode2string(arr, prefix.length, arr.length, strings)
|
|
82
86
|
if (!loose && incomplete && suffix.includes('\uFFFD')) throw new TypeError(E_STRICT)
|
package/fallback/utf8.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { encodeAsciiPrefix } from './latin1.js'
|
|
2
|
+
|
|
1
3
|
export const E_STRICT = 'Input is not well-formed utf8'
|
|
2
4
|
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
|
|
3
5
|
|
|
@@ -152,9 +154,10 @@ export function encode(string, loose) {
|
|
|
152
154
|
const length = string.length
|
|
153
155
|
let small = true
|
|
154
156
|
let bytes = new Uint8Array(length) // assume ascii
|
|
155
|
-
let p = 0
|
|
156
157
|
|
|
157
|
-
|
|
158
|
+
let i = encodeAsciiPrefix(bytes, string)
|
|
159
|
+
let p = i
|
|
160
|
+
for (; i < length; i++) {
|
|
158
161
|
let code = string.charCodeAt(i)
|
|
159
162
|
if (code < 0x80) {
|
|
160
163
|
bytes[p++] = code
|
package/hex.node.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { assertUint8 } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
|
+
import { E_STRING } from './fallback/_utils.js'
|
|
3
4
|
import { E_HEX } from './fallback/hex.js'
|
|
4
5
|
|
|
5
6
|
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
|
|
@@ -19,7 +20,7 @@ export function toHex(arr) {
|
|
|
19
20
|
export const fromHex = Uint8Array.fromHex
|
|
20
21
|
? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
|
|
21
22
|
: (str, format = 'uint8') => {
|
|
22
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
23
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
23
24
|
if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
|
|
24
25
|
if (denoBug && /[^\dA-Fa-f]/.test(str)) throw new SyntaxError(E_HEX)
|
|
25
26
|
const buf = Buffer.from(str, 'hex') // will stop on first non-hex character, so we can just validate length
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exodus/bytes",
|
|
3
|
-
"version": "1.7.0",
|
|
3
|
+
"version": "1.8.0",
|
|
4
4
|
"description": "Various operations on Uint8Array data",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"lint": "eslint .",
|
|
@@ -72,7 +72,9 @@
|
|
|
72
72
|
"/bech32.js",
|
|
73
73
|
"/bigint.js",
|
|
74
74
|
"/encoding.js",
|
|
75
|
+
"/encoding.d.ts",
|
|
75
76
|
"/encoding-lite.js",
|
|
77
|
+
"/encoding-lite.d.ts",
|
|
76
78
|
"/hex.js",
|
|
77
79
|
"/hex.d.ts",
|
|
78
80
|
"/hex.node.js",
|
|
@@ -117,8 +119,14 @@
|
|
|
117
119
|
"node": "./single-byte.node.js",
|
|
118
120
|
"default": "./single-byte.js"
|
|
119
121
|
},
|
|
120
|
-
"./encoding.js":
|
|
121
|
-
|
|
122
|
+
"./encoding.js": {
|
|
123
|
+
"types": "./encoding.d.ts",
|
|
124
|
+
"default": "./encoding.js"
|
|
125
|
+
},
|
|
126
|
+
"./encoding-lite.js": {
|
|
127
|
+
"types": "./encoding-lite.d.ts",
|
|
128
|
+
"default": "./encoding-lite.js"
|
|
129
|
+
},
|
|
122
130
|
"./utf16.js": {
|
|
123
131
|
"node": "./utf16.node.js",
|
|
124
132
|
"default": "./utf16.js"
|
|
@@ -174,7 +182,6 @@
|
|
|
174
182
|
"uint8array-tools": "^0.0.9",
|
|
175
183
|
"utf8": "^3.0.0",
|
|
176
184
|
"web-streams-polyfill": "^4.2.0",
|
|
177
|
-
"whatwg-encoding": "^3.1.1",
|
|
178
185
|
"wif": "^5.0.0"
|
|
179
186
|
},
|
|
180
187
|
"prettier": "@exodus/prettier",
|
package/single-byte.js
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
import { assertUint8 } from './assert.js'
|
|
2
|
-
import { canDecoders, nativeEncoder } from './fallback/_utils.js'
|
|
3
|
-
import { encodeAscii } from './fallback/latin1.js'
|
|
2
|
+
import { canDecoders, nativeEncoder, isHermes, skipWeb, E_STRING } from './fallback/_utils.js'
|
|
3
|
+
import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
|
|
4
4
|
import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
|
|
5
5
|
|
|
6
|
-
const { TextDecoder } = globalThis
|
|
6
|
+
const { TextDecoder, btoa } = globalThis
|
|
7
7
|
|
|
8
8
|
let windows1252works
|
|
9
9
|
|
|
10
10
|
// prettier-ignore
|
|
11
11
|
const skipNative = new Set([
|
|
12
|
-
'iso-8859-
|
|
12
|
+
'iso-8859-1', 'iso-8859-9', 'iso-8859-11', // non-WHATWG
|
|
13
13
|
'iso-8859-6', 'iso-8859-8', 'iso-8859-8-i', // slow in all 3 engines
|
|
14
|
+
'iso-8859-16', // iso-8859-16 is somehow broken in WebKit, at least on CI
|
|
14
15
|
])
|
|
15
16
|
|
|
16
17
|
function shouldUseNative(enc) {
|
|
@@ -63,7 +64,22 @@ const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
|
|
|
63
64
|
function encode(s, m) {
|
|
64
65
|
const len = s.length
|
|
65
66
|
const x = new Uint8Array(len)
|
|
66
|
-
|
|
67
|
+
let i = nativeEncoder ? 0 : encodeAsciiPrefix(x, s)
|
|
68
|
+
|
|
69
|
+
if (!isHermes) {
|
|
70
|
+
for (const len3 = len - 3; i < len3; i += 4) {
|
|
71
|
+
const x0 = s.charCodeAt(i), x1 = s.charCodeAt(i + 1), x2 = s.charCodeAt(i + 2), x3 = s.charCodeAt(i + 3) // prettier-ignore
|
|
72
|
+
const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
|
|
73
|
+
if ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3)) throw new TypeError(E_STRICT)
|
|
74
|
+
|
|
75
|
+
x[i] = c0
|
|
76
|
+
x[i + 1] = c1
|
|
77
|
+
x[i + 2] = c2
|
|
78
|
+
x[i + 3] = c3
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
for (; i < len; i++) {
|
|
67
83
|
const x0 = s.charCodeAt(i)
|
|
68
84
|
const c0 = m[x0]
|
|
69
85
|
if (!c0 && x0) return null
|
|
@@ -73,16 +89,33 @@ function encode(s, m) {
|
|
|
73
89
|
return x
|
|
74
90
|
}
|
|
75
91
|
|
|
92
|
+
// fromBase64+btoa path is faster on everything where fromBase64 is fast
|
|
93
|
+
const useLatin1btoa = Uint8Array.fromBase64 && btoa && !skipWeb
|
|
94
|
+
|
|
76
95
|
export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
77
96
|
// TODO: replacement, truncate (replacement will need varying length)
|
|
78
97
|
if (mode !== 'fatal') throw new Error('Unsupported mode')
|
|
79
98
|
const m = encodeMap(encoding) // asserts
|
|
99
|
+
const isLatin1 = encoding === 'iso-8859-1'
|
|
80
100
|
|
|
81
101
|
// No single-byte encoder produces surrogate pairs, so any surrogate is invalid
|
|
82
102
|
// This needs special treatment only to decide how many replacement chars to output, one or two
|
|
83
103
|
// Not much use in running isWellFormed, most likely cause of error is unmapped chars, not surrogate pairs
|
|
84
104
|
return (s) => {
|
|
85
|
-
if (typeof s !== 'string') throw new TypeError(
|
|
105
|
+
if (typeof s !== 'string') throw new TypeError(E_STRING)
|
|
106
|
+
if (isLatin1) {
|
|
107
|
+
// max limit is to not produce base64 strings that are too long
|
|
108
|
+
if (useLatin1btoa && s.length >= 1024 && s.length < 1e8) {
|
|
109
|
+
try {
|
|
110
|
+
return Uint8Array.fromBase64(btoa(s)) // fails on non-latin1
|
|
111
|
+
} catch {
|
|
112
|
+
throw new TypeError(E_STRICT)
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
|
|
117
|
+
return encodeLatin1(s)
|
|
118
|
+
}
|
|
86
119
|
|
|
87
120
|
// Instead of an ASCII regex check, encode optimistically - this is faster
|
|
88
121
|
// Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
|
|
@@ -98,5 +131,7 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
|
98
131
|
}
|
|
99
132
|
}
|
|
100
133
|
|
|
134
|
+
export const latin1toString = createSinglebyteDecoder('iso-8859-1')
|
|
135
|
+
export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
|
|
101
136
|
export const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
102
137
|
export const windows1252fromString = createSinglebyteEncoder('windows-1252')
|
package/single-byte.node.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { assertUint8 } from './assert.js'
|
|
2
2
|
import { isAscii } from 'node:buffer'
|
|
3
|
-
import { isDeno, isLE, toBuf } from './fallback/_utils.js'
|
|
3
|
+
import { isDeno, isLE, toBuf, E_STRING } from './fallback/_utils.js'
|
|
4
4
|
import { asciiPrefix } from './fallback/latin1.js'
|
|
5
5
|
import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
|
|
6
6
|
|
|
@@ -23,7 +23,6 @@ function latin1Prefix(arr, start) {
|
|
|
23
23
|
|
|
24
24
|
export function createSinglebyteDecoder(encoding, loose = false) {
|
|
25
25
|
if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
|
|
26
|
-
const latin1path = encoding === 'windows-1252'
|
|
27
26
|
if (isDeno) {
|
|
28
27
|
const jsDecoder = encodingDecoder(encoding) // asserts
|
|
29
28
|
return (arr) => {
|
|
@@ -34,11 +33,13 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
34
33
|
}
|
|
35
34
|
}
|
|
36
35
|
|
|
36
|
+
const isLatin1 = encoding === 'iso-8859-1'
|
|
37
|
+
const latin1path = encoding === 'windows-1252'
|
|
37
38
|
const { incomplete, mapper } = encodingMapper(encoding) // asserts
|
|
38
39
|
return (arr) => {
|
|
39
40
|
assertUint8(arr)
|
|
40
41
|
if (arr.byteLength === 0) return ''
|
|
41
|
-
if (isAscii(arr)) return toBuf(arr).latin1Slice(
|
|
42
|
+
if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice
|
|
42
43
|
|
|
43
44
|
// Node.js TextDecoder is broken, so we can't use it. It's also slow anyway
|
|
44
45
|
|
|
@@ -64,9 +65,15 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
|
64
65
|
// TODO: replacement, truncate (replacement will need varying length)
|
|
65
66
|
if (mode !== 'fatal') throw new Error('Unsupported mode')
|
|
66
67
|
const m = encodeMap(encoding) // asserts
|
|
68
|
+
const isLatin1 = encoding === 'iso-8859-1'
|
|
67
69
|
|
|
68
70
|
return (s) => {
|
|
69
|
-
if (typeof s !== 'string') throw new TypeError(
|
|
71
|
+
if (typeof s !== 'string') throw new TypeError(E_STRING)
|
|
72
|
+
if (isLatin1) {
|
|
73
|
+
if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
|
|
74
|
+
const b = Buffer.from(s, 'latin1')
|
|
75
|
+
return new Uint8Array(b.buffer, b.byteOffset, b.byteLength)
|
|
76
|
+
}
|
|
70
77
|
|
|
71
78
|
// Instead of an ASCII regex check, encode optimistically - this is faster
|
|
72
79
|
// Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
|
|
@@ -104,5 +111,7 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
|
104
111
|
}
|
|
105
112
|
}
|
|
106
113
|
|
|
114
|
+
export const latin1toString = createSinglebyteDecoder('iso-8859-1')
|
|
115
|
+
export const latin1fromString = createSinglebyteEncoder('iso-8859-1')
|
|
107
116
|
export const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
108
117
|
export const windows1252fromString = createSinglebyteEncoder('windows-1252')
|
package/utf16.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as js from './fallback/utf16.js'
|
|
2
|
-
import { canDecoders, isLE } from './fallback/_utils.js'
|
|
2
|
+
import { canDecoders, isLE, E_STRING } from './fallback/_utils.js'
|
|
3
3
|
|
|
4
4
|
const { TextDecoder } = globalThis // Buffer is optional
|
|
5
5
|
const ignoreBOM = true
|
|
@@ -18,7 +18,7 @@ const { E_STRICT, E_STRICT_UNICODE } = js
|
|
|
18
18
|
const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
|
|
19
19
|
|
|
20
20
|
function encode(str, loose = false, format = 'uint16') {
|
|
21
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
21
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
22
22
|
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
|
|
23
23
|
throw new TypeError('Unknown format')
|
|
24
24
|
}
|
package/utf16.node.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { isDeno, isLE } from './fallback/_utils.js'
|
|
1
|
+
import { isDeno, isLE, E_STRING } from './fallback/_utils.js'
|
|
2
2
|
import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf16.js'
|
|
3
3
|
|
|
4
4
|
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
|
|
@@ -9,7 +9,7 @@ const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
|
|
|
9
9
|
// Unlike utf8, operates on Uint16Arrays by default
|
|
10
10
|
|
|
11
11
|
function encode(str, loose = false, format = 'uint16') {
|
|
12
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
12
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
13
13
|
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
|
|
14
14
|
throw new TypeError('Unknown format')
|
|
15
15
|
}
|
package/utf8.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { assertUint8 } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
|
-
import { isHermes, nativeDecoder, nativeEncoder } from './fallback/_utils.js'
|
|
3
|
+
import { isHermes, nativeDecoder, nativeEncoder, E_STRING } from './fallback/_utils.js'
|
|
4
4
|
import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
|
|
5
5
|
import * as js from './fallback/utf8.js'
|
|
6
6
|
|
|
@@ -44,7 +44,7 @@ function deLoose(str, loose, res) {
|
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
function encode(str, loose = false) {
|
|
47
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
47
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
48
48
|
if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
|
|
49
49
|
if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
|
|
50
50
|
// No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
|
package/utf8.node.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { assertUint8 } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
|
+
import { E_STRING } from './fallback/_utils.js'
|
|
3
4
|
import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf8.js'
|
|
4
5
|
import { isAscii } from 'node:buffer'
|
|
5
6
|
|
|
@@ -17,7 +18,7 @@ try {
|
|
|
17
18
|
}
|
|
18
19
|
|
|
19
20
|
function encode(str, loose = false) {
|
|
20
|
-
if (typeof str !== 'string') throw new TypeError(
|
|
21
|
+
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
|
21
22
|
const strLength = str.length
|
|
22
23
|
if (strLength === 0) return new Uint8Array() // faster than Uint8Array.of
|
|
23
24
|
let res
|