@exodus/bytes 1.9.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +417 -90
- package/array.d.ts +42 -3
- package/base32.d.ts +83 -0
- package/base58.d.ts +62 -0
- package/base58check.d.ts +131 -0
- package/base58check.js +2 -1
- package/base64.d.ts +40 -19
- package/bech32.d.ts +76 -0
- package/bigint.d.ts +48 -0
- package/encoding-browser.d.ts +23 -0
- package/encoding-lite.d.ts +61 -0
- package/encoding.d.ts +93 -11
- package/encoding.js +4 -3
- package/fallback/_utils.js +14 -11
- package/fallback/encoding.js +34 -42
- package/fallback/encoding.util.js +38 -8
- package/fallback/multi-byte.encodings.json +4 -3
- package/fallback/multi-byte.js +87 -16
- package/fallback/multi-byte.table.js +3 -0
- package/fallback/percent.js +31 -0
- package/hex.d.ts +22 -8
- package/index.d.ts +1 -1
- package/multi-byte.d.ts +64 -0
- package/package.json +63 -9
- package/single-byte.d.ts +159 -0
- package/utf16.d.ts +92 -0
- package/utf16.js +1 -1
- package/utf8.d.ts +72 -18
- package/utf8.js +11 -6
- package/utf8.node.js +1 -1
- package/whatwg.d.ts +48 -0
- package/whatwg.js +76 -0
- package/wif.d.ts +76 -0
- package/wif.js +1 -2
package/hex.d.ts
CHANGED
|
@@ -1,21 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Implements Base16 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648)
|
|
3
|
+
* (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
|
|
4
|
+
*
|
|
5
|
+
* ```js
|
|
6
|
+
* import { fromHex, toHex } from '@exodus/bytes/hex.js'
|
|
7
|
+
* ```
|
|
8
|
+
*
|
|
9
|
+
* @module @exodus/bytes/hex.js
|
|
10
|
+
*/
|
|
11
|
+
|
|
1
12
|
/// <reference types="node" />
|
|
2
13
|
|
|
3
14
|
import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
|
|
4
15
|
|
|
5
16
|
/**
|
|
6
|
-
*
|
|
17
|
+
* Encode a `Uint8Array` to a lowercase hex string
|
|
18
|
+
*
|
|
7
19
|
* @param arr - The input bytes
|
|
8
20
|
* @returns The hex encoded string
|
|
9
21
|
*/
|
|
10
|
-
export function toHex(arr:
|
|
22
|
+
export function toHex(arr: Uint8Array): string;
|
|
11
23
|
|
|
12
24
|
/**
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
25
|
+
* Decode a hex string to bytes
|
|
26
|
+
*
|
|
27
|
+
* Unlike `Buffer.from()`, throws on invalid input
|
|
28
|
+
*
|
|
29
|
+
* @param string - The hex encoded string (case-insensitive)
|
|
16
30
|
* @param format - Output format (default: 'uint8')
|
|
17
31
|
* @returns The decoded bytes
|
|
18
32
|
*/
|
|
19
|
-
export function fromHex(
|
|
20
|
-
export function fromHex(
|
|
21
|
-
export function fromHex(
|
|
33
|
+
export function fromHex(string: string, format?: 'uint8'): Uint8ArrayBuffer;
|
|
34
|
+
export function fromHex(string: string, format: 'buffer'): Buffer;
|
|
35
|
+
export function fromHex(string: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
|
package/index.d.ts
CHANGED
package/multi-byte.d.ts
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Decode / encode the legacy multi-byte encodings according to the
|
|
3
|
+
* [Encoding standard](https://encoding.spec.whatwg.org/)
|
|
4
|
+
* ([§10](https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(simplified)-encodings),
|
|
5
|
+
* [§11](https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(traditional)-encodings),
|
|
6
|
+
* [§12](https://encoding.spec.whatwg.org/#legacy-multi-byte-japanese-encodings),
|
|
7
|
+
* [§13](https://encoding.spec.whatwg.org/#legacy-multi-byte-korean-encodings)).
|
|
8
|
+
*
|
|
9
|
+
* ```js
|
|
10
|
+
* import { createMultibyteDecoder, createMultibyteEncoder } from '@exodus/bytes/multi-byte.js'
|
|
11
|
+
* ```
|
|
12
|
+
*
|
|
13
|
+
* > [!WARNING]
|
|
14
|
+
* > This is a lower-level API for legacy multi-byte encodings.
|
|
15
|
+
* >
|
|
16
|
+
* > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
|
|
17
|
+
* >
|
|
18
|
+
* > Be sure to know what you are doing and check documentation when directly using encodings from this file.
|
|
19
|
+
*
|
|
20
|
+
* Supports all legacy multi-byte encodings listed in the WHATWG Encoding standard:
|
|
21
|
+
* `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`.
|
|
22
|
+
*
|
|
23
|
+
* @module @exodus/bytes/multi-byte.js
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/// <reference types="node" />
|
|
27
|
+
|
|
28
|
+
import type { Uint8ArrayBuffer } from './array.js';
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Create a decoder for a supported legacy multi-byte encoding, given its lowercased name as `encoding`.
|
|
32
|
+
*
|
|
33
|
+
* Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
|
|
34
|
+
*
|
|
35
|
+
* The returned function will maintain internal state while `stream = true` is used, allowing it to
|
|
36
|
+
* handle incomplete multi-byte sequences across multiple calls.
|
|
37
|
+
* State is reset when `stream = false` or when the function is called without the `stream` parameter.
|
|
38
|
+
*
|
|
39
|
+
* @param encoding - The encoding name (e.g., 'gbk', 'gb18030', 'big5', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'euc-kr')
|
|
40
|
+
* @param loose - If true, replaces unmapped bytes with replacement character instead of throwing (default: false)
|
|
41
|
+
* @returns A function that decodes bytes to string, with optional streaming support
|
|
42
|
+
*/
|
|
43
|
+
export function createMultibyteDecoder(
|
|
44
|
+
encoding: string,
|
|
45
|
+
loose?: boolean
|
|
46
|
+
): (arr: Uint8Array, stream?: boolean) => string;
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Create an encoder for a supported legacy multi-byte encoding, given its lowercased name as `encoding`.
|
|
50
|
+
*
|
|
51
|
+
* Returns a function `encode(string)` that encodes a string to bytes.
|
|
52
|
+
*
|
|
53
|
+
* In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
|
|
54
|
+
* not be encoded in the target encoding.
|
|
55
|
+
*
|
|
56
|
+
* @param encoding - The encoding name (e.g., 'gbk', 'gb18030', 'big5', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'euc-kr')
|
|
57
|
+
* @param options - Encoding options
|
|
58
|
+
* @param options.mode - Encoding mode (default: 'fatal'). Currently, only 'fatal' mode is supported.
|
|
59
|
+
* @returns A function that encodes string to bytes
|
|
60
|
+
*/
|
|
61
|
+
export function createMultibyteEncoder(
|
|
62
|
+
encoding: string,
|
|
63
|
+
options?: { mode?: 'fatal' }
|
|
64
|
+
): (string: string) => Uint8ArrayBuffer;
|
package/package.json
CHANGED
|
@@ -1,16 +1,33 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exodus/bytes",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.11.0",
|
|
4
4
|
"description": "Various operations on Uint8Array data",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"encoding",
|
|
7
|
+
"uint8array",
|
|
8
|
+
"textdecoder",
|
|
9
|
+
"textencoder",
|
|
10
|
+
"utf8",
|
|
11
|
+
"utf16",
|
|
12
|
+
"hex",
|
|
13
|
+
"base64",
|
|
14
|
+
"base32",
|
|
15
|
+
"base58",
|
|
16
|
+
"base58check",
|
|
17
|
+
"bech32",
|
|
18
|
+
"bech32m",
|
|
19
|
+
"wif"
|
|
20
|
+
],
|
|
5
21
|
"scripts": {
|
|
6
22
|
"lint": "eslint .",
|
|
23
|
+
"typedoc": "typedoc && mkdir -p doc/assets && cp -r theme/styles doc/assets/",
|
|
7
24
|
"test:javascriptcore": "npm run test:jsc --",
|
|
8
25
|
"test:v8": "exodus-test --engine=v8:bundle",
|
|
9
26
|
"test:jsc": "exodus-test --engine=jsc:bundle",
|
|
10
27
|
"test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
|
|
11
28
|
"test:hermes": "exodus-test --engine=hermes:bundle",
|
|
12
29
|
"test:quickjs": "exodus-test --engine=quickjs:bundle",
|
|
13
|
-
"test:xs": "exodus-test --engine=xs:bundle",
|
|
30
|
+
"test:xs": "EXODUS_TEST_IGNORE='tests/whatwg.browser.test.js' exodus-test --engine=xs:bundle",
|
|
14
31
|
"test:engine262": "exodus-test --engine=engine262:bundle",
|
|
15
32
|
"test:deno": "exodus-test --engine=deno:pure",
|
|
16
33
|
"test:bun": "exodus-test --engine=bun:pure",
|
|
@@ -38,7 +55,7 @@
|
|
|
38
55
|
"bugs": {
|
|
39
56
|
"url": "https://github.com/ExodusOSS/bytes/issues"
|
|
40
57
|
},
|
|
41
|
-
"homepage": "https://github.com/ExodusOSS/bytes
|
|
58
|
+
"homepage": "https://github.com/ExodusOSS/bytes",
|
|
42
59
|
"engines": {
|
|
43
60
|
"node": "^20.19.0 || ^22.12.0 || >=24.0.0"
|
|
44
61
|
},
|
|
@@ -54,6 +71,7 @@
|
|
|
54
71
|
"/fallback/encoding.util.js",
|
|
55
72
|
"/fallback/hex.js",
|
|
56
73
|
"/fallback/latin1.js",
|
|
74
|
+
"/fallback/percent.js",
|
|
57
75
|
"/fallback/multi-byte.encodings.cjs",
|
|
58
76
|
"/fallback/multi-byte.encodings.json",
|
|
59
77
|
"/fallback/multi-byte.js",
|
|
@@ -66,13 +84,18 @@
|
|
|
66
84
|
"/array.d.ts",
|
|
67
85
|
"/assert.js",
|
|
68
86
|
"/base32.js",
|
|
87
|
+
"/base32.d.ts",
|
|
69
88
|
"/base58.js",
|
|
89
|
+
"/base58.d.ts",
|
|
70
90
|
"/base58check.js",
|
|
91
|
+
"/base58check.d.ts",
|
|
71
92
|
"/base58check.node.js",
|
|
72
93
|
"/base64.js",
|
|
73
94
|
"/base64.d.ts",
|
|
74
95
|
"/bech32.js",
|
|
96
|
+
"/bech32.d.ts",
|
|
75
97
|
"/bigint.js",
|
|
98
|
+
"/bigint.d.ts",
|
|
76
99
|
"/encoding-browser.js",
|
|
77
100
|
"/encoding-browser.browser.js",
|
|
78
101
|
"/encoding-browser.native.js",
|
|
@@ -87,15 +110,21 @@
|
|
|
87
110
|
"/index.js",
|
|
88
111
|
"/index.d.ts",
|
|
89
112
|
"/multi-byte.js",
|
|
113
|
+
"/multi-byte.d.ts",
|
|
90
114
|
"/multi-byte.node.js",
|
|
91
115
|
"/single-byte.js",
|
|
116
|
+
"/single-byte.d.ts",
|
|
92
117
|
"/single-byte.node.js",
|
|
93
118
|
"/utf16.js",
|
|
119
|
+
"/utf16.d.ts",
|
|
94
120
|
"/utf16.node.js",
|
|
95
121
|
"/utf8.js",
|
|
96
122
|
"/utf8.d.ts",
|
|
97
123
|
"/utf8.node.js",
|
|
98
|
-
"/
|
|
124
|
+
"/whatwg.js",
|
|
125
|
+
"/whatwg.d.ts",
|
|
126
|
+
"/wif.js",
|
|
127
|
+
"/wif.d.ts"
|
|
99
128
|
],
|
|
100
129
|
"main": "index.js",
|
|
101
130
|
"module": "index.js",
|
|
@@ -109,9 +138,16 @@
|
|
|
109
138
|
"types": "./array.d.ts",
|
|
110
139
|
"default": "./array.js"
|
|
111
140
|
},
|
|
112
|
-
"./base32.js":
|
|
113
|
-
|
|
141
|
+
"./base32.js": {
|
|
142
|
+
"types": "./base32.d.ts",
|
|
143
|
+
"default": "./base32.js"
|
|
144
|
+
},
|
|
145
|
+
"./base58.js": {
|
|
146
|
+
"types": "./base58.d.ts",
|
|
147
|
+
"default": "./base58.js"
|
|
148
|
+
},
|
|
114
149
|
"./base58check.js": {
|
|
150
|
+
"types": "./base58check.d.ts",
|
|
115
151
|
"node": "./base58check.node.js",
|
|
116
152
|
"default": "./base58check.js"
|
|
117
153
|
},
|
|
@@ -119,18 +155,26 @@
|
|
|
119
155
|
"types": "./base64.d.ts",
|
|
120
156
|
"default": "./base64.js"
|
|
121
157
|
},
|
|
122
|
-
"./bech32.js":
|
|
123
|
-
|
|
158
|
+
"./bech32.js": {
|
|
159
|
+
"types": "./bech32.d.ts",
|
|
160
|
+
"default": "./bech32.js"
|
|
161
|
+
},
|
|
162
|
+
"./bigint.js": {
|
|
163
|
+
"types": "./bigint.d.ts",
|
|
164
|
+
"default": "./bigint.js"
|
|
165
|
+
},
|
|
124
166
|
"./hex.js": {
|
|
125
167
|
"types": "./hex.d.ts",
|
|
126
168
|
"node": "./hex.node.js",
|
|
127
169
|
"default": "./hex.js"
|
|
128
170
|
},
|
|
129
171
|
"./multi-byte.js": {
|
|
172
|
+
"types": "./multi-byte.d.ts",
|
|
130
173
|
"node": "./multi-byte.node.js",
|
|
131
174
|
"default": "./multi-byte.js"
|
|
132
175
|
},
|
|
133
176
|
"./single-byte.js": {
|
|
177
|
+
"types": "./single-byte.d.ts",
|
|
134
178
|
"node": "./single-byte.node.js",
|
|
135
179
|
"default": "./single-byte.js"
|
|
136
180
|
},
|
|
@@ -150,6 +194,7 @@
|
|
|
150
194
|
"default": "./encoding-browser.js"
|
|
151
195
|
},
|
|
152
196
|
"./utf16.js": {
|
|
197
|
+
"types": "./utf16.d.ts",
|
|
153
198
|
"node": "./utf16.node.js",
|
|
154
199
|
"default": "./utf16.js"
|
|
155
200
|
},
|
|
@@ -158,7 +203,14 @@
|
|
|
158
203
|
"node": "./utf8.node.js",
|
|
159
204
|
"default": "./utf8.js"
|
|
160
205
|
},
|
|
161
|
-
"./
|
|
206
|
+
"./whatwg.js": {
|
|
207
|
+
"types": "./whatwg.d.ts",
|
|
208
|
+
"default": "./whatwg.js"
|
|
209
|
+
},
|
|
210
|
+
"./wif.js": {
|
|
211
|
+
"types": "./wif.d.ts",
|
|
212
|
+
"default": "./wif.js"
|
|
213
|
+
}
|
|
162
214
|
},
|
|
163
215
|
"react-native": {
|
|
164
216
|
"./encoding-browser.js": "./encoding-browser.native.js"
|
|
@@ -177,6 +229,7 @@
|
|
|
177
229
|
"@exodus/eslint-config": "^5.24.0",
|
|
178
230
|
"@exodus/prettier": "^1.0.0",
|
|
179
231
|
"@exodus/test": "^1.0.0-rc.109",
|
|
232
|
+
"@hexagon/base64": "^2.0.4",
|
|
180
233
|
"@noble/hashes": "^2.0.1",
|
|
181
234
|
"@oslojs/encoding": "^1.1.0",
|
|
182
235
|
"@petamoriken/float16": "^3.9.3",
|
|
@@ -207,6 +260,7 @@
|
|
|
207
260
|
"jsvu": "^3.0.3",
|
|
208
261
|
"punycode": "^2.3.1",
|
|
209
262
|
"text-encoding": "^0.7.0",
|
|
263
|
+
"typedoc": "^0.28.16",
|
|
210
264
|
"typescript": "^5.9.3",
|
|
211
265
|
"uint8array-tools": "^0.0.9",
|
|
212
266
|
"utf8": "^3.0.0",
|
package/single-byte.d.ts
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Decode / encode the legacy single-byte encodings according to the
|
|
3
|
+
* [Encoding standard](https://encoding.spec.whatwg.org/)
|
|
4
|
+
* ([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings),
|
|
5
|
+
* [§14.5](https://encoding.spec.whatwg.org/#x-user-defined)),
|
|
6
|
+
* and [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859) `iso-8859-*` mappings.
|
|
7
|
+
*
|
|
8
|
+
* ```js
|
|
9
|
+
* import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
|
|
10
|
+
* import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
|
|
11
|
+
* import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
|
|
12
|
+
* ```
|
|
13
|
+
*
|
|
14
|
+
* > [!WARNING]
|
|
15
|
+
* > This is a lower-level API for single-byte encodings.
|
|
16
|
+
* > It might not match what you expect, as it supports both WHATWG and unicode.org encodings under
|
|
17
|
+
* > different names, with the main intended use case for the latter being either non-web or legacy contexts.
|
|
18
|
+
* >
|
|
19
|
+
* > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
|
|
20
|
+
* >
|
|
21
|
+
* > Be sure to know what you are doing and check documentation when directly using encodings from this file.
|
|
22
|
+
*
|
|
23
|
+
* Supports all single-byte encodings listed in the WHATWG Encoding standard:
|
|
24
|
+
* `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
|
|
25
|
+
* `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
|
|
26
|
+
* `macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
|
|
27
|
+
* `windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
|
|
28
|
+
*
|
|
29
|
+
* Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
|
|
30
|
+
* [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859)
|
|
31
|
+
* (and all other `iso-8859-*` encodings there as they match WHATWG).
|
|
32
|
+
*
|
|
33
|
+
* > [!NOTE]
|
|
34
|
+
* > While all `iso-8859-*` encodings supported by the [WHATWG Encoding standard](https://encoding.spec.whatwg.org/) match
|
|
35
|
+
* > [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859), the WHATWG Encoding spec doesn't support
|
|
36
|
+
* > `iso-8859-1`, `iso-8859-9`, `iso-8859-11`, and instead maps them as labels to `windows-1252`, `windows-1254`, `windows-874`.\
|
|
37
|
+
* > `createSinglebyteDecoder()` (unlike `TextDecoder` or `legacyHookDecode()`) does not do such mapping,
|
|
38
|
+
* > so its results will differ from `TextDecoder` for those encoding names.
|
|
39
|
+
*
|
|
40
|
+
* ```js
|
|
41
|
+
* > new TextDecoder('iso-8859-1').encoding
|
|
42
|
+
* 'windows-1252'
|
|
43
|
+
* > new TextDecoder('iso-8859-9').encoding
|
|
44
|
+
* 'windows-1254'
|
|
45
|
+
* > new TextDecoder('iso-8859-11').encoding
|
|
46
|
+
* 'windows-874'
|
|
47
|
+
* > new TextDecoder('iso-8859-9').decode(Uint8Array.of(0x80, 0x81, 0xd0))
|
|
48
|
+
* '€\x81Ğ' // this is actually decoded according to windows-1254 per TextDecoder spec
|
|
49
|
+
* > createSinglebyteDecoder('iso-8859-9')(Uint8Array.of(0x80, 0x81, 0xd0))
|
|
50
|
+
* '\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
|
|
51
|
+
* ```
|
|
52
|
+
*
|
|
53
|
+
* All WHATWG Encoding spec [`windows-*` encodings](https://encoding.spec.whatwg.org/#windows-874) are supersets of
|
|
54
|
+
* corresponding [unicode.org encodings](https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/), meaning that
|
|
55
|
+
* they encode/decode all the old valid (non-replacement) strings / byte sequences identically, but can also support
|
|
56
|
+
* a wider range of inputs.
|
|
57
|
+
*
|
|
58
|
+
* @module @exodus/bytes/single-byte.js
|
|
59
|
+
*/
|
|
60
|
+
|
|
61
|
+
/// <reference types="node" />
|
|
62
|
+
|
|
63
|
+
import type { Uint8ArrayBuffer } from './array.js';
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Create a decoder for a supported single-byte encoding, given its lowercased name as `encoding`.
|
|
67
|
+
*
|
|
68
|
+
* Returns a function `decode(arr)` that decodes bytes to a string.
|
|
69
|
+
*
|
|
70
|
+
* @param encoding - The encoding name (e.g., 'iso-8859-1', 'windows-1252')
|
|
71
|
+
* @param loose - If true, replaces unmapped bytes with replacement character instead of throwing (default: false)
|
|
72
|
+
* @returns A function that decodes bytes to string
|
|
73
|
+
*/
|
|
74
|
+
export function createSinglebyteDecoder(
|
|
75
|
+
encoding: string,
|
|
76
|
+
loose?: boolean
|
|
77
|
+
): (arr: Uint8Array) => string;
|
|
78
|
+
|
|
79
|
+
/**
|
|
80
|
+
* Create an encoder for a supported single-byte encoding, given its lowercased name as `encoding`.
|
|
81
|
+
*
|
|
82
|
+
* Returns a function `encode(string)` that encodes a string to bytes.
|
|
83
|
+
*
|
|
84
|
+
* In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
|
|
85
|
+
* not be encoded in the target encoding.
|
|
86
|
+
*
|
|
87
|
+
* @param encoding - The encoding name (e.g., 'iso-8859-1', 'windows-1252')
|
|
88
|
+
* @param options - Encoding options
|
|
89
|
+
* @param options.mode - Encoding mode (default: 'fatal'). Currently, only 'fatal' mode is supported.
|
|
90
|
+
* @returns A function that encodes string to bytes
|
|
91
|
+
*/
|
|
92
|
+
export function createSinglebyteEncoder(
|
|
93
|
+
encoding: string,
|
|
94
|
+
options?: { mode?: 'fatal' }
|
|
95
|
+
): (string: string) => Uint8ArrayBuffer;
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Decode `iso-8859-1` bytes to a string.
|
|
99
|
+
*
|
|
100
|
+
* There is no loose variant for this encoding, all bytes can be decoded.
|
|
101
|
+
*
|
|
102
|
+
* Same as:
|
|
103
|
+
* ```js
|
|
104
|
+
* const latin1toString = createSinglebyteDecoder('iso-8859-1')
|
|
105
|
+
* ```
|
|
106
|
+
*
|
|
107
|
+
* > [!NOTE]
|
|
108
|
+
* > This is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as those
|
|
109
|
+
* > alias to `new TextDecoder('windows-1252')`.
|
|
110
|
+
*
|
|
111
|
+
* @param arr - The bytes to decode
|
|
112
|
+
* @returns The decoded string
|
|
113
|
+
*/
|
|
114
|
+
export function latin1toString(arr: Uint8Array): string;
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* Encode a string to `iso-8859-1` bytes.
|
|
118
|
+
*
|
|
119
|
+
* Throws on non well-formed strings or any codepoints which could not be encoded in `iso-8859-1`.
|
|
120
|
+
*
|
|
121
|
+
* Same as:
|
|
122
|
+
* ```js
|
|
123
|
+
* const latin1fromString = createSinglebyteEncoder('iso-8859-1', { mode: 'fatal' })
|
|
124
|
+
* ```
|
|
125
|
+
*
|
|
126
|
+
* @param string - The string to encode
|
|
127
|
+
* @returns The encoded bytes
|
|
128
|
+
*/
|
|
129
|
+
export function latin1fromString(string: string): Uint8ArrayBuffer;
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* Decode `windows-1252` bytes to a string.
|
|
133
|
+
*
|
|
134
|
+
* There is no loose variant for this encoding, all bytes can be decoded.
|
|
135
|
+
*
|
|
136
|
+
* Same as:
|
|
137
|
+
* ```js
|
|
138
|
+
* const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
139
|
+
* ```
|
|
140
|
+
*
|
|
141
|
+
* @param arr - The bytes to decode
|
|
142
|
+
* @returns The decoded string
|
|
143
|
+
*/
|
|
144
|
+
export function windows1252toString(arr: Uint8Array): string;
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Encode a string to `windows-1252` bytes.
|
|
148
|
+
*
|
|
149
|
+
* Throws on non well-formed strings or any codepoints which could not be encoded in `windows-1252`.
|
|
150
|
+
*
|
|
151
|
+
* Same as:
|
|
152
|
+
* ```js
|
|
153
|
+
* const windows1252fromString = createSinglebyteEncoder('windows-1252', { mode: 'fatal' })
|
|
154
|
+
* ```
|
|
155
|
+
*
|
|
156
|
+
* @param string - The string to encode
|
|
157
|
+
* @returns The encoded bytes
|
|
158
|
+
*/
|
|
159
|
+
export function windows1252fromString(string: string): Uint8ArrayBuffer;
|
package/utf16.d.ts
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* UTF-16 encoding/decoding
|
|
3
|
+
*
|
|
4
|
+
* ```js
|
|
5
|
+
* import { utf16fromString, utf16toString } from '@exodus/bytes/utf16.js'
|
|
6
|
+
*
|
|
7
|
+
* // loose
|
|
8
|
+
* import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js'
|
|
9
|
+
* ```
|
|
10
|
+
*
|
|
11
|
+
* _These methods by design encode/decode BOM (codepoint `U+FEFF` Byte Order Mark) as-is._\
|
|
12
|
+
* _If you need BOM handling or detection, use `@exodus/bytes/encoding.js`_
|
|
13
|
+
*
|
|
14
|
+
* @module @exodus/bytes/utf16.js
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/// <reference types="node" />
|
|
18
|
+
|
|
19
|
+
import type { Uint8ArrayBuffer, Uint16ArrayBuffer } from './array.js';
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Output format for UTF-16 encoding
|
|
23
|
+
*/
|
|
24
|
+
export type Utf16Format = 'uint16' | 'uint8-le' | 'uint8-be';
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Encode a string to UTF-16 bytes (strict mode)
|
|
28
|
+
*
|
|
29
|
+
* Throws on invalid Unicode (unpaired surrogates)
|
|
30
|
+
*
|
|
31
|
+
* @param string - The string to encode
|
|
32
|
+
* @param format - Output format (default: 'uint16')
|
|
33
|
+
* @returns The encoded bytes
|
|
34
|
+
*/
|
|
35
|
+
export function utf16fromString(string: string, format?: 'uint16'): Uint16ArrayBuffer;
|
|
36
|
+
export function utf16fromString(string: string, format: 'uint8-le'): Uint8ArrayBuffer;
|
|
37
|
+
export function utf16fromString(string: string, format: 'uint8-be'): Uint8ArrayBuffer;
|
|
38
|
+
export function utf16fromString(string: string, format?: Utf16Format): Uint16ArrayBuffer | Uint8ArrayBuffer;
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Encode a string to UTF-16 bytes (loose mode)
|
|
42
|
+
*
|
|
43
|
+
* Replaces invalid Unicode (unpaired surrogates) with replacement codepoints `U+FFFD`
|
|
44
|
+
* per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
|
|
45
|
+
*
|
|
46
|
+
* _Such replacement is a non-injective function, is irreversible and causes collisions.\
|
|
47
|
+
* Prefer using strict throwing methods for cryptography applications._
|
|
48
|
+
*
|
|
49
|
+
* @param string - The string to encode
|
|
50
|
+
* @param format - Output format (default: 'uint16')
|
|
51
|
+
* @returns The encoded bytes
|
|
52
|
+
*/
|
|
53
|
+
export function utf16fromStringLoose(string: string, format?: 'uint16'): Uint16ArrayBuffer;
|
|
54
|
+
export function utf16fromStringLoose(string: string, format: 'uint8-le'): Uint8ArrayBuffer;
|
|
55
|
+
export function utf16fromStringLoose(string: string, format: 'uint8-be'): Uint8ArrayBuffer;
|
|
56
|
+
export function utf16fromStringLoose(string: string, format?: Utf16Format): Uint16ArrayBuffer | Uint8ArrayBuffer;
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Decode UTF-16 bytes to a string (strict mode)
|
|
60
|
+
*
|
|
61
|
+
* Throws on invalid UTF-16 byte sequences
|
|
62
|
+
*
|
|
63
|
+
* Throws on non-even byte length.
|
|
64
|
+
*
|
|
65
|
+
* @param arr - The bytes to decode
|
|
66
|
+
* @param format - Input format (default: 'uint16')
|
|
67
|
+
* @returns The decoded string
|
|
68
|
+
*/
|
|
69
|
+
export function utf16toString(arr: Uint16Array, format?: 'uint16'): string;
|
|
70
|
+
export function utf16toString(arr: Uint8Array, format: 'uint8-le'): string;
|
|
71
|
+
export function utf16toString(arr: Uint8Array, format: 'uint8-be'): string;
|
|
72
|
+
export function utf16toString(arr: Uint16Array | Uint8Array, format?: Utf16Format): string;
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Decode UTF-16 bytes to a string (loose mode)
|
|
76
|
+
*
|
|
77
|
+
* Replaces invalid UTF-16 byte sequences with replacement codepoints `U+FFFD`
|
|
78
|
+
* per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
|
|
79
|
+
*
|
|
80
|
+
* _Such replacement is a non-injective function, is irreversible and causes collisions.\
|
|
81
|
+
* Prefer using strict throwing methods for cryptography applications._
|
|
82
|
+
*
|
|
83
|
+
* Throws on non-even byte length.
|
|
84
|
+
*
|
|
85
|
+
* @param arr - The bytes to decode
|
|
86
|
+
* @param format - Input format (default: 'uint16')
|
|
87
|
+
* @returns The decoded string
|
|
88
|
+
*/
|
|
89
|
+
export function utf16toStringLoose(arr: Uint16Array, format?: 'uint16'): string;
|
|
90
|
+
export function utf16toStringLoose(arr: Uint8Array, format: 'uint8-le'): string;
|
|
91
|
+
export function utf16toStringLoose(arr: Uint8Array, format: 'uint8-be'): string;
|
|
92
|
+
export function utf16toStringLoose(arr: Uint16Array | Uint8Array, format?: Utf16Format): string;
|
package/utf16.js
CHANGED
|
@@ -8,7 +8,7 @@ const decoderLooseLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM })
|
|
|
8
8
|
const decoderFatalBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM, fatal: true }) : null
|
|
9
9
|
const decoderLooseBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM }) : null
|
|
10
10
|
const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE
|
|
11
|
-
const decoderLoose16 = isLE ? decoderLooseLE :
|
|
11
|
+
const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE
|
|
12
12
|
const { isWellFormed, toWellFormed } = String.prototype
|
|
13
13
|
|
|
14
14
|
const { E_STRICT, E_STRICT_UNICODE } = js
|
package/utf8.d.ts
CHANGED
|
@@ -1,42 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* UTF-8 encoding/decoding
|
|
3
|
+
*
|
|
4
|
+
* ```js
|
|
5
|
+
* import { utf8fromString, utf8toString } from '@exodus/bytes/utf8.js'
|
|
6
|
+
*
|
|
7
|
+
* // loose
|
|
8
|
+
* import { utf8fromStringLoose, utf8toStringLoose } from '@exodus/bytes/utf8.js'
|
|
9
|
+
* ```
|
|
10
|
+
*
|
|
11
|
+
* _These methods by design encode/decode BOM (codepoint `U+FEFF` Byte Order Mark) as-is._\
|
|
12
|
+
* _If you need BOM handling or detection, use `@exodus/bytes/encoding.js`_
|
|
13
|
+
*
|
|
14
|
+
* @module @exodus/bytes/utf8.js
|
|
15
|
+
*/
|
|
16
|
+
|
|
1
17
|
/// <reference types="node" />
|
|
2
18
|
|
|
3
19
|
import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
|
|
4
20
|
|
|
5
21
|
/**
|
|
6
|
-
*
|
|
22
|
+
* Encode a string to UTF-8 bytes (strict mode)
|
|
23
|
+
*
|
|
7
24
|
* Throws on invalid Unicode (unpaired surrogates)
|
|
8
|
-
*
|
|
25
|
+
*
|
|
26
|
+
* This is similar to the following snippet (but works on all engines):
|
|
27
|
+
* ```js
|
|
28
|
+
* // Strict encode, requiring Unicode codepoints to be valid
|
|
29
|
+
* if (typeof string !== 'string' || !string.isWellFormed()) throw new TypeError()
|
|
30
|
+
* return new TextEncoder().encode(string)
|
|
31
|
+
* ```
|
|
32
|
+
*
|
|
33
|
+
* @param string - The string to encode
|
|
9
34
|
* @param format - Output format (default: 'uint8')
|
|
10
35
|
* @returns The encoded bytes
|
|
11
36
|
*/
|
|
12
|
-
export function utf8fromString(
|
|
13
|
-
export function utf8fromString(
|
|
14
|
-
export function utf8fromString(
|
|
37
|
+
export function utf8fromString(string: string, format?: 'uint8'): Uint8ArrayBuffer;
|
|
38
|
+
export function utf8fromString(string: string, format: 'buffer'): Buffer;
|
|
39
|
+
export function utf8fromString(string: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
|
|
15
40
|
|
|
16
41
|
/**
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
42
|
+
* Encode a string to UTF-8 bytes (loose mode)
|
|
43
|
+
*
|
|
44
|
+
* Replaces invalid Unicode (unpaired surrogates) with replacement codepoints `U+FFFD`
|
|
45
|
+
* per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
|
|
46
|
+
*
|
|
47
|
+
* _Such replacement is a non-injective function, is irreversible and causes collisions.\
|
|
48
|
+
* Prefer using strict throwing methods for cryptography applications._
|
|
49
|
+
*
|
|
50
|
+
* This is similar to the following snippet (but works on all engines):
|
|
51
|
+
* ```js
|
|
52
|
+
* // Loose encode, replacing invalid Unicode codepoints with U+FFFD
|
|
53
|
+
* if (typeof string !== 'string') throw new TypeError()
|
|
54
|
+
* return new TextEncoder().encode(string)
|
|
55
|
+
* ```
|
|
56
|
+
*
|
|
57
|
+
* @param string - The string to encode
|
|
20
58
|
* @param format - Output format (default: 'uint8')
|
|
21
59
|
* @returns The encoded bytes
|
|
22
60
|
*/
|
|
23
|
-
export function utf8fromStringLoose(
|
|
24
|
-
export function utf8fromStringLoose(
|
|
25
|
-
export function utf8fromStringLoose(
|
|
61
|
+
export function utf8fromStringLoose(string: string, format?: 'uint8'): Uint8ArrayBuffer;
|
|
62
|
+
export function utf8fromStringLoose(string: string, format: 'buffer'): Buffer;
|
|
63
|
+
export function utf8fromStringLoose(
|
|
64
|
+
string: string,
|
|
65
|
+
format?: OutputFormat
|
|
66
|
+
): Uint8ArrayBuffer | Buffer;
|
|
26
67
|
|
|
27
68
|
/**
|
|
28
|
-
*
|
|
29
|
-
*
|
|
69
|
+
* Decode UTF-8 bytes to a string (strict mode)
|
|
70
|
+
*
|
|
71
|
+
* Throws on invalid UTF-8 byte sequences
|
|
72
|
+
*
|
|
73
|
+
* This is similar to `new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(arr)`,
|
|
74
|
+
* but works on all engines.
|
|
75
|
+
*
|
|
30
76
|
* @param arr - The bytes to decode
|
|
31
77
|
* @returns The decoded string
|
|
32
78
|
*/
|
|
33
|
-
export function utf8toString(arr:
|
|
79
|
+
export function utf8toString(arr: Uint8Array): string;
|
|
34
80
|
|
|
35
81
|
/**
|
|
36
|
-
*
|
|
37
|
-
*
|
|
82
|
+
* Decode UTF-8 bytes to a string (loose mode)
|
|
83
|
+
*
|
|
84
|
+
* Replaces invalid UTF-8 byte sequences with replacement codepoints `U+FFFD`
|
|
85
|
+
* per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
|
|
86
|
+
*
|
|
87
|
+
* _Such replacement is a non-injective function, is irreversible and causes collisions.\
|
|
88
|
+
* Prefer using strict throwing methods for cryptography applications._
|
|
89
|
+
*
|
|
90
|
+
* This is similar to `new TextDecoder('utf-8', { ignoreBOM: true }).decode(arr)`,
|
|
91
|
+
* but works on all engines.
|
|
92
|
+
*
|
|
38
93
|
* @param arr - The bytes to decode
|
|
39
94
|
* @returns The decoded string
|
|
40
95
|
*/
|
|
41
|
-
export function utf8toStringLoose(arr:
|
|
42
|
-
|
|
96
|
+
export function utf8toStringLoose(arr: Uint8Array): string;
|