@exodus/bytes 1.9.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hex.d.ts CHANGED
@@ -1,21 +1,35 @@
1
+ /**
2
+ * Implements Base16 from [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648)
3
+ * (no differences from [RFC3548](https://datatracker.ietf.org/doc/html/rfc3548)).
4
+ *
5
+ * ```js
6
+ * import { fromHex, toHex } from '@exodus/bytes/hex.js'
7
+ * ```
8
+ *
9
+ * @module @exodus/bytes/hex.js
10
+ */
11
+
1
12
  /// <reference types="node" />
2
13
 
3
14
  import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
4
15
 
5
16
  /**
6
- * Encodes a Uint8Array to a lowercase hex string
17
+ * Encode a `Uint8Array` to a lowercase hex string
18
+ *
7
19
  * @param arr - The input bytes
8
20
  * @returns The hex encoded string
9
21
  */
10
- export function toHex(arr: Uint8ArrayBuffer): string;
22
+ export function toHex(arr: Uint8Array): string;
11
23
 
12
24
  /**
13
- * Decodes a hex string to bytes
14
- * Unlike Buffer.from(), throws on invalid input
15
- * @param str - The hex encoded string (case-insensitive)
25
+ * Decode a hex string to bytes
26
+ *
27
+ * Unlike `Buffer.from()`, throws on invalid input
28
+ *
29
+ * @param string - The hex encoded string (case-insensitive)
16
30
  * @param format - Output format (default: 'uint8')
17
31
  * @returns The decoded bytes
18
32
  */
19
- export function fromHex(str: string, format?: 'uint8'): Uint8ArrayBuffer;
20
- export function fromHex(str: string, format: 'buffer'): Buffer;
21
- export function fromHex(str: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
33
+ export function fromHex(string: string, format?: 'uint8'): Uint8ArrayBuffer;
34
+ export function fromHex(string: string, format: 'buffer'): Buffer;
35
+ export function fromHex(string: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
package/index.d.ts CHANGED
@@ -40,4 +40,4 @@
40
40
  * import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-browser.js'
41
41
  * ```
42
42
  */
43
- declare module "@exodus/bytes" {}
43
+ declare module '@exodus/bytes' {}
package/multi-byte.d.ts ADDED
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Decode / encode the legacy multi-byte encodings according to the
3
+ * [Encoding standard](https://encoding.spec.whatwg.org/)
4
+ * ([§10](https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(simplified)-encodings),
5
+ * [§11](https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(traditional)-encodings),
6
+ * [§12](https://encoding.spec.whatwg.org/#legacy-multi-byte-japanese-encodings),
7
+ * [§13](https://encoding.spec.whatwg.org/#legacy-multi-byte-korean-encodings)).
8
+ *
9
+ * ```js
10
+ * import { createMultibyteDecoder, createMultibyteEncoder } from '@exodus/bytes/multi-byte.js'
11
+ * ```
12
+ *
13
+ * > [!WARNING]
14
+ * > This is a lower-level API for legacy multi-byte encodings.
15
+ * >
16
+ * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
17
+ * >
18
+ * > Be sure to know what you are doing and check documentation when directly using encodings from this file.
19
+ *
20
+ * Supports all legacy multi-byte encodings listed in the WHATWG Encoding standard:
21
+ * `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`.
22
+ *
23
+ * @module @exodus/bytes/multi-byte.js
24
+ */
25
+
26
+ /// <reference types="node" />
27
+
28
+ import type { Uint8ArrayBuffer } from './array.js';
29
+
30
+ /**
31
+ * Create a decoder for a supported legacy multi-byte `encoding`, given its lowercased name `encoding`.
32
+ *
33
+ * Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
34
+ *
35
+ * The returned function will maintain internal state while `stream = true` is used, allowing it to
36
+ * handle incomplete multi-byte sequences across multiple calls.
37
+ * State is reset when `stream = false` or when the function is called without the `stream` parameter.
38
+ *
39
+ * @param encoding - The encoding name (e.g., 'gbk', 'gb18030', 'big5', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'euc-kr')
40
+ * @param loose - If true, replaces unmapped bytes with replacement character instead of throwing (default: false)
41
+ * @returns A function that decodes bytes to string, with optional streaming support
42
+ */
43
+ export function createMultibyteDecoder(
44
+ encoding: string,
45
+ loose?: boolean
46
+ ): (arr: Uint8Array, stream?: boolean) => string;
47
+
48
+ /**
49
+ * Create an encoder for a supported legacy multi-byte `encoding`, given its lowercased name `encoding`.
50
+ *
51
+ * Returns a function `encode(string)` that encodes a string to bytes.
52
+ *
53
+ * In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
54
+ * not be encoded in the target encoding.
55
+ *
56
+ * @param encoding - The encoding name (e.g., 'gbk', 'gb18030', 'big5', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'euc-kr')
57
+ * @param options - Encoding options
58
+ * @param options.mode - Encoding mode (default: 'fatal'). Currently, only 'fatal' mode is supported.
59
+ * @returns A function that encodes string to bytes
60
+ */
61
+ export function createMultibyteEncoder(
62
+ encoding: string,
63
+ options?: { mode?: 'fatal' }
64
+ ): (string: string) => Uint8ArrayBuffer;
package/package.json CHANGED
@@ -1,16 +1,33 @@
1
1
  {
2
2
  "name": "@exodus/bytes",
3
- "version": "1.9.0",
3
+ "version": "1.11.0",
4
4
  "description": "Various operations on Uint8Array data",
5
+ "keywords": [
6
+ "encoding",
7
+ "uint8array",
8
+ "textdecoder",
9
+ "textencoder",
10
+ "utf8",
11
+ "utf16",
12
+ "hex",
13
+ "base64",
14
+ "base32",
15
+ "base58",
16
+ "base58check",
17
+ "bech32",
18
+ "bech32m",
19
+ "wif"
20
+ ],
5
21
  "scripts": {
6
22
  "lint": "eslint .",
23
+ "typedoc": "typedoc && mkdir -p doc/assets && cp -r theme/styles doc/assets/",
7
24
  "test:javascriptcore": "npm run test:jsc --",
8
25
  "test:v8": "exodus-test --engine=v8:bundle",
9
26
  "test:jsc": "exodus-test --engine=jsc:bundle",
10
27
  "test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
11
28
  "test:hermes": "exodus-test --engine=hermes:bundle",
12
29
  "test:quickjs": "exodus-test --engine=quickjs:bundle",
13
- "test:xs": "exodus-test --engine=xs:bundle",
30
+ "test:xs": "EXODUS_TEST_IGNORE='tests/whatwg.browser.test.js' exodus-test --engine=xs:bundle",
14
31
  "test:engine262": "exodus-test --engine=engine262:bundle",
15
32
  "test:deno": "exodus-test --engine=deno:pure",
16
33
  "test:bun": "exodus-test --engine=bun:pure",
@@ -38,7 +55,7 @@
38
55
  "bugs": {
39
56
  "url": "https://github.com/ExodusOSS/bytes/issues"
40
57
  },
41
- "homepage": "https://github.com/ExodusOSS/bytes#readme",
58
+ "homepage": "https://github.com/ExodusOSS/bytes",
42
59
  "engines": {
43
60
  "node": "^20.19.0 || ^22.12.0 || >=24.0.0"
44
61
  },
@@ -54,6 +71,7 @@
54
71
  "/fallback/encoding.util.js",
55
72
  "/fallback/hex.js",
56
73
  "/fallback/latin1.js",
74
+ "/fallback/percent.js",
57
75
  "/fallback/multi-byte.encodings.cjs",
58
76
  "/fallback/multi-byte.encodings.json",
59
77
  "/fallback/multi-byte.js",
@@ -66,13 +84,18 @@
66
84
  "/array.d.ts",
67
85
  "/assert.js",
68
86
  "/base32.js",
87
+ "/base32.d.ts",
69
88
  "/base58.js",
89
+ "/base58.d.ts",
70
90
  "/base58check.js",
91
+ "/base58check.d.ts",
71
92
  "/base58check.node.js",
72
93
  "/base64.js",
73
94
  "/base64.d.ts",
74
95
  "/bech32.js",
96
+ "/bech32.d.ts",
75
97
  "/bigint.js",
98
+ "/bigint.d.ts",
76
99
  "/encoding-browser.js",
77
100
  "/encoding-browser.browser.js",
78
101
  "/encoding-browser.native.js",
@@ -87,15 +110,21 @@
87
110
  "/index.js",
88
111
  "/index.d.ts",
89
112
  "/multi-byte.js",
113
+ "/multi-byte.d.ts",
90
114
  "/multi-byte.node.js",
91
115
  "/single-byte.js",
116
+ "/single-byte.d.ts",
92
117
  "/single-byte.node.js",
93
118
  "/utf16.js",
119
+ "/utf16.d.ts",
94
120
  "/utf16.node.js",
95
121
  "/utf8.js",
96
122
  "/utf8.d.ts",
97
123
  "/utf8.node.js",
98
- "/wif.js"
124
+ "/whatwg.js",
125
+ "/whatwg.d.ts",
126
+ "/wif.js",
127
+ "/wif.d.ts"
99
128
  ],
100
129
  "main": "index.js",
101
130
  "module": "index.js",
@@ -109,9 +138,16 @@
109
138
  "types": "./array.d.ts",
110
139
  "default": "./array.js"
111
140
  },
112
- "./base32.js": "./base32.js",
113
- "./base58.js": "./base58.js",
141
+ "./base32.js": {
142
+ "types": "./base32.d.ts",
143
+ "default": "./base32.js"
144
+ },
145
+ "./base58.js": {
146
+ "types": "./base58.d.ts",
147
+ "default": "./base58.js"
148
+ },
114
149
  "./base58check.js": {
150
+ "types": "./base58check.d.ts",
115
151
  "node": "./base58check.node.js",
116
152
  "default": "./base58check.js"
117
153
  },
@@ -119,18 +155,26 @@
119
155
  "types": "./base64.d.ts",
120
156
  "default": "./base64.js"
121
157
  },
122
- "./bech32.js": "./bech32.js",
123
- "./bigint.js": "./bigint.js",
158
+ "./bech32.js": {
159
+ "types": "./bech32.d.ts",
160
+ "default": "./bech32.js"
161
+ },
162
+ "./bigint.js": {
163
+ "types": "./bigint.d.ts",
164
+ "default": "./bigint.js"
165
+ },
124
166
  "./hex.js": {
125
167
  "types": "./hex.d.ts",
126
168
  "node": "./hex.node.js",
127
169
  "default": "./hex.js"
128
170
  },
129
171
  "./multi-byte.js": {
172
+ "types": "./multi-byte.d.ts",
130
173
  "node": "./multi-byte.node.js",
131
174
  "default": "./multi-byte.js"
132
175
  },
133
176
  "./single-byte.js": {
177
+ "types": "./single-byte.d.ts",
134
178
  "node": "./single-byte.node.js",
135
179
  "default": "./single-byte.js"
136
180
  },
@@ -150,6 +194,7 @@
150
194
  "default": "./encoding-browser.js"
151
195
  },
152
196
  "./utf16.js": {
197
+ "types": "./utf16.d.ts",
153
198
  "node": "./utf16.node.js",
154
199
  "default": "./utf16.js"
155
200
  },
@@ -158,7 +203,14 @@
158
203
  "node": "./utf8.node.js",
159
204
  "default": "./utf8.js"
160
205
  },
161
- "./wif.js": "./wif.js"
206
+ "./whatwg.js": {
207
+ "types": "./whatwg.d.ts",
208
+ "default": "./whatwg.js"
209
+ },
210
+ "./wif.js": {
211
+ "types": "./wif.d.ts",
212
+ "default": "./wif.js"
213
+ }
162
214
  },
163
215
  "react-native": {
164
216
  "./encoding-browser.js": "./encoding-browser.native.js"
@@ -177,6 +229,7 @@
177
229
  "@exodus/eslint-config": "^5.24.0",
178
230
  "@exodus/prettier": "^1.0.0",
179
231
  "@exodus/test": "^1.0.0-rc.109",
232
+ "@hexagon/base64": "^2.0.4",
180
233
  "@noble/hashes": "^2.0.1",
181
234
  "@oslojs/encoding": "^1.1.0",
182
235
  "@petamoriken/float16": "^3.9.3",
@@ -207,6 +260,7 @@
207
260
  "jsvu": "^3.0.3",
208
261
  "punycode": "^2.3.1",
209
262
  "text-encoding": "^0.7.0",
263
+ "typedoc": "^0.28.16",
210
264
  "typescript": "^5.9.3",
211
265
  "uint8array-tools": "^0.0.9",
212
266
  "utf8": "^3.0.0",
package/single-byte.d.ts ADDED
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Decode / encode the legacy single-byte encodings according to the
3
+ * [Encoding standard](https://encoding.spec.whatwg.org/)
4
+ * ([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings),
5
+ * [§14.5](https://encoding.spec.whatwg.org/#x-user-defined)),
6
+ * and [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859) `iso-8859-*` mappings.
7
+ *
8
+ * ```js
9
+ * import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
10
+ * import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
11
+ * import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
12
+ * ```
13
+ *
14
+ * > [!WARNING]
15
+ * > This is a lower-level API for single-byte encodings.
16
+ * > It might not match what you expect, as it supports both WHATWG and unicode.org encodings under
17
+ * > different names, with the main intended use case for the latter being either non-web or legacy contexts.
18
+ * >
19
+ * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
20
+ * >
21
+ * > Be sure to know what you are doing and check documentation when directly using encodings from this file.
22
+ *
23
+ * Supports all single-byte encodings listed in the WHATWG Encoding standard:
24
+ * `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
25
+ * `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
26
+ * `macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
27
+ * `windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
28
+ *
29
+ * Also supports `iso-8859-1`, `iso-8859-9`, `iso-8859-11` as defined at
30
+ * [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859)
31
+ * (and all other `iso-8859-*` encodings there as they match WHATWG).
32
+ *
33
+ * > [!NOTE]
34
+ * > While all `iso-8859-*` encodings supported by the [WHATWG Encoding standard](https://encoding.spec.whatwg.org/) match
35
+ * > [unicode.org](https://unicode.org/Public/MAPPINGS/ISO8859), the WHATWG Encoding spec doesn't support
36
+ * > `iso-8859-1`, `iso-8859-9`, `iso-8859-11`, and instead maps them as labels to `windows-1252`, `windows-1254`, `windows-874`.\
37
+ * > `createSinglebyteDecoder()` (unlike `TextDecoder` or `legacyHookDecode()`) does not do such mapping,
38
+ * > so its results will differ from `TextDecoder` for those encoding names.
39
+ *
40
+ * ```js
41
+ * > new TextDecoder('iso-8859-1').encoding
42
+ * 'windows-1252'
43
+ * > new TextDecoder('iso-8859-9').encoding
44
+ * 'windows-1254'
45
+ * > new TextDecoder('iso-8859-11').encoding
46
+ * 'windows-874'
47
+ * > new TextDecoder('iso-8859-9').decode(Uint8Array.of(0x80, 0x81, 0xd0))
48
+ * '€\x81Ğ' // this is actually decoded according to windows-1254 per TextDecoder spec
49
+ * > createSinglebyteDecoder('iso-8859-9')(Uint8Array.of(0x80, 0x81, 0xd0))
50
+ * '\x80\x81Ğ' // this is iso-8859-9 as defined at https://unicode.org/Public/MAPPINGS/ISO8859/8859-9.txt
51
+ * ```
52
+ *
53
+ * All WHATWG Encoding spec [`windows-*` encodings](https://encoding.spec.whatwg.org/#windows-874) are supersets of
54
+ * corresponding [unicode.org encodings](https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/), meaning that
55
+ * they encode/decode all the old valid (non-replacement) strings / byte sequences identically, but can also support
56
+ * a wider range of inputs.
57
+ *
58
+ * @module @exodus/bytes/single-byte.js
59
+ */
60
+
61
+ /// <reference types="node" />
62
+
63
+ import type { Uint8ArrayBuffer } from './array.js';
64
+
65
+ /**
66
+ * Create a decoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
67
+ *
68
+ * Returns a function `decode(arr)` that decodes bytes to a string.
69
+ *
70
+ * @param encoding - The encoding name (e.g., 'iso-8859-1', 'windows-1252')
71
+ * @param loose - If true, replaces unmapped bytes with replacement character instead of throwing (default: false)
72
+ * @returns A function that decodes bytes to string
73
+ */
74
+ export function createSinglebyteDecoder(
75
+ encoding: string,
76
+ loose?: boolean
77
+ ): (arr: Uint8Array) => string;
78
+
79
+ /**
80
+ * Create an encoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
81
+ *
82
+ * Returns a function `encode(string)` that encodes a string to bytes.
83
+ *
84
+ * In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
85
+ * not be encoded in the target encoding.
86
+ *
87
+ * @param encoding - The encoding name (e.g., 'iso-8859-1', 'windows-1252')
88
+ * @param options - Encoding options
89
+ * @param options.mode - Encoding mode (default: 'fatal'). Currently, only 'fatal' mode is supported.
90
+ * @returns A function that encodes string to bytes
91
+ */
92
+ export function createSinglebyteEncoder(
93
+ encoding: string,
94
+ options?: { mode?: 'fatal' }
95
+ ): (string: string) => Uint8ArrayBuffer;
96
+
97
+ /**
98
+ * Decode `iso-8859-1` bytes to a string.
99
+ *
100
+ * There is no loose variant for this encoding, all bytes can be decoded.
101
+ *
102
+ * Same as:
103
+ * ```js
104
+ * const latin1toString = createSinglebyteDecoder('iso-8859-1')
105
+ * ```
106
+ *
107
+ * > [!NOTE]
108
+ * > This is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as those
109
+ * > alias to `new TextDecoder('windows-1252')`.
110
+ *
111
+ * @param arr - The bytes to decode
112
+ * @returns The decoded string
113
+ */
114
+ export function latin1toString(arr: Uint8Array): string;
115
+
116
+ /**
117
+ * Encode a string to `iso-8859-1` bytes.
118
+ *
119
+ * Throws on non well-formed strings or any codepoints which could not be encoded in `iso-8859-1`.
120
+ *
121
+ * Same as:
122
+ * ```js
123
+ * const latin1fromString = createSinglebyteEncoder('iso-8859-1', { mode: 'fatal' })
124
+ * ```
125
+ *
126
+ * @param string - The string to encode
127
+ * @returns The encoded bytes
128
+ */
129
+ export function latin1fromString(string: string): Uint8ArrayBuffer;
130
+
131
+ /**
132
+ * Decode `windows-1252` bytes to a string.
133
+ *
134
+ * There is no loose variant for this encoding, all bytes can be decoded.
135
+ *
136
+ * Same as:
137
+ * ```js
138
+ * const windows1252toString = createSinglebyteDecoder('windows-1252')
139
+ * ```
140
+ *
141
+ * @param arr - The bytes to decode
142
+ * @returns The decoded string
143
+ */
144
+ export function windows1252toString(arr: Uint8Array): string;
145
+
146
+ /**
147
+ * Encode a string to `windows-1252` bytes.
148
+ *
149
+ * Throws on non well-formed strings or any codepoints which could not be encoded in `windows-1252`.
150
+ *
151
+ * Same as:
152
+ * ```js
153
+ * const windows1252fromString = createSinglebyteEncoder('windows-1252', { mode: 'fatal' })
154
+ * ```
155
+ *
156
+ * @param string - The string to encode
157
+ * @returns The encoded bytes
158
+ */
159
+ export function windows1252fromString(string: string): Uint8ArrayBuffer;
package/utf16.d.ts ADDED
@@ -0,0 +1,92 @@
1
+ /**
2
+ * UTF-16 encoding/decoding
3
+ *
4
+ * ```js
5
+ * import { utf16fromString, utf16toString } from '@exodus/bytes/utf16.js'
6
+ *
7
+ * // loose
8
+ * import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js'
9
+ * ```
10
+ *
11
+ * _These methods by design encode/decode BOM (codepoint `U+FEFF` Byte Order Mark) as-is._\
12
+ * _If you need BOM handling or detection, use `@exodus/bytes/encoding.js`_
13
+ *
14
+ * @module @exodus/bytes/utf16.js
15
+ */
16
+
17
+ /// <reference types="node" />
18
+
19
+ import type { Uint8ArrayBuffer, Uint16ArrayBuffer } from './array.js';
20
+
21
+ /**
22
+ * Output format for UTF-16 encoding
23
+ */
24
+ export type Utf16Format = 'uint16' | 'uint8-le' | 'uint8-be';
25
+
26
+ /**
27
+ * Encode a string to UTF-16 bytes (strict mode)
28
+ *
29
+ * Throws on invalid Unicode (unpaired surrogates)
30
+ *
31
+ * @param string - The string to encode
32
+ * @param format - Output format (default: 'uint16')
33
+ * @returns The encoded bytes
34
+ */
35
+ export function utf16fromString(string: string, format?: 'uint16'): Uint16ArrayBuffer;
36
+ export function utf16fromString(string: string, format: 'uint8-le'): Uint8ArrayBuffer;
37
+ export function utf16fromString(string: string, format: 'uint8-be'): Uint8ArrayBuffer;
38
+ export function utf16fromString(string: string, format?: Utf16Format): Uint16ArrayBuffer | Uint8ArrayBuffer;
39
+
40
+ /**
41
+ * Encode a string to UTF-16 bytes (loose mode)
42
+ *
43
+ * Replaces invalid Unicode (unpaired surrogates) with replacement codepoints `U+FFFD`
44
+ * per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
45
+ *
46
+ * _Such replacement is a non-injective function, is irreversible and causes collisions.\
47
+ * Prefer using strict throwing methods for cryptography applications._
48
+ *
49
+ * @param string - The string to encode
50
+ * @param format - Output format (default: 'uint16')
51
+ * @returns The encoded bytes
52
+ */
53
+ export function utf16fromStringLoose(string: string, format?: 'uint16'): Uint16ArrayBuffer;
54
+ export function utf16fromStringLoose(string: string, format: 'uint8-le'): Uint8ArrayBuffer;
55
+ export function utf16fromStringLoose(string: string, format: 'uint8-be'): Uint8ArrayBuffer;
56
+ export function utf16fromStringLoose(string: string, format?: Utf16Format): Uint16ArrayBuffer | Uint8ArrayBuffer;
57
+
58
+ /**
59
+ * Decode UTF-16 bytes to a string (strict mode)
60
+ *
61
+ * Throws on invalid UTF-16 byte sequences
62
+ *
63
+ * Throws on non-even byte length.
64
+ *
65
+ * @param arr - The bytes to decode
66
+ * @param format - Input format (default: 'uint16')
67
+ * @returns The decoded string
68
+ */
69
+ export function utf16toString(arr: Uint16Array, format?: 'uint16'): string;
70
+ export function utf16toString(arr: Uint8Array, format: 'uint8-le'): string;
71
+ export function utf16toString(arr: Uint8Array, format: 'uint8-be'): string;
72
+ export function utf16toString(arr: Uint16Array | Uint8Array, format?: Utf16Format): string;
73
+
74
+ /**
75
+ * Decode UTF-16 bytes to a string (loose mode)
76
+ *
77
+ * Replaces invalid UTF-16 byte sequences with replacement codepoints `U+FFFD`
78
+ * per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
79
+ *
80
+ * _Such replacement is a non-injective function, is irreversible and causes collisions.\
81
+ * Prefer using strict throwing methods for cryptography applications._
82
+ *
83
+ * Throws on non-even byte length.
84
+ *
85
+ * @param arr - The bytes to decode
86
+ * @param format - Input format (default: 'uint16')
87
+ * @returns The decoded string
88
+ */
89
+ export function utf16toStringLoose(arr: Uint16Array, format?: 'uint16'): string;
90
+ export function utf16toStringLoose(arr: Uint8Array, format: 'uint8-le'): string;
91
+ export function utf16toStringLoose(arr: Uint8Array, format: 'uint8-be'): string;
92
+ export function utf16toStringLoose(arr: Uint16Array | Uint8Array, format?: Utf16Format): string;
package/utf16.js CHANGED
@@ -8,7 +8,7 @@ const decoderLooseLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM })
8
8
  const decoderFatalBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM, fatal: true }) : null
9
9
  const decoderLooseBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM }) : null
10
10
  const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE
11
- const decoderLoose16 = isLE ? decoderLooseLE : decoderFatalBE
11
+ const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE
12
12
  const { isWellFormed, toWellFormed } = String.prototype
13
13
 
14
14
  const { E_STRICT, E_STRICT_UNICODE } = js
package/utf8.d.ts CHANGED
@@ -1,42 +1,96 @@
1
+ /**
2
+ * UTF-8 encoding/decoding
3
+ *
4
+ * ```js
5
+ * import { utf8fromString, utf8toString } from '@exodus/bytes/utf8.js'
6
+ *
7
+ * // loose
8
+ * import { utf8fromStringLoose, utf8toStringLoose } from '@exodus/bytes/utf8.js'
9
+ * ```
10
+ *
11
+ * _These methods by design encode/decode BOM (codepoint `U+FEFF` Byte Order Mark) as-is._\
12
+ * _If you need BOM handling or detection, use `@exodus/bytes/encoding.js`_
13
+ *
14
+ * @module @exodus/bytes/utf8.js
15
+ */
16
+
1
17
  /// <reference types="node" />
2
18
 
3
19
  import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
4
20
 
5
21
  /**
6
- * Encodes a string to UTF-8 bytes (strict mode)
22
+ * Encode a string to UTF-8 bytes (strict mode)
23
+ *
7
24
  * Throws on invalid Unicode (unpaired surrogates)
8
- * @param str - The string to encode
25
+ *
26
+ * This is similar to the following snippet (but works on all engines):
27
+ * ```js
28
+ * // Strict encode, requiring Unicode codepoints to be valid
29
+ * if (typeof string !== 'string' || !string.isWellFormed()) throw new TypeError()
30
+ * return new TextEncoder().encode(string)
31
+ * ```
32
+ *
33
+ * @param string - The string to encode
9
34
  * @param format - Output format (default: 'uint8')
10
35
  * @returns The encoded bytes
11
36
  */
12
- export function utf8fromString(str: string, format?: 'uint8'): Uint8ArrayBuffer;
13
- export function utf8fromString(str: string, format: 'buffer'): Buffer;
14
- export function utf8fromString(str: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
37
+ export function utf8fromString(string: string, format?: 'uint8'): Uint8ArrayBuffer;
38
+ export function utf8fromString(string: string, format: 'buffer'): Buffer;
39
+ export function utf8fromString(string: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
15
40
 
16
41
  /**
17
- * Encodes a string to UTF-8 bytes (loose mode)
18
- * Replaces invalid Unicode with replacement character
19
- * @param str - The string to encode
42
+ * Encode a string to UTF-8 bytes (loose mode)
43
+ *
44
+ * Replaces invalid Unicode (unpaired surrogates) with replacement codepoints `U+FFFD`
45
+ * per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
46
+ *
47
+ * _Such replacement is a non-injective function, is irreversible and causes collisions.\
48
+ * Prefer using strict throwing methods for cryptography applications._
49
+ *
50
+ * This is similar to the following snippet (but works on all engines):
51
+ * ```js
52
+ * // Loose encode, replacing invalid Unicode codepoints with U+FFFD
53
+ * if (typeof string !== 'string') throw new TypeError()
54
+ * return new TextEncoder().encode(string)
55
+ * ```
56
+ *
57
+ * @param string - The string to encode
20
58
  * @param format - Output format (default: 'uint8')
21
59
  * @returns The encoded bytes
22
60
  */
23
- export function utf8fromStringLoose(str: string, format?: 'uint8'): Uint8ArrayBuffer;
24
- export function utf8fromStringLoose(str: string, format: 'buffer'): Buffer;
25
- export function utf8fromStringLoose(str: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
61
+ export function utf8fromStringLoose(string: string, format?: 'uint8'): Uint8ArrayBuffer;
62
+ export function utf8fromStringLoose(string: string, format: 'buffer'): Buffer;
63
+ export function utf8fromStringLoose(
64
+ string: string,
65
+ format?: OutputFormat
66
+ ): Uint8ArrayBuffer | Buffer;
26
67
 
27
68
  /**
28
- * Decodes UTF-8 bytes to a string (strict mode)
29
- * Throws on invalid UTF-8 sequences
69
+ * Decode UTF-8 bytes to a string (strict mode)
70
+ *
71
+ * Throws on invalid UTF-8 byte sequences
72
+ *
73
+ * This is similar to `new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(arr)`,
74
+ * but works on all engines.
75
+ *
30
76
  * @param arr - The bytes to decode
31
77
  * @returns The decoded string
32
78
  */
33
- export function utf8toString(arr: Uint8ArrayBuffer): string;
79
+ export function utf8toString(arr: Uint8Array): string;
34
80
 
35
81
  /**
36
- * Decodes UTF-8 bytes to a string (loose mode)
37
- * Replaces invalid sequences with replacement character
82
+ * Decode UTF-8 bytes to a string (loose mode)
83
+ *
84
+ * Replaces invalid UTF-8 byte sequences with replacement codepoints `U+FFFD`
85
+ * per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
86
+ *
87
+ * _Such replacement is a non-injective function, is irreversible and causes collisions.\
88
+ * Prefer using strict throwing methods for cryptography applications._
89
+ *
90
+ * This is similar to `new TextDecoder('utf-8', { ignoreBOM: true }).decode(arr)`,
91
+ * but works on all engines.
92
+ *
38
93
  * @param arr - The bytes to decode
39
94
  * @returns The decoded string
40
95
  */
41
- export function utf8toStringLoose(arr: Uint8ArrayBuffer): string;
42
-
96
+ export function utf8toStringLoose(arr: Uint8Array): string;