@exodus/bytes 1.10.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,6 +10,8 @@
10
10
 
11
11
  And a [`TextEncoder` / `TextDecoder` polyfill](#textencoder--textdecoder-polyfill)
12
12
 
13
+ See [documentation](https://exodusoss.github.io/bytes).
14
+
13
15
  ## Strict
14
16
 
15
17
  Performs proper input validation, ensures no garbage-in-garbage-out
@@ -118,6 +120,13 @@ Encode a string to UTF-8 bytes (strict mode)
118
120
 
119
121
  Throws on invalid Unicode (unpaired surrogates)
120
122
 
123
+ This is similar to the following snippet (but works on all engines):
124
+ ```js
125
+ // Strict encode, requiring Unicode codepoints to be valid
126
+ if (typeof string !== 'string' || !string.isWellFormed()) throw new TypeError()
127
+ return new TextEncoder().encode(string)
128
+ ```
129
+
121
130
  #### `utf8fromStringLoose(string, format = 'uint8')`
122
131
 
123
132
  Encode a string to UTF-8 bytes (loose mode)
@@ -128,12 +137,22 @@ per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
128
137
  _Such replacement is a non-injective function, is irreversible and causes collisions.\
129
138
  Prefer using strict throwing methods for cryptography applications._
130
139
 
140
+ This is similar to the following snippet (but works on all engines):
141
+ ```js
142
+ // Loose encode, replacing invalid Unicode codepoints with U+FFFD
143
+ if (typeof string !== 'string') throw new TypeError()
144
+ return new TextEncoder().encode(string)
145
+ ```
146
+
131
147
  #### `utf8toString(arr)`
132
148
 
133
149
  Decode UTF-8 bytes to a string (strict mode)
134
150
 
135
151
  Throws on invalid UTF-8 byte sequences
136
152
 
153
+ This is similar to `new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(arr)`,
154
+ but works on all engines.
155
+
137
156
  #### `utf8toStringLoose(arr)`
138
157
 
139
158
  Decode UTF-8 bytes to a string (loose mode)
@@ -144,6 +163,9 @@ per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification.
144
163
  _Such replacement is a non-injective function, is irreversible and causes collisions.\
145
164
  Prefer using strict throwing methods for cryptography applications._
146
165
 
166
+ This is similar to `new TextDecoder('utf-8', { ignoreBOM: true }).decode(arr)`,
167
+ but works on all engines.
168
+
147
169
  ### `@exodus/bytes/utf16.js`
148
170
 
149
171
  UTF-16 encoding/decoding
@@ -208,6 +230,15 @@ import { windows1252toString, windows1252fromString } from '@exodus/bytes/single
208
230
  import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
209
231
  ```
210
232
 
233
+ > [!WARNING]
234
+ > This is a lower-level API for single-byte encodings.
235
+ > It might not match what you expect, as it supports both WHATWG and unicode.org encodings under
236
+ > different names, with the main intended use case for the latter being either non-web or legacy contexts.
237
+ >
238
+ > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
239
+ >
240
+ > Be sure to know what you are doing and check documentation when directly using encodings from this file.
241
+
211
242
  Supports all single-byte encodings listed in the WHATWG Encoding standard:
212
243
  `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
213
244
  `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
@@ -269,8 +300,9 @@ Same as:
269
300
  const latin1toString = createSinglebyteDecoder('iso-8859-1')
270
301
  ```
271
302
 
272
- Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as
273
- those alias to `new TextDecoder('windows-1252')`.
303
+ > [!NOTE]
304
+ > This is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as those
305
+ > alias to `new TextDecoder('windows-1252')`.
274
306
 
275
307
  #### `latin1fromString(string)`
276
308
 
@@ -318,6 +350,13 @@ Decode / encode the legacy multi-byte encodings according to the
318
350
  import { createMultibyteDecoder, createMultibyteEncoder } from '@exodus/bytes/multi-byte.js'
319
351
  ```
320
352
 
353
+ > [!WARNING]
354
+ > This is a lower-level API for legacy multi-byte encodings.
355
+ >
356
+ > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
357
+ >
358
+ > Be sure to know what you are doing and check documentation when directly using encodings from this file.
359
+
321
360
  Supports all legacy multi-byte encodings listed in the WHATWG Encoding standard:
322
361
  `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`.
323
362
 
@@ -608,7 +647,8 @@ import { typedView } from '@exodus/bytes/array.js'
608
647
 
609
648
  Create a view of a TypedArray in the specified format (`'uint8'` or `'buffer'`)
610
649
 
611
- Important: does not copy data, returns a view on the same underlying buffer
650
+ > [!IMPORTANT]
651
+ > Does not copy data, returns a view on the same underlying buffer
612
652
 
613
653
  ### `@exodus/bytes/encoding.js`
614
654
 
@@ -801,6 +841,41 @@ do not provide sufficiently complete / non-buggy `TextDecoder` APIs.
801
841
  > but they are fixing them and the expected update window is short.\
802
842
  > If you want to circumvent browser bugs, use full `@exodus/bytes/encoding.js` import.
803
843
 
844
+ ### `@exodus/bytes/whatwg.js`
845
+
846
+ WHATWG helpers
847
+
848
+ ```js
849
+ import '@exodus/bytes/encoding.js' // For full legacy multi-byte encodings support
850
+ import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
851
+ ```
852
+
853
+ #### `percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false)`
854
+
855
+ Implements [percent-encode after encoding](https://url.spec.whatwg.org/#string-percent-encode-after-encoding)
856
+ per WHATWG URL specification.
857
+
858
+ > [!IMPORTANT]
859
+ > You must import `@exodus/bytes/encoding.js` for this API to accept legacy multi-byte encodings.
860
+
861
+ Encodings `utf-16le`, `utf-16be`, and `replacement` are not accepted.
862
+
863
+ [C0 control percent-encode set](https://url.spec.whatwg.org/#c0-control-percent-encode-set) is
864
+ always percent-encoded.
865
+
866
+ `percentEncodeSet` is an addition to that, and must be a string of unique increasing codepoints
867
+ in range 0x20 - 0x7e, e.g. `' "#<>'`.
868
+
869
+ This method accepts [DOMStrings](https://webidl.spec.whatwg.org/#idl-DOMString) and converts them
870
+ to [USVStrings](https://webidl.spec.whatwg.org/#idl-USVString).
871
+ This is different from e.g. `encodeURI` and `encodeURIComponent` which throw on surrogates:
872
+ ```js
873
+ > percentEncodeAfterEncoding('utf8', '\ud800', ' "#$%&+,/:;<=>?@[\\]^`{|}') // component
874
+ '%EF%BF%BD'
875
+ > encodeURIComponent('\ud800')
876
+ Uncaught URIError: URI malformed
877
+ ```
878
+
804
879
  ## Changelog
805
880
 
806
881
  See [GitHub Releases](https://github.com/ExodusOSS/bytes/releases) tab
package/array.d.ts CHANGED
@@ -50,7 +50,8 @@ export type OutputFormat = 'uint8' | 'buffer';
50
50
  /**
51
51
  * Create a view of a TypedArray in the specified format (`'uint8'` or `'buffer'`)
52
52
  *
53
- * Important: does not copy data, returns a view on the same underlying buffer
53
+ * > [!IMPORTANT]
54
+ * > Does not copy data, returns a view on the same underlying buffer
54
55
  *
55
56
  * @param arr - The input TypedArray
56
57
  * @param format - The desired output format (`'uint8'` or `'buffer'`)
@@ -17,9 +17,9 @@ export const nativeDecoder = isNative(TextDecoder)
17
17
  // Actually windows-1252, compatible with ascii and latin1 decoding
18
18
  // Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
19
19
  // in 2025 due to a regression, so we call it Latin1 as it's usable only for that
20
- const getNativeLain1 = () => {
20
+ const getNativeLatin1 = () => {
21
21
  // Not all barebone engines with TextDecoder support anything other than utf-8, so detect
22
- if (!nativeDecoder) {
22
+ if (nativeDecoder) {
23
23
  try {
24
24
  return new TextDecoder('latin1', { ignoreBOM: true })
25
25
  } catch {}
@@ -28,7 +28,7 @@ const getNativeLain1 = () => {
28
28
  return null
29
29
  }
30
30
 
31
- export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLain1()
31
+ export const nativeDecoderLatin1 = /* @__PURE__ */ getNativeLatin1()
32
32
  export const canDecoders = !!nativeDecoderLatin1
33
33
 
34
34
  // Block Firefox < 146 specifically from using native hex/base64, as it's very slow there
@@ -20,15 +20,11 @@ export function unfinishedBytes(u, len, enc) {
20
20
  case 'utf-16le':
21
21
  case 'utf-16be': {
22
22
  // 0-3
23
- let p = 0
24
- if (len % 2 !== 0) p++ // uneven bytes
23
+ const p = len % 2 // uneven byte length adds 1
24
+ if (len < 2) return p
25
25
  const l = len - p - 1
26
- if (len - p >= 2) {
27
- const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l]
28
- if (last >= 0xd8_00 && last < 0xdc_00) p += 2 // lone lead
29
- }
30
-
31
- return p
26
+ const last = enc === 'utf-16le' ? (u[l] << 8) ^ u[l - 1] : (u[l - 1] << 8) ^ u[l]
27
+ return last >= 0xd8_00 && last < 0xdc_00 ? p + 2 : p // lone lead adds 2
32
28
  }
33
29
  }
34
30
 
@@ -10,7 +10,7 @@
10
10
  "$7": [2,12541,2,-98,1,-156,1,7897,3,-7897,1,244],
11
11
  "$8": ["E9UxzELB4htgpd4feI7ZJNwBAQEfAf8B_wH_Af8B_wH_Af8B_wEAAAD_"],
12
12
  "$9": [1,26142,4,2,1,1,4,2,1,1,1,1],
13
- "$jis0208tail": [
13
+ "$j": [
14
14
  "ipErP1Ps8XWWMAFJ4rgaAwI1HDv3D_k4cuHcHicp0VFf43EZOCAtKIYJGRokBhxNIB4qMI3tHlWG0gtGG_5HAI0TWEcHAQVAFZtpbqgTMYcTOjErvTAqSgAWUBIyTyZ-JwRT9krRHiX4Z3qSTmo8MH-xFCXNJO8FQPEBuGAlBhEMOhMaPFSWbUBCikNUq4NJTTraLApjAfFoHCnoaimC5yYVIij5CTwiyhSCyCw_DwEgXCVj9FfpAM2rPLIMZfFgRQsMDO407TAD_gQzJhVhbRIZAfwKcC5ocSwVFbV-Cwr_8ssh9gIq1PnvAAABAAAAAAAAAAABAP8BAAAAAAAAAQAAAAABAAAAAAEAAAAAAACnWgAAAAECAAAAAKMAXgABAAAAAAAAAgAa5gABAAAAAQCdYwAAAAACAAAAAAEAAAAAAf8BAAAAAQABAQAAAQEAAAAAAAAAAACUbAAAAAAAAZJuAAH_AQAAkm-RbwABAAAAAAAAAQEAAAAAAQAAAAAAAAAAAQAAAAABAAABAAAAAAEAAAAAiXcAAQABh3kAAAAAAAH_AQAAAAAAAAEAAQAAAACEfdsmAAAAAQ",
15
15
  3,32999,
16
16
  "lIZ_NRU0zrJ-KhNa6DV79Fl84mAcRy5Ra54FEbOQbwDl7RwkQS0WIELTXCtwAx1jrKtUAEF2R-4RsvwGDgD1ACAJ-S8F-xEK9-ctP88Abu8B9latCvJR-9ks9eAd5G3mTCEXGTgTAklJTHMRgwcHCQEBAwENxAD7BHGvigKY_BwhCURv-sHrt3mBfwEAgIABf4MAAAAAAQEAe4kAAAAAAQABAAB1i3UAjgACAAAAAABwkAEAAQABAG2UbACWAGqXAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAABomAAAAQD_aJn_AQAAAAAAAAABAAABZAGcAAEAAAAAAAABAGIAn2GfAQAAAQAAAAABAQBdpAA"
@@ -39,8 +39,8 @@
39
39
  1,8,1,13,1,-11,1,-36,1,54,1,17,1,11,1,13,1,-4,1,-3,-2,1,-6,1,22,1,-9,1,13,1,-5,1,7,-2,-2,1,-2,1,4,-1,1,-2,1,66,-2,1,11,-3,1,14,-1,2,1,2,9,2,2,-2,1,8,1,-5,-2,1,5,1,-4,1,5,-3,1,10,1,17,-1,1,4,2,2,
40
40
  -1,4,1,2,3,
41
41
  "MhH-C_0AAgYbHu8NBAAHAAcLCwnuDAgBABD-Ae8MWLf-_iT-9An4-QEJ9_kp7An-_A4W6hP3GwUGDgFuBAMY_uIXAiP7iHP7JND8Lv3-BwEUAAXs_ggYFv0jwwofCQQIEfz5_AELCPIW-STtBgENHAACVgMDAwYCAAUC-wsHAw7xArNqmwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAf8AAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAP8BAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAPkH",
42
- 3,1,-3,1,7,-1,1,-2,-2,1,4,-2,1,6,1,4,2,1,-1,3,1,2,9,"t10LChELBwEOdikAAAAAAAAA",3,-5,2,4,"bP0M-gMeBgOOl5EKd7yfAAAAAAAAALkRJ-TdIA",0,464,"$jis0208tail",0,2,10,-20522,"4gEi-v8AAAA",0,2068,10,-56723,
43
- 10,-26,"4gEi-i7kChP_AAAAM-8AAQ","$jis0208tail"
42
+ 3,1,-3,1,7,-1,1,-2,-2,1,4,-2,1,6,1,4,2,1,-1,3,1,2,9,"t10LChELBwEOdikAAAAAAAAA",3,-5,2,4,"bP0M-gMeBgOOl5EKd7yfAAAAAAAAALkRJ-TdIA",0,464,"$j",0,2,10,-20522,"4gEi-v8AAAA",0,2068,10,-56723,
43
+ 10,-26,"4gEi-i7kChP_AAAAM-8AAQ","$j"
44
44
  ],
45
45
  "jis0212": [
46
46
  0,108,"2O7wIAPRK_6DJQACAP4CAP4CAP0EAA",0,8,1,-741,1,4,1,24,0,38,"uu_-BHOBcQAAAAAh3yE",0,359,1,-7569,3,1,1,31,0,1,1,-31,0,1,-1,1,28,0,1,1,-29,0,4,4,28,"ysU79Qr95B0DAAAAAAAAAA",0,35,11,51,2,1,0,35,
@@ -0,0 +1,31 @@
1
+ import { decodeAscii, encodeLatin1 } from './latin1.js'
2
+ import { decode2string } from './_utils.js'
3
+
4
+ const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
5
+ const percentMap = new Map()
6
+ let hex, base
7
+
8
+ export function percentEncoder(set, spaceAsPlus = false) {
9
+ if (typeof set !== 'string' || /[^\x20-\x7E]/.test(set)) throw new TypeError(ERR)
10
+ if (typeof spaceAsPlus !== 'boolean') throw new TypeError('spaceAsPlus must be boolean')
11
+ const id = set + +spaceAsPlus
12
+ const cached = percentMap.get(id)
13
+ if (cached) return cached
14
+
15
+ const n = encodeLatin1(set).sort() // string checked above to be ascii
16
+ if (decodeAscii(n) !== set || new Set(n).size !== n.length) throw new TypeError(ERR)
17
+
18
+ if (!base) {
19
+ hex = Array.from({ length: 256 }, (_, i) => `%${i.toString(16).padStart(2, '0').toUpperCase()}`)
20
+ base = hex.map((h, i) => (i < 0x20 || i > 0x7e ? h : String.fromCharCode(i)))
21
+ }
22
+
23
+ const map = base.slice() // copy
24
+ for (const c of n) map[c] = hex[c]
25
+ if (spaceAsPlus) map[0x20] = '+' // overrides whatever percentEncodeSet thinks about it
26
+
27
+ // Input is not typechecked, for internal use only
28
+ const percentEncode = (u8, start = 0, end = u8.length) => decode2string(u8, start, end, map)
29
+ percentMap.set(id, percentEncode)
30
+ return percentEncode
31
+ }
package/multi-byte.d.ts CHANGED
@@ -10,6 +10,13 @@
10
10
  * import { createMultibyteDecoder, createMultibyteEncoder } from '@exodus/bytes/multi-byte.js'
11
11
  * ```
12
12
  *
13
+ * > [!WARNING]
14
+ * > This is a lower-level API for legacy multi-byte encodings.
15
+ * >
16
+ * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
17
+ * >
18
+ * > Be sure to know what you are doing and check documentation when directly using encodings from this file.
19
+ *
13
20
  * Supports all legacy multi-byte encodings listed in the WHATWG Encoding standard:
14
21
  * `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`.
15
22
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exodus/bytes",
3
- "version": "1.10.0",
3
+ "version": "1.11.0",
4
4
  "description": "Various operations on Uint8Array data",
5
5
  "keywords": [
6
6
  "encoding",
@@ -20,13 +20,14 @@
20
20
  ],
21
21
  "scripts": {
22
22
  "lint": "eslint .",
23
+ "typedoc": "typedoc && mkdir -p doc/assets && cp -r theme/styles doc/assets/",
23
24
  "test:javascriptcore": "npm run test:jsc --",
24
25
  "test:v8": "exodus-test --engine=v8:bundle",
25
26
  "test:jsc": "exodus-test --engine=jsc:bundle",
26
27
  "test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
27
28
  "test:hermes": "exodus-test --engine=hermes:bundle",
28
29
  "test:quickjs": "exodus-test --engine=quickjs:bundle",
29
- "test:xs": "exodus-test --engine=xs:bundle",
30
+ "test:xs": "EXODUS_TEST_IGNORE='tests/whatwg.browser.test.js' exodus-test --engine=xs:bundle",
30
31
  "test:engine262": "exodus-test --engine=engine262:bundle",
31
32
  "test:deno": "exodus-test --engine=deno:pure",
32
33
  "test:bun": "exodus-test --engine=bun:pure",
@@ -70,6 +71,7 @@
70
71
  "/fallback/encoding.util.js",
71
72
  "/fallback/hex.js",
72
73
  "/fallback/latin1.js",
74
+ "/fallback/percent.js",
73
75
  "/fallback/multi-byte.encodings.cjs",
74
76
  "/fallback/multi-byte.encodings.json",
75
77
  "/fallback/multi-byte.js",
@@ -119,6 +121,8 @@
119
121
  "/utf8.js",
120
122
  "/utf8.d.ts",
121
123
  "/utf8.node.js",
124
+ "/whatwg.js",
125
+ "/whatwg.d.ts",
122
126
  "/wif.js",
123
127
  "/wif.d.ts"
124
128
  ],
@@ -199,6 +203,10 @@
199
203
  "node": "./utf8.node.js",
200
204
  "default": "./utf8.js"
201
205
  },
206
+ "./whatwg.js": {
207
+ "types": "./whatwg.d.ts",
208
+ "default": "./whatwg.js"
209
+ },
202
210
  "./wif.js": {
203
211
  "types": "./wif.d.ts",
204
212
  "default": "./wif.js"
@@ -221,6 +229,7 @@
221
229
  "@exodus/eslint-config": "^5.24.0",
222
230
  "@exodus/prettier": "^1.0.0",
223
231
  "@exodus/test": "^1.0.0-rc.109",
232
+ "@hexagon/base64": "^2.0.4",
224
233
  "@noble/hashes": "^2.0.1",
225
234
  "@oslojs/encoding": "^1.1.0",
226
235
  "@petamoriken/float16": "^3.9.3",
@@ -251,6 +260,7 @@
251
260
  "jsvu": "^3.0.3",
252
261
  "punycode": "^2.3.1",
253
262
  "text-encoding": "^0.7.0",
263
+ "typedoc": "^0.28.16",
254
264
  "typescript": "^5.9.3",
255
265
  "uint8array-tools": "^0.0.9",
256
266
  "utf8": "^3.0.0",
package/single-byte.d.ts CHANGED
@@ -11,6 +11,15 @@
11
11
  * import { latin1toString, latin1fromString } from '@exodus/bytes/single-byte.js'
12
12
  * ```
13
13
  *
14
+ * > [!WARNING]
15
+ * > This is a lower-level API for single-byte encodings.
16
+ * > It might not match what you expect, as it supports both WHATWG and unicode.org encodings under
17
+ * > different names, with the main intended use case for the latter being either non-web or legacy contexts.
18
+ * >
19
+ * > For a safe WHATWG Encoding-compatible API, see `@exodus/bytes/encoding.js` import (and variants of it).
20
+ * >
21
+ * > Be sure to know what you are doing and check documentation when directly using encodings from this file.
22
+ *
14
23
  * Supports all single-byte encodings listed in the WHATWG Encoding standard:
15
24
  * `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
16
25
  * `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
@@ -95,8 +104,9 @@ export function createSinglebyteEncoder(
95
104
  * const latin1toString = createSinglebyteDecoder('iso-8859-1')
96
105
  * ```
97
106
  *
98
- * Note: this is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as
99
- * those alias to `new TextDecoder('windows-1252')`.
107
+ * > [!NOTE]
108
+ * > This is different from `new TextDecoder('iso-8859-1')` and `new TextDecoder('latin1')`, as those
109
+ * > alias to `new TextDecoder('windows-1252')`.
100
110
  *
101
111
  * @param arr - The bytes to decode
102
112
  * @returns The decoded string
package/utf16.js CHANGED
@@ -8,7 +8,7 @@ const decoderLooseLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM })
8
8
  const decoderFatalBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM, fatal: true }) : null
9
9
  const decoderLooseBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM }) : null
10
10
  const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE
11
- const decoderLoose16 = isLE ? decoderLooseLE : decoderFatalBE
11
+ const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE
12
12
  const { isWellFormed, toWellFormed } = String.prototype
13
13
 
14
14
  const { E_STRICT, E_STRICT_UNICODE } = js
package/utf8.d.ts CHANGED
@@ -23,6 +23,13 @@ import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
23
23
  *
24
24
  * Throws on invalid Unicode (unpaired surrogates)
25
25
  *
26
+ * This is similar to the following snippet (but works on all engines):
27
+ * ```js
28
+ * // Strict encode, requiring Unicode codepoints to be valid
29
+ * if (typeof string !== 'string' || !string.isWellFormed()) throw new TypeError()
30
+ * return new TextEncoder().encode(string)
31
+ * ```
32
+ *
26
33
  * @param string - The string to encode
27
34
  * @param format - Output format (default: 'uint8')
28
35
  * @returns The encoded bytes
@@ -40,6 +47,13 @@ export function utf8fromString(string: string, format?: OutputFormat): Uint8Arra
40
47
  * _Such replacement is a non-injective function, is irreversible and causes collisions.\
41
48
  * Prefer using strict throwing methods for cryptography applications._
42
49
  *
50
+ * This is similar to the following snippet (but works on all engines):
51
+ * ```js
52
+ * // Loose encode, replacing invalid Unicode codepoints with U+FFFD
53
+ * if (typeof string !== 'string') throw new TypeError()
54
+ * return new TextEncoder().encode(string)
55
+ * ```
56
+ *
43
57
  * @param string - The string to encode
44
58
  * @param format - Output format (default: 'uint8')
45
59
  * @returns The encoded bytes
@@ -56,6 +70,9 @@ export function utf8fromStringLoose(
56
70
  *
57
71
  * Throws on invalid UTF-8 byte sequences
58
72
  *
73
+ * This is similar to `new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(arr)`,
74
+ * but works on all engines.
75
+ *
59
76
  * @param arr - The bytes to decode
60
77
  * @returns The decoded string
61
78
  */
@@ -70,6 +87,9 @@ export function utf8toString(arr: Uint8Array): string;
70
87
  * _Such replacement is a non-injective function, is irreversible and causes collisions.\
71
88
  * Prefer using strict throwing methods for cryptography applications._
72
89
  *
90
+ * This is similar to `new TextDecoder('utf-8', { ignoreBOM: true }).decode(arr)`,
91
+ * but works on all engines.
92
+ *
73
93
  * @param arr - The bytes to decode
74
94
  * @returns The decoded string
75
95
  */
package/utf8.js CHANGED
@@ -27,11 +27,11 @@ function deLoose(str, loose, res) {
27
27
 
28
28
  // Recheck if the string was encoded correctly
29
29
  let start = 0
30
- const last = res.length - 2
31
- // Search for EFBFBD
32
- while (start < last) {
30
+ const last = res.length - 3
31
+ // Search for EFBFBD (3-byte sequence)
32
+ while (start <= last) {
33
33
  const pos = res.indexOf(0xef, start)
34
- if (pos === -1) break
34
+ if (pos === -1 || pos > last) break
35
35
  start = pos + 1
36
36
  if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
37
37
  // Found a replacement char in output, need to recheck if we encoded the input correctly
package/whatwg.d.ts ADDED
@@ -0,0 +1,48 @@
1
+ /**
2
+ * WHATWG helpers
3
+ *
4
+ * ```js
5
+ * import '@exodus/bytes/encoding.js' // For full legacy multi-byte encodings support
6
+ * import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
7
+ * ```
8
+ *
9
+ * @module @exodus/bytes/whatwg.js
10
+ */
11
+
12
+ /**
13
+ * Implements [percent-encode after encoding](https://url.spec.whatwg.org/#string-percent-encode-after-encoding)
14
+ * per WHATWG URL specification.
15
+ *
16
+ * > [!IMPORTANT]
17
+ * > You must import `@exodus/bytes/encoding.js` for this API to accept legacy multi-byte encodings.
18
+ *
19
+ * Encodings `utf-16le`, `utf-16be`, and `replacement` are not accepted.
20
+ *
21
+ * [C0 control percent-encode set](https://url.spec.whatwg.org/#c0-control-percent-encode-set) is
22
+ * always percent-encoded.
23
+ *
24
+ * `percentEncodeSet` is an addition to that, and must be a string of unique increasing codepoints
25
+ * in range 0x20 - 0x7e, e.g. `' "#<>'`.
26
+ *
27
+ * This method accepts [DOMStrings](https://webidl.spec.whatwg.org/#idl-DOMString) and converts them
28
+ * to [USVStrings](https://webidl.spec.whatwg.org/#idl-USVString).
29
+ * This is different from e.g. `encodeURI` and `encodeURIComponent` which throw on surrogates:
30
+ * ```js
31
+ * > percentEncodeAfterEncoding('utf8', '\ud800', ' "#$%&+,/:;<=>?@[\\]^`{|}') // component
32
+ * '%EF%BF%BD'
33
+ * > encodeURIComponent('\ud800')
34
+ * Uncaught URIError: URI malformed
35
+ * ```
36
+ *
37
+ * @param encoding - The encoding label per WHATWG Encoding spec
38
+ * @param input - Input scalar-value string to encode
39
+ * @param percentEncodeSet - A string of ASCII chars to escape in addition to C0 control percent-encode set
40
+ * @param spaceAsPlus - Whether to encode space as `'+'` instead of `'%20'` or `' '` (default: false)
41
+ * @returns The percent-encoded string
42
+ */
43
+ export function percentEncodeAfterEncoding(
44
+ encoding: string,
45
+ input: string,
46
+ percentEncodeSet: string,
47
+ spaceAsPlus?: boolean
48
+ ): string;
package/whatwg.js ADDED
@@ -0,0 +1,76 @@
1
+ import { utf8fromStringLoose } from '@exodus/bytes/utf8.js'
2
+ import { createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
3
+ import { isMultibyte, getMultibyteEncoder } from './fallback/encoding.js'
4
+ import { normalizeEncoding, E_ENCODING } from './fallback/encoding.api.js'
5
+ import { percentEncoder } from './fallback/percent.js'
6
+ import { encodeMap } from './fallback/single-byte.js'
7
+ import { E_STRING } from './fallback/_utils.js'
8
+
9
+ // https://url.spec.whatwg.org/#string-percent-encode-after-encoding
10
+ // Codepoints below 0x20, 0x7F specifically, and above 0x7F (non-ASCII) are always encoded
11
+ // > A C0 control is a code point in the range U+0000 NULL to U+001F INFORMATION SEPARATOR ONE, inclusive.
12
+ // > The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~).
13
+ export function percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false) {
14
+ const enc = normalizeEncoding(encoding)
15
+ // Ref: https://encoding.spec.whatwg.org/#get-an-encoder
16
+ if (!enc || enc === 'replacement' || enc === 'utf-16le' || enc === 'utf-16be') {
17
+ throw new RangeError(E_ENCODING)
18
+ }
19
+
20
+ const percent = percentEncoder(percentEncodeSet, spaceAsPlus)
21
+ if (enc === 'utf-8') return percent(utf8fromStringLoose(input))
22
+
23
+ const multi = isMultibyte(enc)
24
+ const encoder = multi ? getMultibyteEncoder() : createSinglebyteEncoder
25
+ const fatal = encoder(enc)
26
+ try {
27
+ return percent(fatal(input))
28
+ } catch {}
29
+
30
+ let res = ''
31
+ let last = 0
32
+ if (multi) {
33
+ const rep = enc === 'gb18030' ? percent(fatal('\uFFFD')) : `%26%23${0xff_fd}%3B` // only gb18030 can encode it
34
+ const escaping = encoder(enc, (cp, u, i) => {
35
+ res += percent(u, last, i)
36
+ res += cp >= 0xd8_00 && cp < 0xe0_00 ? rep : `%26%23${cp}%3B` // &#cp;
37
+ last = i
38
+ return 0 // no bytes emitted
39
+ })
40
+
41
+ const u = escaping(input) // has side effects on res
42
+ res += percent(u, last)
43
+ } else {
44
+ if (typeof input !== 'string') throw new TypeError(E_STRING) // all other paths have their own validation
45
+ const m = encodeMap(enc)
46
+ const len = input.length
47
+ const u = new Uint8Array(len)
48
+ for (let i = 0; i < len; i++) {
49
+ const x = input.charCodeAt(i)
50
+ const b = m[x]
51
+ if (!b && x) {
52
+ let cp = x
53
+ const i0 = i
54
+ if (x >= 0xd8_00 && x < 0xe0_00) {
55
+ cp = 0xff_fd
56
+ if (x < 0xdc_00 && i + 1 < len) {
57
+ const x1 = input.charCodeAt(i + 1)
58
+ if (x1 >= 0xdc_00 && x1 < 0xe0_00) {
59
+ cp = 0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10))
60
+ i++
61
+ }
62
+ }
63
+ }
64
+
65
+ res += `${percent(u, last, i0)}%26%23${cp}%3B` // &#cp;
66
+ last = i + 1 // skip current
67
+ } else {
68
+ u[i] = b
69
+ }
70
+ }
71
+
72
+ res += percent(u, last)
73
+ }
74
+
75
+ return res
76
+ }
package/wif.js CHANGED
@@ -6,6 +6,7 @@ import { assertUint8 } from './assert.js'
6
6
 
7
7
  function from(arr, expectedVersion) {
8
8
  assertUint8(arr)
9
+ if (arr.length !== 33 && arr.length !== 34) throw new Error('Invalid WIF length')
9
10
  const version = arr[0]
10
11
  if (expectedVersion !== undefined && version !== expectedVersion) {
11
12
  throw new Error('Invalid network version')
@@ -14,7 +15,6 @@ function from(arr, expectedVersion) {
14
15
  // Makes a copy, regardless of input being a Buffer or a Uint8Array (unlike .slice)
15
16
  const privateKey = Uint8Array.from(arr.subarray(1, 33))
16
17
  if (arr.length === 33) return { version, privateKey, compressed: false }
17
- if (arr.length !== 34) throw new Error('Invalid WIF length')
18
18
  if (arr[33] !== 1) throw new Error('Invalid compression flag')
19
19
  return { version, privateKey, compressed: true }
20
20
  }
@@ -22,7 +22,6 @@ function from(arr, expectedVersion) {
22
22
  function to({ version: v, privateKey, compressed }) {
23
23
  if (!Number.isSafeInteger(v) || v < 0 || v > 0xff) throw new Error('Missing or invalid version')
24
24
  assertUint8(privateKey, { length: 32, name: 'privateKey' })
25
- if (privateKey.length !== 32) throw new TypeError('Invalid privateKey length')
26
25
  const out = new Uint8Array(compressed ? 34 : 33)
27
26
  out[0] = v
28
27
  out.set(privateKey, 1)