@exodus/bytes 1.0.0-rc.3 → 1.0.0-rc.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,280 @@
1
+ export const E_STRICT = 'Input is not well-formed utf8'
2
+ export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
3
+
4
+ const replacementPoint = 0xff_fd
5
+
6
+ // https://encoding.spec.whatwg.org/#utf-8-decoder
7
+ // We are most likely in loose mode here: for non-loose input, escape & decodeURIComponent have most likely already solved everything
8
+ export function decode(arr, loose) {
9
+ const start = 0
10
+ const end = arr.length
11
+ let out = ''
12
+ const tmp = []
13
+
14
+ for (let i = start; i < end; i++) {
15
+ if (tmp.length > 0x2_00) {
16
+ // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
17
+ // length can be off by a few as large code points produce two utf-16 char codes, also we overshoot in unrolled loop
18
+ out += String.fromCharCode.apply(String, tmp)
19
+ tmp.length = 0
20
+ }
21
+
22
+ const byte = arr[i]
23
+ if (byte < 0x80) {
24
+ // Fast path ascii
25
+ tmp.push(byte)
26
+ // Unroll the loop a bit for faster ops, overshoot by 20 chars
27
+ for (let j = 0; j < 5; j++) {
28
+ if (i + 1 >= end) break
29
+ const byte1 = arr[i + 1]
30
+ if (byte1 >= 0x80) break
31
+ tmp.push(byte1)
32
+ i++
33
+ if (i + 1 >= end) break
34
+ const byte2 = arr[i + 1]
35
+ if (byte2 >= 0x80) break
36
+ tmp.push(byte2)
37
+ i++
38
+ if (i + 1 >= end) break
39
+ const byte3 = arr[i + 1]
40
+ if (byte3 >= 0x80) break
41
+ tmp.push(byte3)
42
+ i++
43
+ if (i + 1 >= end) break
44
+ const byte4 = arr[i + 1]
45
+ if (byte4 >= 0x80) break
46
+ tmp.push(byte4)
47
+ i++
48
+ }
49
+ } else if (byte < 0xc2) {
50
+ if (!loose) throw new TypeError(E_STRICT)
51
+ tmp.push(replacementPoint)
52
+ } else if (byte < 0xe0) {
53
+ // need 1 more
54
+ if (i + 1 >= end) {
55
+ if (!loose) throw new TypeError(E_STRICT)
56
+ tmp.push(replacementPoint)
57
+ break
58
+ }
59
+
60
+ const byte1 = arr[i + 1]
61
+ if (byte1 < 0x80 || byte1 > 0xbf) {
62
+ if (!loose) throw new TypeError(E_STRICT)
63
+ tmp.push(replacementPoint)
64
+ continue
65
+ }
66
+
67
+ i++
68
+ tmp.push(((byte & 0x1f) << 6) | (byte1 & 0x3f))
69
+ } else if (byte < 0xf0) {
70
+ // need 2 more
71
+ if (i + 1 >= end) {
72
+ if (!loose) throw new TypeError(E_STRICT)
73
+ tmp.push(replacementPoint)
74
+ break
75
+ }
76
+
77
+ const lower = byte === 0xe0 ? 0xa0 : 0x80
78
+ const upper = byte === 0xed ? 0x9f : 0xbf
79
+ const byte1 = arr[i + 1]
80
+ if (byte1 < lower || byte1 > upper) {
81
+ if (!loose) throw new TypeError(E_STRICT)
82
+ tmp.push(replacementPoint)
83
+ continue
84
+ }
85
+
86
+ i++
87
+ if (i + 1 >= end) {
88
+ if (!loose) throw new TypeError(E_STRICT)
89
+ tmp.push(replacementPoint)
90
+ break
91
+ }
92
+
93
+ const byte2 = arr[i + 1]
94
+ if (byte2 < 0x80 || byte2 > 0xbf) {
95
+ if (!loose) throw new TypeError(E_STRICT)
96
+ tmp.push(replacementPoint)
97
+ continue
98
+ }
99
+
100
+ i++
101
+ tmp.push(((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f))
102
+ } else if (byte <= 0xf4) {
103
+ // need 3 more
104
+ if (i + 1 >= end) {
105
+ if (!loose) throw new TypeError(E_STRICT)
106
+ tmp.push(replacementPoint)
107
+ break
108
+ }
109
+
110
+ const lower = byte === 0xf0 ? 0x90 : 0x80
111
+ const upper = byte === 0xf4 ? 0x8f : 0xbf
112
+ const byte1 = arr[i + 1]
113
+ if (byte1 < lower || byte1 > upper) {
114
+ if (!loose) throw new TypeError(E_STRICT)
115
+ tmp.push(replacementPoint)
116
+ continue
117
+ }
118
+
119
+ i++
120
+ if (i + 1 >= end) {
121
+ if (!loose) throw new TypeError(E_STRICT)
122
+ tmp.push(replacementPoint)
123
+ break
124
+ }
125
+
126
+ const byte2 = arr[i + 1]
127
+ if (byte2 < 0x80 || byte2 > 0xbf) {
128
+ if (!loose) throw new TypeError(E_STRICT)
129
+ tmp.push(replacementPoint)
130
+ continue
131
+ }
132
+
133
+ i++
134
+ if (i + 1 >= end) {
135
+ if (!loose) throw new TypeError(E_STRICT)
136
+ tmp.push(replacementPoint)
137
+ break
138
+ }
139
+
140
+ const byte3 = arr[i + 1]
141
+ if (byte3 < 0x80 || byte3 > 0xbf) {
142
+ if (!loose) throw new TypeError(E_STRICT)
143
+ tmp.push(replacementPoint)
144
+ continue
145
+ }
146
+
147
+ i++
148
+ const codePoint =
149
+ ((byte & 0xf) << 18) | ((byte1 & 0x3f) << 12) | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f)
150
+ if (codePoint > 0xff_ff) {
151
+ // split into char codes as String.fromCharCode is faster than String.fromCodePoint
152
+ const u = codePoint - 0x1_00_00
153
+ tmp.push(0xd8_00 + ((u >> 10) & 0x3_ff), 0xdc_00 + (u & 0x3_ff))
154
+ } else {
155
+ tmp.push(codePoint)
156
+ }
157
+ // eslint-disable-next-line sonarjs/no-duplicated-branches
158
+ } else {
159
+ if (!loose) throw new TypeError(E_STRICT)
160
+ tmp.push(replacementPoint)
161
+ }
162
+ }
163
+
164
+ if (tmp.length > 0) out += String.fromCharCode.apply(String, tmp)
165
+ return out
166
+ }
167
+
168
+ export function encode(string, loose) {
169
+ const length = string.length
170
+ let lead = null
171
+ let small = true
172
+ let bytes = new Uint8Array(length) // assume ascii
173
+ let p = 0
174
+
175
+ for (let i = 0; i < length; i++) {
176
+ const code = string.charCodeAt(i)
177
+ if (code < 0x80) {
178
+ // Fast path for ascii
179
+ if (lead) {
180
+ if (!loose) throw new TypeError(E_STRICT_UNICODE)
181
+ bytes[p++] = 0xef
182
+ bytes[p++] = 0xbf
183
+ bytes[p++] = 0xbd
184
+ lead = null
185
+ }
186
+
187
+ bytes[p++] = code
188
+ // Unroll the loop a bit for faster ops
189
+ for (let j = 0; j < 5; j++) {
190
+ if (i + 1 >= length) break
191
+ const c1 = string.charCodeAt(i + 1)
192
+ if (c1 >= 0x80) break
193
+ bytes[p++] = c1
194
+ i++
195
+ if (i + 1 >= length) break
196
+ const c2 = string.charCodeAt(i + 1)
197
+ if (c2 >= 0x80) break
198
+ bytes[p++] = c2
199
+ i++
200
+ if (i + 1 >= length) break
201
+ const c3 = string.charCodeAt(i + 1)
202
+ if (c3 >= 0x80) break
203
+ bytes[p++] = c3
204
+ i++
205
+ if (i + 1 >= length) break
206
+ const c4 = string.charCodeAt(i + 1)
207
+ if (c4 >= 0x80) break
208
+ bytes[p++] = c4
209
+ i++
210
+ }
211
+
212
+ continue
213
+ }
214
+
215
+ if (small) {
216
+ // TODO: use resizable array buffers? will have to return a non-resizeable one
217
+ const bytesNew = new Uint8Array(length * 3) // maximum can be 3x of the string length in charcodes
218
+ bytesNew.set(bytes)
219
+ bytes = bytesNew
220
+ small = false
221
+ }
222
+
223
+ // surrogate, charcodes = [d800 + (a & 3ff), dc00 + (b & 3ff)]; codePoint = 0x1_00_00 + ((a << 10) | b)
224
+ // lead: d800 - dbff
225
+ // trail: dc00 - dfff
226
+ if (code >= 0xd8_00 && code < 0xe0_00) {
227
+ if (lead && code < 0xdc_00) {
228
+ // a second lead, meaning the previous one was unpaired
229
+ if (!loose) throw new TypeError(E_STRICT_UNICODE)
230
+ bytes[p++] = 0xef
231
+ bytes[p++] = 0xbf
232
+ bytes[p++] = 0xbd
233
+ lead = null
234
+ // code is still processed as a new lead
235
+ }
236
+
237
+ if (!lead) {
238
+ if (code > 0xdb_ff || i + 1 >= length) {
239
+ // lead out of range || unpaired
240
+ if (!loose) throw new TypeError(E_STRICT_UNICODE)
241
+ bytes[p++] = 0xef
242
+ bytes[p++] = 0xbf
243
+ bytes[p++] = 0xbd
244
+ continue
245
+ }
246
+
247
+ lead = code
248
+ continue
249
+ }
250
+
251
+ // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
252
+ const codePoint = (((lead - 0xd8_00) << 10) | (code - 0xdc_00)) + 0x1_00_00
253
+ bytes[p++] = (codePoint >> 18) | 0xf0
254
+ bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
255
+ bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
256
+ bytes[p++] = (codePoint & 0x3f) | 0x80
257
+ lead = null
258
+ continue
259
+ } else if (lead) {
260
+ if (!loose) throw new TypeError(E_STRICT_UNICODE)
261
+ bytes[p++] = 0xef
262
+ bytes[p++] = 0xbf
263
+ bytes[p++] = 0xbd
264
+ lead = null
265
+ // code is still processed
266
+ }
267
+
268
+ // We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes
269
+ if (code < 0x8_00) {
270
+ bytes[p++] = (code >> 6) | 0xc0
271
+ bytes[p++] = (code & 0x3f) | 0x80
272
+ } else {
273
+ bytes[p++] = (code >> 12) | 0xe0
274
+ bytes[p++] = ((code >> 6) & 0x3f) | 0x80
275
+ bytes[p++] = (code & 0x3f) | 0x80
276
+ }
277
+ }
278
+
279
+ return bytes.length === p ? bytes : bytes.slice(0, p)
280
+ }
package/hex.js CHANGED
@@ -1,4 +1,4 @@
1
- import { assertTypedArray } from './assert.js'
1
+ import { assertUint8 } from './assert.js'
2
2
  import { typedView } from './array.js'
3
3
  import * as js from './fallback/hex.js'
4
4
 
@@ -6,9 +6,10 @@ const { Buffer } = globalThis // Buffer is optional, only used when native
6
6
  const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
7
7
  const { toHex: webHex } = Uint8Array.prototype // Modern engines have this
8
8
 
9
+ const { E_HEX } = js
10
+
9
11
  export function toHex(arr) {
10
- assertTypedArray(arr)
11
- if (!(arr instanceof Uint8Array)) arr = new Uint8Array(arr.buffer, arr.byteOffset, arr.byteLength)
12
+ assertUint8(arr)
12
13
  if (arr.length === 0) return ''
13
14
  if (webHex && arr.toHex === webHex) return arr.toHex()
14
15
  if (!haveNativeBuffer) return js.toHex(arr)
@@ -19,4 +20,12 @@ export function toHex(arr) {
19
20
  // Unlike Buffer.from(), throws on invalid input
20
21
  export const fromHex = Uint8Array.fromHex
21
22
  ? (str, format = 'uint8') => typedView(Uint8Array.fromHex(str), format)
22
- : (str, format = 'uint8') => typedView(js.fromHex(str), format)
23
+ : haveNativeBuffer
24
+ ? (str, format = 'uint8') => {
25
+ if (typeof str !== 'string') throw new TypeError('Input is not a string')
26
+ if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)
27
+ const buf = Buffer.from(str, 'hex') // will stop on first non-hex character, so we can just validate length
28
+ if (buf.length * 2 !== str.length) throw new SyntaxError(E_HEX)
29
+ return typedView(buf, format)
30
+ }
31
+ : (str, format = 'uint8') => typedView(js.fromHex(str), format)
package/package.json CHANGED
@@ -1,12 +1,11 @@
1
1
  {
2
2
  "name": "@exodus/bytes",
3
- "version": "1.0.0-rc.3",
3
+ "version": "1.0.0-rc.5",
4
4
  "description": "Various operations on Uint8Array data",
5
5
  "scripts": {
6
6
  "lint": "eslint .",
7
- "test:v8": "npm run test:d8 --",
8
7
  "test:javascriptcore": "npm run test:jsc --",
9
- "test:d8": "exodus-test --engine=d8:bundle",
8
+ "test:v8": "exodus-test --engine=v8:bundle",
10
9
  "test:jsc": "exodus-test --engine=jsc:bundle",
11
10
  "test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
12
11
  "test:hermes": "exodus-test --engine=hermes:bundle",
@@ -39,32 +38,62 @@
39
38
  },
40
39
  "type": "module",
41
40
  "files": [
41
+ "/fallback/_utils.js",
42
+ "/fallback/base32.js",
42
43
  "/fallback/base64.js",
43
44
  "/fallback/hex.js",
45
+ "/fallback/utf8.js",
44
46
  "/array.js",
45
47
  "/assert.js",
48
+ "/base32.js",
49
+ "/base58.js",
50
+ "/base58check.js",
46
51
  "/base64.js",
47
- "/hex.js"
52
+ "/hex.js",
53
+ "/utf8.js"
48
54
  ],
49
55
  "exports": {
50
56
  "./array.js": "./array.js",
57
+ "./base32.js": "./base32.js",
58
+ "./base58.js": "./base58.js",
59
+ "./base58check.js": "./base58check.js",
51
60
  "./base64.js": "./base64.js",
52
- "./hex.js": "./hex.js"
61
+ "./hex.js": "./hex.js",
62
+ "./utf8.js": "./utf8.js"
63
+ },
64
+ "peerDependencies": {
65
+ "@exodus/crypto": "^1.0.0-rc.4"
66
+ },
67
+ "peerDependenciesMeta": {
68
+ "@exodus/crypto": {
69
+ "optional": true
70
+ }
53
71
  },
54
- "dependencies": {},
55
72
  "devDependencies": {
73
+ "@ethersproject/strings": "^5.8.0",
74
+ "@exodus/crypto": "1.0.0-rc.29",
56
75
  "@exodus/eslint-config": "^5.24.0",
57
76
  "@exodus/prettier": "^1.0.0",
58
- "@exodus/test": "^1.0.0-rc.105",
77
+ "@exodus/test": "^1.0.0-rc.107",
78
+ "@noble/hashes": "^2.0.1",
59
79
  "@scure/base": "^1.2.6",
80
+ "@stablelib/base64": "^2.0.1",
81
+ "@stablelib/hex": "^2.0.1",
60
82
  "@types/node": "^24.0.10",
61
83
  "base-x": "^5.0.1",
62
84
  "base32.js": "^0.1.0",
63
85
  "base64-js": "^1.5.1",
86
+ "bs58": "^6.0.0",
87
+ "bs58check": "^4.0.0",
88
+ "bstring": "^0.3.9",
64
89
  "buffer": "^6.0.3",
65
90
  "electron": "36.5.0",
66
91
  "eslint": "^8.44.0",
67
- "jsvu": "^3.0.0"
92
+ "fast-base64-decode": "^2.0.0",
93
+ "fast-base64-encode": "^1.0.0",
94
+ "hi-base32": "^0.5.1",
95
+ "jsvu": "^3.0.0",
96
+ "text-encoding": "^0.7.0"
68
97
  },
69
98
  "prettier": "@exodus/prettier",
70
99
  "packageManager": "pnpm@10.12.1+sha256.889bac470ec93ccc3764488a19d6ba8f9c648ad5e50a9a6e4be3768a5de387a3"
package/utf8.js ADDED
@@ -0,0 +1,117 @@
1
+ import { assertUint8 } from './assert.js'
2
+ import { typedView } from './array.js'
3
+ import * as js from './fallback/utf8.js'
4
+
5
+ const { Buffer, TextEncoder, TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
6
+ const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
7
+ const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
8
+ const haveDecoder = isNative(TextDecoder)
9
+ const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
10
+ // ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
11
+ // We don't want to strip anything unexpectedly
12
+ const decoderFatal = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true, fatal: true }) : null
13
+ const decoderLoose = haveDecoder ? new TextDecoder('utf8', { ignoreBOM: true }) : null
14
+ const { isWellFormed } = String.prototype
15
+
16
+ const { E_STRICT, E_STRICT_UNICODE } = js
17
+
18
+ const shouldUseEscapePath = Boolean(globalThis.HermesInternal) // faster only on Hermes, js path beats it on normal engines
19
+
20
+ function deLoose(str, loose, res) {
21
+ if (loose) return res
22
+ if (isWellFormed) {
23
+ // We have a fast native method
24
+ if (isWellFormed.call(str)) return res
25
+ throw new TypeError(E_STRICT_UNICODE)
26
+ }
27
+
28
+ // Recheck if the string was encoded correctly
29
+ let start = 0
30
+ const last = res.length - 2
31
+ // Search for EFBFBD
32
+ while (start < last) {
33
+ const pos = res.indexOf(0xef, start)
34
+ if (pos === -1) break
35
+ start = pos + 1
36
+ if (res[pos + 1] === 0xbf && res[pos + 2] === 0xbd) {
37
+ // Found a replacement char in output, need to recheck if we encoded the input correctly
38
+ if (str !== decode(res)) throw new TypeError(E_STRICT_UNICODE)
39
+ return res
40
+ }
41
+ }
42
+
43
+ return res
44
+ }
45
+
46
+ function encode(str, loose = false) {
47
+ if (typeof str !== 'string') throw new TypeError('Input is not a string')
48
+ if (haveNativeBuffer) return deLoose(str, loose, Buffer.from(str)) // faster on ascii on Node.js
49
+ if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str)) // Node.js, browsers, and Hermes
50
+ // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
51
+ return js.encode(str, loose)
52
+ }
53
+
54
+ let escapes
55
+
56
+ function toEscapesPart(arr, start, end) {
57
+ let o = ''
58
+ let i = start
59
+ const last3 = end - 3
60
+ // Unrolled loop is faster
61
+ while (i < last3) {
62
+ const a = arr[i++]
63
+ const b = arr[i++]
64
+ const c = arr[i++]
65
+ const d = arr[i++]
66
+ o += escapes[a]
67
+ o += escapes[b]
68
+ o += escapes[c]
69
+ o += escapes[d]
70
+ }
71
+
72
+ while (i < end) o += escapes[arr[i++]]
73
+ return o
74
+ }
75
+
76
+ function decode(arr, loose = false) {
77
+ assertUint8(arr)
78
+ if (haveDecoder) return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
79
+ // No reason to use native Buffer: it's not faster than TextDecoder, needs rechecks in non-loose mode, and Node.js has TextDecoder
80
+
81
+ // This codepath gives a ~2x perf boost on Hermes
82
+ if (shouldUseEscapePath && escape && decodeURIComponent) {
83
+ if (!escapes) escapes = Array.from({ length: 256 }, (_, i) => escape(String.fromCharCode(i)))
84
+ const length = arr.length
85
+ let o
86
+ if (length > 30_000) {
87
+ // Limit concatenation to avoid excessive GC
88
+ // TODO: recheck thresholds on Hermes (taken from hex)
89
+ const concat = []
90
+ for (let i = 0; i < length; ) {
91
+ const step = i + 500
92
+ const end = step > length ? length : step
93
+ concat.push(toEscapesPart(arr, i, end))
94
+ i = end
95
+ }
96
+
97
+ o = concat.join('')
98
+ concat.length = 0
99
+ } else {
100
+ o = toEscapesPart(arr, 0, length)
101
+ }
102
+
103
+ try {
104
+ return decodeURIComponent(o) // ascii to utf8, escape() is precalculated
105
+ } catch {
106
+ if (!loose) throw new TypeError(E_STRICT)
107
+ // Ok, we have to use manual implementation for loose decoder
108
+ }
109
+ }
110
+
111
+ return js.decode(arr, loose)
112
+ }
113
+
114
+ export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
115
+ export const utf8fromStringLoose = (str, format = 'uint8') => typedView(encode(str, true), format)
116
+ export const utf8toString = (arr) => decode(arr, false)
117
+ export const utf8toStringLoose = (arr) => decode(arr, true)