@exodus/bytes 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -78,6 +78,13 @@ See [the list of encodings](https://encoding.spec.whatwg.org/#names-and-labels).
78
78
 
79
79
  ### `@exodus/bytes/utf8.js`
80
80
 
81
+ ```js
82
+ import { utf8fromString, utf8toString } from '@exodus/bytes/utf8.js'
83
+
84
+ // loose
85
+ import { utf8fromStringLoose, utf8toStringLoose } from '@exodus/bytes/utf8.js'
86
+ ```
87
+
81
88
  ##### `utf8fromString(str, format = 'uint8')`
82
89
  ##### `utf8fromStringLoose(str, format = 'uint8')`
83
90
  ##### `utf8toString(arr)`
@@ -85,6 +92,13 @@ See [the list of encodings](https://encoding.spec.whatwg.org/#names-and-labels).
85
92
 
86
93
  ### `@exodus/bytes/utf16.js`
87
94
 
95
+ ```js
96
+ import { utf16fromString, utf16toString } from '@exodus/bytes/utf16.js'
97
+
98
+ // loose
99
+ import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js'
100
+ ```
101
+
88
102
  ##### `utf16fromString(str, format = 'uint16')`
89
103
  ##### `utf16fromStringLoose(str, format = 'uint16')`
90
104
  ##### `utf16toString(arr, 'uint16')`
@@ -92,21 +106,26 @@ See [the list of encodings](https://encoding.spec.whatwg.org/#names-and-labels).
92
106
 
93
107
  ### `@exodus/bytes/single-byte.js`
94
108
 
95
- ##### `createSinglebyteDecoder(encoding, loose = false)`
96
-
97
- Create a decoder for a supported one-byte `encoding`.
98
-
99
- Returns a function `decode(arr)` that decodes bytes to a string.
109
+ ```js
110
+ import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
111
+ import { windows1252toString } from '@exodus/bytes/single-byte.js'
112
+ ```
100
113
 
101
- ### `@exodus/bytes/multi-byte.js`
114
+ Decode the legacy single-byte encodings according to the [Encoding standard](https://encoding.spec.whatwg.org/)
115
+ ([§9](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings) and
116
+ [§14.5](https://encoding.spec.whatwg.org/#x-user-defined)).
102
117
 
103
- ##### `createMultibyteDecoder(encoding, loose = false)`
118
+ Supports all single-byte encodings listed in the standard:
119
+ `ibm866`, `iso-8859-2`, `iso-8859-3`, `iso-8859-4`, `iso-8859-5`, `iso-8859-6`, `iso-8859-7`, `iso-8859-8`,
120
+ `iso-8859-8-i`, `iso-8859-10`, `iso-8859-13`, `iso-8859-14`, `iso-8859-15`, `iso-8859-16`, `koi8-r`, `koi8-u`,
121
+ `macintosh`, `windows-874`, `windows-1250`, `windows-1251`, `windows-1252`, `windows-1253`, `windows-1254`,
122
+ `windows-1255`, `windows-1256`, `windows-1257`, `windows-1258`, `x-mac-cyrillic` and `x-user-defined`.
104
123
 
105
- Create a decoder for a supported legacy multi-byte `encoding`.
124
+ ##### `createSinglebyteDecoder(encoding, loose = false)`
106
125
 
107
- Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
126
+ Create a decoder for a supported one-byte encoding, given its lowercased name `encoding`.
108
127
 
109
- That function will have state while `stream = true` is used.
128
+ Returns a function `decode(arr)` that decodes bytes to a string.
110
129
 
111
130
  ##### `windows1252toString(arr)`
112
131
 
@@ -116,61 +135,132 @@ Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252
116
135
 
117
136
  There is no loose variant for this encoding, all bytes can be decoded.
118
137
 
119
- Same as `windows1252toString = createSinglebyteDecoder('windows-1252')`.
138
+ Same as:
139
+ ```js
140
+ const windows1252toString = createSinglebyteDecoder('windows-1252')
141
+ ```
142
+
143
+ ### `@exodus/bytes/multi-byte.js`
144
+
145
+ ```js
146
+ import { createMultibyteDecoder } from '@exodus/bytes/multi-byte.js'
147
+ ```
148
+
149
+ Decode the legacy multi-byte encodings according to the [Encoding standard](https://encoding.spec.whatwg.org/)
150
+ ([§10](https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(simplified)-encodings),
151
+ [§11](https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(traditional)-encodings),
152
+ [§12](https://encoding.spec.whatwg.org/#legacy-multi-byte-japanese-encodings),
153
+ [§13](https://encoding.spec.whatwg.org/#legacy-multi-byte-korean-encodings)).
154
+
155
+ Supports all legacy multi-byte encodings listed in the standard:
156
+ `gbk`, `gb18030`, `big5`, `euc-jp`, `iso-2022-jp`, `shift_jis`, `euc-kr`.
157
+
158
+ ##### `createMultibyteDecoder(encoding, loose = false)`
159
+
160
+ Create a decoder for a supported legacy multi-byte encoding, given its lowercased name `encoding`.
161
+
162
+ Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
163
+
164
+ That function will have state while `stream = true` is used.
120
165
 
121
166
  ### `@exodus/bytes/bigint.js`
122
167
 
168
+ ```js
169
+ import { fromBigInt, toBigInt } from '@exodus/bytes/bigint.js'
170
+ ```
171
+
123
172
  ##### `fromBigInt(bigint, { length, format = 'uint8' })`
124
173
  ##### `toBigInt(arr)`
125
174
 
126
175
  ### `@exodus/bytes/hex.js`
127
176
 
128
- ##### `toHex(arr)`
177
+ ```js
178
+ import { fromHex, toHex } from '@exodus/bytes/hex.js'
179
+ ```
180
+
129
181
  ##### `fromHex(string)`
182
+ ##### `toHex(arr)`
130
183
 
131
184
  ### `@exodus/bytes/base64.js`
132
185
 
133
- ##### `toBase64(arr, { padding = true })`
134
- ##### `toBase64url(arr, { padding = false })`
186
+ ```js
187
+ import { fromBase64, toBase64 } from '@exodus/bytes/base64.js'
188
+ import { fromBase64url, toBase64url } from '@exodus/bytes/base64.js'
189
+ import { fromBase64any } from '@exodus/bytes/base64.js'
190
+ ```
191
+
135
192
  ##### `fromBase64(str, { format = 'uint8', padding = 'both' })`
136
193
  ##### `fromBase64url(str, { format = 'uint8', padding = false })`
137
194
  ##### `fromBase64any(str, { format = 'uint8', padding = 'both' })`
195
+ ##### `toBase64(arr, { padding = true })`
196
+ ##### `toBase64url(arr, { padding = false })`
138
197
 
139
198
  ### `@exodus/bytes/base32.js`
140
199
 
141
- ##### `toBase32(arr, { padding = false })`
142
- ##### `toBase32hex(arr, { padding = false })`
200
+ ```js
201
+ import { fromBase32, toBase32 } from '@exodus/bytes/base32.js'
202
+ import { fromBase32hex, toBase32hex } from '@exodus/bytes/base32.js'
203
+ ```
204
+
143
205
  ##### `fromBase32(str, { format = 'uint8', padding = 'both' })`
144
206
  ##### `fromBase32hex(str, { format = 'uint8', padding = 'both' })`
207
+ ##### `toBase32(arr, { padding = false })`
208
+ ##### `toBase32hex(arr, { padding = false })`
145
209
 
146
210
  ### `@exodus/bytes/bech32.js`
147
211
 
212
+ ```js
213
+ import { fromBech32, toBech32 } from '@exodus/bytes/bech32.js'
214
+ import { fromBech32m, toBech32m } from '@exodus/bytes/bech32.js'
215
+ import { getPrefix } from '@exodus/bytes/bech32.js'
216
+ ```
217
+
148
218
  ##### `getPrefix(str, limit = 90)`
149
- ##### `toBech32(prefix, bytes, limit = 90)`
219
+
150
220
  ##### `fromBech32(str, limit = 90)`
151
- ##### `toBech32m(prefix, bytes, limit = 90)`
221
+ ##### `toBech32(prefix, bytes, limit = 90)`
222
+
152
223
  ##### `fromBech32m(str, limit = 90)`
224
+ ##### `toBech32m(prefix, bytes, limit = 90)`
153
225
 
154
226
  ### `@exodus/bytes/base58.js`
155
227
 
156
- ##### `toBase58(arr)`
228
+ ```js
229
+ import { fromBase58, toBase58 } from '@exodus/bytes/base58.js'
230
+ import { fromBase58xrp, toBase58xrp } from '@exodus/bytes/base58.js'
231
+ ```
232
+
157
233
  ##### `fromBase58(str, format = 'uint8')`
234
+ ##### `toBase58(arr)`
158
235
 
159
- ##### `toBase58xrp(arr)`
160
236
  ##### `fromBase58xrp(str, format = 'uint8')`
237
+ ##### `toBase58xrp(arr)`
161
238
 
162
239
  ### `@exodus/bytes/base58check.js`
163
240
 
241
+ ```js
242
+ import { fromBase58check, toBase58check } from '@exodus/bytes/base58check.js'
243
+ import { fromBase58checkSync, toBase58checkSync } from '@exodus/bytes/base58check.js'
244
+ import { makeBase58check } from '@exodus/bytes/base58check.js'
245
+ ```
246
+
164
247
  On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/package/@exodus/crypto) to be installed.
165
248
 
166
- ##### `async toBase58check(arr)`
167
- ##### `toBase58checkSync(arr)`
168
249
  ##### `async fromBase58check(str, format = 'uint8')`
250
+ ##### `async toBase58check(arr)`
169
251
  ##### `fromBase58checkSync(str, format = 'uint8')`
252
+ ##### `toBase58checkSync(arr)`
170
253
  ##### `makeBase58check(hashAlgo, hashAlgoSync)`
171
254
 
172
255
  ### `@exodus/bytes/wif.js`
173
256
 
257
+ ```js
258
+ import { fromWifString, toWifString } from '@exodus/bytes/wif.js'
259
+ import { fromWifStringSync, toWifStringSync } from '@exodus/bytes/wif.js'
260
+ ```
261
+
262
+ On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/package/@exodus/crypto) to be installed.
263
+
174
264
  ##### `async fromWifString(string, version)`
175
265
  ##### `fromWifStringSync(string, version)`
176
266
  ##### `async toWifString({ version, privateKey, compressed })`
@@ -178,18 +268,18 @@ On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/
178
268
 
179
269
  ### `@exodus/bytes/encoding.js`
180
270
 
181
- Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
182
- [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
183
- [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
184
- some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
185
-
186
271
  ```js
187
- import { TextDecoder, TextDecoder } from '@exodus/bytes/encoding.js'
272
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
188
273
 
189
274
  // Hooks for standards
190
275
  import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js'
191
276
  ```
192
277
 
278
+ Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
279
+ [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
280
+ [TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
281
+ some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
282
+
193
283
  #### `new TextDecoder(label = 'utf-8', { fatal = false, ignoreBOM = false })`
194
284
 
195
285
  [TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder) implementation/polyfill.
@@ -265,7 +355,7 @@ new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
265
355
  ### `@exodus/bytes/encoding-lite.js`
266
356
 
267
357
  ```js
268
- import { TextDecoder, TextDecoder } from '@exodus/bytes/encoding-lite.js'
358
+ import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
269
359
 
270
360
  // Hooks for standards
271
361
  import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-lite.js'
@@ -106,7 +106,7 @@ function decodePartTemplates(a, start, end, m) {
106
106
 
107
107
  const decodePart = isHermes ? decodePartTemplates : decodePartAddition
108
108
  export function decode2string(arr, start, end, m) {
109
- if (start - end > 30_000) {
109
+ if (end - start > 30_000) {
110
110
  // Limit concatenation to avoid excessive GC
111
111
  // Thresholds checked on Hermes for toHex
112
112
  const concat = []
@@ -47,11 +47,28 @@ export function normalizeEncoding(label) {
47
47
 
48
48
  const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
49
49
 
50
+ // TODO: make this more strict against Symbol.toStringTag
51
+ // Is not very significant though, anything faking Symbol.toStringTag could as well override
52
+ // prototypes, which is not something we protect against
53
+
54
+ function isAnyArrayBuffer(x) {
55
+ if (x instanceof ArrayBuffer) return true
56
+ if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return true
57
+ if (!x || typeof x.byteLength !== 'number') return false
58
+ const s = Object.prototype.toString.call(x)
59
+ return s === '[object ArrayBuffer]' || s === '[object SharedArrayBuffer]'
60
+ }
61
+
62
+ function isAnyUint8Array(x) {
63
+ if (x instanceof Uint8Array) return true
64
+ if (!x || !ArrayBuffer.isView(x) || x.BYTES_PER_ELEMENT !== 1) return false
65
+ return Object.prototype.toString.call(x) === '[object Uint8Array]'
66
+ }
67
+
50
68
  const fromSource = (x) => {
51
69
  if (x instanceof Uint8Array) return x
52
- if (x instanceof ArrayBuffer) return new Uint8Array(x)
53
70
  if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
54
- if (globalThis.SharedArrayBuffer && x instanceof SharedArrayBuffer) return new Uint8Array(x)
71
+ if (isAnyArrayBuffer(x)) return new Uint8Array(x)
55
72
  throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
56
73
  }
57
74
 
@@ -210,7 +227,7 @@ export class TextEncoder {
210
227
 
211
228
  encodeInto(str, target) {
212
229
  if (typeof str !== 'string') str = `${str}`
213
- if (!(target instanceof Uint8Array)) throw new TypeError('Target must be an Uint8Array')
230
+ if (!isAnyUint8Array(target)) throw new TypeError('Target must be an Uint8Array')
214
231
  if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved
215
232
 
216
233
  const tlen = target.length
@@ -295,6 +312,7 @@ const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])
295
312
  // https://encoding.spec.whatwg.org/#names-and-labels
296
313
  export function labelToName(label) {
297
314
  const enc = normalizeEncoding(label)
315
+ if (enc === 'utf-8') return 'UTF-8' // fast path
298
316
  if (!enc) return enc
299
317
  if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
300
318
  if (enc === 'big5') return 'Big5'
@@ -8,105 +8,130 @@ export const E_STRICT = 'Input is not well-formed for this encoding'
8
8
  // If the decoder is not cleared properly, state can be preserved between non-streaming calls!
9
9
  // See comment about fatal stream
10
10
 
11
- // All except iso-2022-jp are ASCII supersets
12
- // When adding something that is not an ASCII superset, adjust the ASCII fast path
13
- const REP = 0xff_fd
14
- const mappers = {
15
- // https://encoding.spec.whatwg.org/#euc-kr-decoder
16
- 'euc-kr': () => {
17
- const euc = getTable('euc-kr')
18
- let lead = 0
11
+ // Common between euc-kr and big5
12
+ function bigDecoder(err, pair) {
13
+ let lead = 0
19
14
 
20
- const pushback = []
21
- const bytes = (b) => {
22
- if (lead) {
23
- const cp = b >= 0x41 && b <= 0xfe ? euc[(lead - 0x81) * 190 + b - 0x41] : undefined
24
- lead = 0
25
- if (cp !== undefined && cp !== REP) return cp
26
- if (b < 128) pushback.push(b)
27
- return -2
28
- }
15
+ const decodeLead = (b) => {
16
+ const str = pair(lead, b)
17
+ lead = 0
18
+ if (str) return str
19
+ return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
20
+ }
21
+
22
+ const decode = (arr, start, end, stream) => {
23
+ let res = ''
24
+ let i = start
29
25
 
30
- if (b < 128) return b
31
- if (b < 0x81 || b === 0xff) return -2
32
- lead = b
33
- return -1
26
+ if (lead && i < end) res += decodeLead(arr[i++])
27
+ while (i < end) {
28
+ const b = arr[i++]
29
+ if (b < 128) {
30
+ res += String.fromCharCode(b)
31
+ } else if (b === 0x80 || b === 0xff) {
32
+ res += String.fromCharCode(err())
33
+ } else {
34
+ lead = b
35
+ if (i < end) res += decodeLead(arr[i++])
36
+ }
34
37
  }
35
38
 
36
- const eof = () => {
37
- if (!lead) return null
39
+ if (lead && !stream) {
38
40
  lead = 0
39
- return -2
41
+ res += String.fromCharCode(err())
40
42
  }
41
43
 
42
- return { bytes, eof, pushback }
44
+ return res
45
+ }
46
+
47
+ return { decode, isAscii: () => lead === 0 }
48
+ }
49
+
50
+ // All except iso-2022-jp are ASCII supersets
51
+ // When adding something that is not an ASCII superset, adjust the ASCII fast path
52
+ const REP = 0xff_fd
53
+ const mappers = {
54
+ // https://encoding.spec.whatwg.org/#euc-kr-decoder
55
+ 'euc-kr': (err) => {
56
+ const euc = getTable('euc-kr')
57
+ return bigDecoder(err, (l, b) => {
58
+ if (b < 0x41 || b > 0xfe) return
59
+ const cp = euc[(l - 0x81) * 190 + b - 0x41]
60
+ return cp !== undefined && cp !== REP ? String.fromCharCode(cp) : undefined
61
+ })
43
62
  },
44
63
  // https://encoding.spec.whatwg.org/#euc-jp-decoder
45
- 'euc-jp': () => {
64
+ 'euc-jp': (err) => {
46
65
  const jis0208 = getTable('jis0208')
47
66
  const jis0212 = getTable('jis0212')
48
67
  let j12 = false
49
68
  let lead = 0
50
69
 
51
- const pushback = []
52
- const bytes = (b) => {
70
+ const decodeLead = (b) => {
53
71
  if (lead === 0x8e && b >= 0xa1 && b <= 0xdf) {
54
72
  lead = 0
55
- return 0xfe_c0 + b
73
+ return String.fromCharCode(0xfe_c0 + b)
56
74
  }
57
75
 
58
76
  if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) {
59
77
  j12 = true
60
78
  lead = b
61
- return -1
79
+ return ''
80
+ }
81
+
82
+ let cp
83
+ if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
84
+ cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
62
85
  }
63
86
 
64
- if (lead) {
65
- let cp
66
- if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
67
- cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
87
+ lead = 0
88
+ j12 = false
89
+ if (cp !== undefined && cp !== REP) return String.fromCharCode(cp)
90
+ return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
91
+ }
92
+
93
+ const decode = (arr, start, end, stream) => {
94
+ let res = ''
95
+ let i = start
96
+
97
+ if (lead && i < end) res += decodeLead(arr[i++])
98
+ if (lead && i < end) res += decodeLead(arr[i++]) // could be two leads, but no more
99
+ while (i < end) {
100
+ const b = arr[i++]
101
+ if (b < 128) {
102
+ res += String.fromCharCode(b)
103
+ } else if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) {
104
+ res += String.fromCharCode(err())
105
+ } else {
106
+ lead = b
107
+ if (i < end) res += decodeLead(arr[i++])
108
+ if (lead && i < end) res += decodeLead(arr[i++]) // could be two leads
68
109
  }
110
+ }
69
111
 
112
+ if (lead && !stream) {
70
113
  lead = 0
71
- j12 = false
72
- if (cp !== undefined && cp !== REP) return cp
73
- if (b < 128) pushback.push(b)
74
- return -2
114
+ j12 = false // can be true only when lead is non-zero
115
+ res += String.fromCharCode(err())
75
116
  }
76
117
 
77
- if (b < 128) return b
78
- if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) return -2
79
- lead = b
80
- return -1
118
+ return res
81
119
  }
82
120
 
83
- // eslint-disable-next-line sonarjs/no-identical-functions
84
- const eof = () => {
85
- if (!lead) return null
86
- lead = 0
87
- return -2
88
- }
89
-
90
- return { bytes, eof, pushback }
121
+ return { decode, isAscii: () => lead === 0 } // j12 can be true only when lead is non-zero
91
122
  },
92
123
  // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
93
- // Per-letter of the spec, don't shortcut on state changes on EOF. Some code is regrouped but preserving the logic
94
- 'iso-2022-jp': () => {
124
+ 'iso-2022-jp': (err) => {
95
125
  const jis0208 = getTable('jis0208')
96
- const EOF = -1
97
126
  let dState = 1
98
127
  let oState = 1
99
- let lead = 0
128
+ let lead = 0 // 0 or 0x21-0x7e
100
129
  let out = false
101
130
 
102
- const pushback = []
103
- const bytes = (b) => {
104
- if (dState < 5) {
105
- if (b === EOF) return null
106
- if (b === 0x1b) {
107
- dState = 6 // escape start
108
- return -1
109
- }
131
+ const bytes = (pushback, b) => {
132
+ if (dState < 5 && b === 0x1b) {
133
+ dState = 6 // escape start
134
+ return
110
135
  }
111
136
 
112
137
  switch (dState) {
@@ -120,49 +145,46 @@ const mappers = {
120
145
  }
121
146
 
122
147
  if (b <= 0x7f && b !== 0x0e && b !== 0x0f) return b
123
- return -2
148
+ return err()
124
149
  case 3:
125
150
  // Katakana
126
151
  out = false
127
152
  if (b >= 0x21 && b <= 0x5f) return 0xff_40 + b
128
- return -2
153
+ return err()
129
154
  case 4:
130
155
  // Leading byte
131
156
  out = false
132
- if ((b >= 0x21) & (b <= 0x7e)) {
133
- lead = b
134
- dState = 5
135
- return -1
136
- }
137
-
138
- return -2
157
+ if (b < 0x21 || b > 0x7e) return err()
158
+ lead = b
159
+ dState = 5
160
+ return
139
161
  case 5:
140
162
  // Trailing byte
141
163
  out = false
142
164
  if (b === 0x1b) {
143
165
  dState = 6 // escape start
144
- return -2
166
+ return err()
145
167
  }
146
168
 
147
169
  dState = 4
148
170
  if (b >= 0x21 && b <= 0x7e) {
149
171
  const cp = jis0208[(lead - 0x21) * 94 + b - 0x21]
150
- return cp !== undefined && cp !== REP ? cp : -2
172
+ if (cp !== undefined && cp !== REP) return cp
151
173
  }
152
174
 
153
- return -2
175
+ return err()
154
176
  case 6:
155
177
  // Escape start
156
178
  if (b === 0x24 || b === 0x28) {
157
179
  lead = b
158
180
  dState = 7
159
- return -1
181
+ return
160
182
  }
161
183
 
162
184
  out = false
163
185
  dState = oState
164
- if (b !== EOF) pushback.push(b)
165
- return -2
186
+ pushback.push(b)
187
+ return err()
166
188
  case 7: {
167
189
  // Escape
168
190
  const l = lead
@@ -185,63 +207,127 @@ const mappers = {
185
207
  dState = oState = s
186
208
  const output = out
187
209
  out = true
188
- return output ? -2 : -1
210
+ return output ? err() : undefined
189
211
  }
190
212
 
191
213
  out = false
192
214
  dState = oState
193
- if (b !== EOF) pushback.push(b)
194
- pushback.push(l)
195
- return -2
215
+ pushback.push(b, l)
216
+ return err()
217
+ }
218
+ }
219
+ }
220
+
221
+ const eof = (pushback) => {
222
+ if (dState < 5) return null
223
+ out = false
224
+ switch (dState) {
225
+ case 5:
226
+ dState = 4
227
+ return err()
228
+ case 6:
229
+ dState = oState
230
+ return err()
231
+ case 7: {
232
+ dState = oState
233
+ pushback.push(lead)
234
+ lead = 0
235
+ return err()
196
236
  }
197
237
  }
198
238
  }
199
239
 
200
- const eof = () => bytes(EOF)
240
+ const decode = (arr, start, end, stream) => {
241
+ let res = ''
242
+ let i = start
243
+ const pushback = [] // local and auto-cleared
201
244
 
202
- return { bytes, eof, pushback }
245
+ // First, dump everything until EOF
246
+ // Same as the full loop, but without EOF handling
247
+ while (i < end || pushback.length > 0) {
248
+ const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
249
+ if (c !== undefined) res += String.fromCodePoint(c)
250
+ }
251
+
252
+ // Then, dump EOF. This needs the same loop as the characters can be pushed back
253
+ if (!stream) {
254
+ while (i <= end || pushback.length > 0) {
255
+ if (i < end || pushback.length > 0) {
256
+ const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
257
+ if (c !== undefined) res += String.fromCodePoint(c)
258
+ } else {
259
+ const c = eof(pushback)
260
+ if (c === null) break // clean exit
261
+ res += String.fromCodePoint(c)
262
+ }
263
+ }
264
+ }
265
+
266
+ // Chrome and WebKit fail on this, we don't: completely destroy the old decoder state when finished streaming
267
+ // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
268
+ // > Set this’s do not flush to options["stream"]
269
+ if (!stream) {
270
+ dState = oState = 1
271
+ lead = 0
272
+ out = false
273
+ }
274
+
275
+ return res
276
+ }
277
+
278
+ return { decode, isAscii: () => false }
203
279
  },
204
280
  // https://encoding.spec.whatwg.org/#shift_jis-decoder
205
- shift_jis: () => {
281
+ shift_jis: (err) => {
206
282
  const jis0208 = getTable('jis0208')
207
283
  let lead = 0
208
284
 
209
- const pushback = []
210
- const bytes = (b) => {
211
- if (lead) {
212
- const l = lead
213
- lead = 0
214
- if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
215
- const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
216
- if (p >= 8836 && p <= 10_715) return 0xe0_00 - 8836 + p // 16-bit
217
- const cp = jis0208[p]
218
- if (cp !== undefined && cp !== REP) return cp
219
- }
220
-
221
- if (b < 128) pushback.push(b)
222
- return -2
285
+ const decodeLead = (b) => {
286
+ const l = lead
287
+ lead = 0
288
+ if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
289
+ const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
290
+ if (p >= 8836 && p <= 10_715) return String.fromCharCode(0xe0_00 - 8836 + p)
291
+ const cp = jis0208[p]
292
+ if (cp !== undefined && cp !== REP) return String.fromCharCode(cp)
223
293
  }
224
294
 
225
- if (b <= 0x80) return b // 0x80 is allowed
226
- if (b >= 0xa1 && b <= 0xdf) return 0xff_61 - 0xa1 + b
227
- if (b < 0x81 || (b > 0x9f && b < 0xe0) || b > 0xfc) return -2
228
- lead = b
229
- return -1
295
+ return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
230
296
  }
231
297
 
232
- // eslint-disable-next-line sonarjs/no-identical-functions
233
- const eof = () => {
234
- if (!lead) return null
235
- lead = 0 // this clears state completely on EOF
236
- return -2
298
+ const decode = (arr, start, end, stream) => {
299
+ let res = ''
300
+ let i = start
301
+
302
+ if (lead && i < end) res += decodeLead(arr[i++])
303
+ while (i < end) {
304
+ const b = arr[i++]
305
+ if (b <= 0x80) {
306
+ res += String.fromCharCode(b) // 0x80 is allowed
307
+ } else if (b >= 0xa1 && b <= 0xdf) {
308
+ res += String.fromCharCode(0xfe_c0 + b)
309
+ } else if (b === 0xa0 || b > 0xfc) {
310
+ res += String.fromCharCode(err())
311
+ } else {
312
+ lead = b
313
+ if (i < end) res += decodeLead(arr[i++])
314
+ }
315
+ }
316
+
317
+ if (lead && !stream) {
318
+ lead = 0
319
+ res += String.fromCharCode(err())
320
+ }
321
+
322
+ return res
237
323
  }
238
324
 
239
- return { bytes, eof, pushback }
325
+ return { decode, isAscii: () => lead === 0 }
240
326
  },
241
327
  // https://encoding.spec.whatwg.org/#gbk-decoder
242
- gbk: () => mappers.gb18030(), // 10.1.1. GBK’s decoder is gb18030’s decoder
328
+ gbk: (err) => mappers.gb18030(err), // 10.1.1. GBK’s decoder is gb18030’s decoder
243
329
  // https://encoding.spec.whatwg.org/#gb18030-decoder
244
- gb18030: () => {
330
+ gb18030: (err) => {
245
331
  const gb18030 = getTable('gb18030')
246
332
  const gb18030r = getTable('gb18030-ranges')
247
333
  let g1 = 0, g2 = 0, g3 = 0 // prettier-ignore
@@ -258,191 +344,120 @@ const mappers = {
258
344
  return b + p - a
259
345
  }
260
346
 
261
- const pushback = []
262
- const bytes = (b) => {
263
- if (g3) {
264
- if (b < 0x30 || b > 0x39) {
265
- pushback.push(b, g3, g2)
266
- g1 = g2 = g3 = 0
267
- return -2
268
- }
269
-
270
- const cp = index((g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30)
271
- g1 = g2 = g3 = 0
272
- if (cp !== undefined) return cp // Can validly return replacement
273
- return -2
274
- }
347
+ // g1 is 0 or 0x81-0xfe
348
+ // g2 is 0 or 0x30-0x39
349
+ // g3 is 0 or 0x81-0xfe
350
+
351
+ const decode = (arr, start, end, stream) => {
352
+ let res = ''
353
+ let i = start
354
+ const pushback = [] // local and auto-cleared
355
+
356
+ // First, dump everything until EOF
357
+ // Same as the full loop, but without EOF handling
358
+ while (i < end || pushback.length > 0) {
359
+ const b = pushback.length > 0 ? pushback.pop() : arr[i++]
360
+ if (g3) {
361
+ if (b < 0x30 || b > 0x39) {
362
+ pushback.push(b, g3, g2)
363
+ g1 = g2 = g3 = 0
364
+ res += String.fromCharCode(err())
365
+ } else {
366
+ const p = index((g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30)
367
+ g1 = g2 = g3 = 0
368
+ if (p === undefined) {
369
+ res += String.fromCharCode(err())
370
+ } else {
371
+ res += String.fromCodePoint(p) // Can validly return replacement
372
+ }
373
+ }
374
+ } else if (g2) {
375
+ if (b >= 0x81 && b <= 0xfe) {
376
+ g3 = b
377
+ } else {
378
+ pushback.push(b, g2)
379
+ g1 = g2 = 0
380
+ res += String.fromCharCode(err())
381
+ }
382
+ } else if (g1) {
383
+ if (b >= 0x30 && b <= 0x39) {
384
+ g2 = b
385
+ } else {
386
+ let cp
387
+ if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
388
+ cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
389
+ }
275
390
 
276
- if (g2) {
277
- if (b >= 0x81 && b <= 0xfe) {
278
- g3 = b
279
- return -1
391
+ g1 = 0
392
+ if (cp !== undefined && cp !== REP) {
393
+ res += String.fromCodePoint(cp)
394
+ } else {
395
+ res += String.fromCharCode(err())
396
+ if (b < 128) res += String.fromCharCode(b) // can be processed immediately
397
+ }
398
+ }
399
+ } else if (b < 128) {
400
+ res += String.fromCharCode(b)
401
+ } else if (b === 0x80) {
402
+ res += '\u20AC'
403
+ } else if (b === 0xff) {
404
+ res += String.fromCharCode(err())
405
+ } else {
406
+ g1 = b
280
407
  }
281
-
282
- pushback.push(b, g2)
283
- g1 = g2 = 0
284
- return -2
285
408
  }
286
409
 
287
- if (g1) {
288
- if (b >= 0x30 && b <= 0x39) {
289
- g2 = b
290
- return -1
291
- }
292
-
293
- let cp
294
- if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
295
- cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
296
- }
297
-
298
- g1 = 0
299
- if (cp !== undefined && cp !== REP) return cp
300
- if (b < 128) pushback.push(b)
301
- return -2
410
+ // if g1 = 0 then g2 = g3 = 0
411
+ if (g1 && !stream) {
412
+ g1 = g2 = g3 = 0
413
+ res += String.fromCharCode(err())
302
414
  }
303
415
 
304
- if (b < 128) return b
305
- if (b === 0x80) return 0x20_ac
306
- if (b === 0xff) return -2
307
- g1 = b
308
- return -1
416
+ return res
309
417
  }
310
418
 
311
- const eof = () => {
312
- if (!g1 && !g2 && !g3) return null
313
- g1 = g2 = g3 = 0
314
- return -2
315
- }
316
-
317
- return { bytes, eof, pushback }
419
+ return { decode, isAscii: () => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
420
+ },
421
+ // https://encoding.spec.whatwg.org/#big5
422
+ big5: (err) => {
423
+ // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
424
+ // We store that as strings
425
+ const big5 = getTable('big5')
426
+ return bigDecoder(err, (l, b) => {
427
+ if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) return
428
+ return big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)] // strings
429
+ })
318
430
  },
319
431
  }
320
432
 
321
433
  export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path
322
434
 
323
435
  export function multibyteDecoder(enc, loose = false) {
324
- if (enc === 'big5') return big5decoder(loose)
325
436
  if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
326
437
 
327
438
  // Input is assumed to be typechecked already
328
439
  let mapper
329
440
  const asciiSuperset = isAsciiSuperset(enc)
330
- return (arr, stream = false) => {
331
- const onErr = loose
332
- ? () => '\uFFFD'
333
- : () => {
334
- mapper.pushback.length = 0 // the queue is cleared on returning an error
335
- // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
336
- // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
337
- // iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
338
- if (!stream) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
339
- throw new TypeError(E_STRICT)
340
- }
341
-
342
- let res = ''
343
- const length = arr.length
344
- if (asciiSuperset && !mapper) {
345
- res = decodeLatin1(arr, 0, asciiPrefix(arr))
346
- if (res.length === arr.length) return res // ascii
347
- }
348
-
349
- if (!mapper) mapper = mappers[enc]()
350
- const { bytes, eof, pushback } = mapper
351
- let i = res.length
352
-
353
- // First, dump everything until EOF
354
- // Same as the full loop, but without EOF handling
355
- while (i < length || pushback.length > 0) {
356
- const c = bytes(pushback.length > 0 ? pushback.pop() : arr[i++])
357
- if (c >= 0) {
358
- res += String.fromCodePoint(c) // gb18030 returns codepoints above 0xFFFF from ranges
359
- } else if (c === -2) {
360
- res += onErr()
441
+ let streaming // because onErr is cached in mapper
442
+ const onErr = loose
443
+ ? () => REP
444
+ : () => {
445
+ // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
446
+ // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
447
+ // iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
448
+ if (!streaming) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
449
+ throw new TypeError(E_STRICT)
361
450
  }
362
- }
363
-
364
- // Then, dump EOF. This needs the same loop as the characters can be pushed back
365
- // TODO: only some encodings need this, most can be optimized
366
- if (!stream) {
367
- while (i <= length || pushback.length > 0) {
368
- const isEOF = i === length && pushback.length === 0
369
- const c = isEOF ? eof() : bytes(pushback.length > 0 ? pushback.pop() : arr[i++])
370
- if (isEOF && c === null) break // clean exit
371
- if (c === -1) continue // consuming
372
- if (c === -2) {
373
- res += onErr()
374
- } else {
375
- res += String.fromCodePoint(c) // gb18030 returns codepoints above 0xFFFF from ranges
376
- }
377
- }
378
- }
379
451
 
380
- // Chrome and WebKit fail on this, we don't: completely destroy the old decoder instance when finished streaming
381
- // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
382
- // > Set this’s do not flush to options["stream"]
383
- if (!stream) mapper = null
384
-
385
- return res
386
- }
387
- }
388
-
389
- // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
390
- // We store that as strings
391
- function big5decoder(loose) {
392
- // Input is assumed to be typechecked already
393
- let lead = 0
394
- let big5
395
- const pushback = []
396
452
  return (arr, stream = false) => {
397
- const onErr = loose
398
- ? () => '\uFFFD'
399
- : () => {
400
- pushback.length = 0 // the queue is cleared on returning an error
401
- // Lead is always already cleared before throwing
402
- throw new TypeError(E_STRICT)
403
- }
404
-
405
453
  let res = ''
406
- const length = arr.length
407
- if (!lead) {
454
+ if (asciiSuperset && (!mapper || mapper.isAscii?.())) {
408
455
  res = decodeLatin1(arr, 0, asciiPrefix(arr))
409
456
  if (res.length === arr.length) return res // ascii
410
457
  }
411
458
 
412
- if (!big5) big5 = getTable('big5')
413
- for (let i = res.length; i < length || pushback.length > 0; ) {
414
- const b = pushback.length > 0 ? pushback.pop() : arr[i++]
415
- if (lead) {
416
- let cp
417
- if ((b >= 0x40 && b <= 0x7e) || (b >= 0xa1 && b !== 0xff)) {
418
- cp = big5[(lead - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)]
419
- }
420
-
421
- lead = 0
422
- if (cp) {
423
- res += cp // strings
424
- } else {
425
- res += onErr()
426
- if (b < 128) pushback.push(b)
427
- }
428
- } else if (b < 128) {
429
- res += String.fromCharCode(b)
430
- } else if (b < 0x81 || b === 0xff) {
431
- res += onErr()
432
- } else {
433
- lead = b
434
- }
435
- }
436
-
437
- if (!stream) {
438
- // Destroy decoder state
439
- pushback.length = 0
440
- if (lead) {
441
- lead = 0
442
- res += onErr()
443
- }
444
- }
445
-
446
- return res
459
+ streaming = stream // affects onErr
460
+ if (!mapper) mapper = mappers[enc](onErr)
461
+ return res + mapper.decode(arr, res.length, arr.length, stream)
447
462
  }
448
463
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exodus/bytes",
3
- "version": "1.3.0",
3
+ "version": "1.5.0",
4
4
  "description": "Various operations on Uint8Array data",
5
5
  "scripts": {
6
6
  "lint": "eslint .",
package/wif.js CHANGED
@@ -1,4 +1,4 @@
1
- import { toBase58checkSync, fromBase58checkSync } from './base58check.js'
1
+ import { toBase58checkSync, fromBase58checkSync } from '@exodus/bytes/base58check.js'
2
2
  import { assertUint8 } from './assert.js'
3
3
 
4
4
  // Mostly matches npmjs.com/wif, but with extra checks + using our base58check